{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4005607850991388,
  "eval_steps": 250,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004005607850991388,
      "grad_norm": 2.3918890953063965,
      "learning_rate": 2e-05,
      "loss": 3.099,
      "step": 1
    },
    {
      "epoch": 0.0004005607850991388,
      "eval_loss": 3.081904649734497,
      "eval_runtime": 32.6252,
      "eval_samples_per_second": 32.245,
      "eval_steps_per_second": 16.122,
      "step": 1
    },
    {
      "epoch": 0.0008011215701982776,
      "grad_norm": 1.6500884294509888,
      "learning_rate": 4e-05,
      "loss": 2.9763,
      "step": 2
    },
    {
      "epoch": 0.0012016823552974164,
      "grad_norm": 2.728886365890503,
      "learning_rate": 6e-05,
      "loss": 3.5098,
      "step": 3
    },
    {
      "epoch": 0.0016022431403965552,
      "grad_norm": 2.935586929321289,
      "learning_rate": 8e-05,
      "loss": 3.7738,
      "step": 4
    },
    {
      "epoch": 0.002002803925495694,
      "grad_norm": 2.857252597808838,
      "learning_rate": 0.0001,
      "loss": 3.2156,
      "step": 5
    },
    {
      "epoch": 0.002403364710594833,
      "grad_norm": 2.191563129425049,
      "learning_rate": 0.00012,
      "loss": 2.8823,
      "step": 6
    },
    {
      "epoch": 0.0028039254956939716,
      "grad_norm": 1.8152596950531006,
      "learning_rate": 0.00014,
      "loss": 2.8204,
      "step": 7
    },
    {
      "epoch": 0.0032044862807931104,
      "grad_norm": 2.2765188217163086,
      "learning_rate": 0.00016,
      "loss": 2.9313,
      "step": 8
    },
    {
      "epoch": 0.0036050470658922492,
      "grad_norm": 1.715293288230896,
      "learning_rate": 0.00018,
      "loss": 2.6587,
      "step": 9
    },
    {
      "epoch": 0.004005607850991388,
      "grad_norm": 2.0164053440093994,
      "learning_rate": 0.0002,
      "loss": 2.4329,
      "step": 10
    },
    {
      "epoch": 0.004406168636090527,
      "grad_norm": 1.4467110633850098,
      "learning_rate": 0.00019999949650055513,
      "loss": 1.8227,
      "step": 11
    },
    {
      "epoch": 0.004806729421189666,
      "grad_norm": 2.2541091442108154,
      "learning_rate": 0.00019999798600729064,
      "loss": 2.7455,
      "step": 12
    },
    {
      "epoch": 0.0052072902062888045,
      "grad_norm": 2.2924540042877197,
      "learning_rate": 0.0001999954685354173,
      "loss": 2.0061,
      "step": 13
    },
    {
      "epoch": 0.005607850991387943,
      "grad_norm": 1.9437510967254639,
      "learning_rate": 0.00019999194411028594,
      "loss": 2.1656,
      "step": 14
    },
    {
      "epoch": 0.006008411776487082,
      "grad_norm": 1.911136269569397,
      "learning_rate": 0.00019998741276738754,
      "loss": 2.1187,
      "step": 15
    },
    {
      "epoch": 0.006408972561586221,
      "grad_norm": 1.9220823049545288,
      "learning_rate": 0.0001999818745523526,
      "loss": 2.0687,
      "step": 16
    },
    {
      "epoch": 0.00680953334668536,
      "grad_norm": 1.681922197341919,
      "learning_rate": 0.00019997532952095094,
      "loss": 2.0196,
      "step": 17
    },
    {
      "epoch": 0.0072100941317844985,
      "grad_norm": 2.091493844985962,
      "learning_rate": 0.00019996777773909093,
      "loss": 2.6052,
      "step": 18
    },
    {
      "epoch": 0.007610654916883637,
      "grad_norm": 1.233519196510315,
      "learning_rate": 0.00019995921928281894,
      "loss": 2.3185,
      "step": 19
    },
    {
      "epoch": 0.008011215701982776,
      "grad_norm": 2.18742299079895,
      "learning_rate": 0.00019994965423831854,
      "loss": 2.0989,
      "step": 20
    },
    {
      "epoch": 0.008411776487081914,
      "grad_norm": 1.4857062101364136,
      "learning_rate": 0.0001999390827019096,
      "loss": 2.4048,
      "step": 21
    },
    {
      "epoch": 0.008812337272181054,
      "grad_norm": 1.6035873889923096,
      "learning_rate": 0.00019992750478004738,
      "loss": 1.8086,
      "step": 22
    },
    {
      "epoch": 0.009212898057280192,
      "grad_norm": 1.6860904693603516,
      "learning_rate": 0.00019991492058932142,
      "loss": 2.3041,
      "step": 23
    },
    {
      "epoch": 0.009613458842379331,
      "grad_norm": 1.9687587022781372,
      "learning_rate": 0.0001999013302564544,
      "loss": 1.8581,
      "step": 24
    },
    {
      "epoch": 0.01001401962747847,
      "grad_norm": 1.0902950763702393,
      "learning_rate": 0.0001998867339183008,
      "loss": 2.2356,
      "step": 25
    },
    {
      "epoch": 0.010414580412577609,
      "grad_norm": 1.4099599123001099,
      "learning_rate": 0.00019987113172184563,
      "loss": 1.9318,
      "step": 26
    },
    {
      "epoch": 0.010815141197676747,
      "grad_norm": 1.20814049243927,
      "learning_rate": 0.00019985452382420275,
      "loss": 2.5392,
      "step": 27
    },
    {
      "epoch": 0.011215701982775887,
      "grad_norm": 1.4848078489303589,
      "learning_rate": 0.00019983691039261357,
      "loss": 1.9063,
      "step": 28
    },
    {
      "epoch": 0.011616262767875024,
      "grad_norm": 1.263766884803772,
      "learning_rate": 0.00019981829160444514,
      "loss": 2.1898,
      "step": 29
    },
    {
      "epoch": 0.012016823552974164,
      "grad_norm": 1.2411807775497437,
      "learning_rate": 0.00019979866764718843,
      "loss": 1.9232,
      "step": 30
    },
    {
      "epoch": 0.012417384338073302,
      "grad_norm": 1.0204557180404663,
      "learning_rate": 0.0001997780387184565,
      "loss": 1.9399,
      "step": 31
    },
    {
      "epoch": 0.012817945123172442,
      "grad_norm": 1.5857188701629639,
      "learning_rate": 0.00019975640502598244,
      "loss": 1.9523,
      "step": 32
    },
    {
      "epoch": 0.01321850590827158,
      "grad_norm": 1.3558052778244019,
      "learning_rate": 0.00019973376678761724,
      "loss": 2.32,
      "step": 33
    },
    {
      "epoch": 0.01361906669337072,
      "grad_norm": 1.187568187713623,
      "learning_rate": 0.00019971012423132775,
      "loss": 2.2765,
      "step": 34
    },
    {
      "epoch": 0.014019627478469857,
      "grad_norm": 1.5870120525360107,
      "learning_rate": 0.00019968547759519425,
      "loss": 2.3662,
      "step": 35
    },
    {
      "epoch": 0.014420188263568997,
      "grad_norm": 1.1726365089416504,
      "learning_rate": 0.00019965982712740808,
      "loss": 2.1026,
      "step": 36
    },
    {
      "epoch": 0.014820749048668135,
      "grad_norm": 1.651104211807251,
      "learning_rate": 0.00019963317308626914,
      "loss": 2.0235,
      "step": 37
    },
    {
      "epoch": 0.015221309833767275,
      "grad_norm": 1.4108325242996216,
      "learning_rate": 0.0001996055157401834,
      "loss": 2.0739,
      "step": 38
    },
    {
      "epoch": 0.015621870618866412,
      "grad_norm": 1.1008392572402954,
      "learning_rate": 0.00019957685536765995,
      "loss": 2.2383,
      "step": 39
    },
    {
      "epoch": 0.016022431403965552,
      "grad_norm": 1.4775562286376953,
      "learning_rate": 0.00019954719225730847,
      "loss": 1.9138,
      "step": 40
    },
    {
      "epoch": 0.016422992189064692,
      "grad_norm": 1.7750931978225708,
      "learning_rate": 0.00019951652670783615,
      "loss": 2.2775,
      "step": 41
    },
    {
      "epoch": 0.016823552974163828,
      "grad_norm": 1.2284572124481201,
      "learning_rate": 0.0001994848590280447,
      "loss": 2.579,
      "step": 42
    },
    {
      "epoch": 0.017224113759262968,
      "grad_norm": 1.8161015510559082,
      "learning_rate": 0.00019945218953682734,
      "loss": 2.6322,
      "step": 43
    },
    {
      "epoch": 0.017624674544362107,
      "grad_norm": 1.3858716487884521,
      "learning_rate": 0.00019941851856316548,
      "loss": 2.241,
      "step": 44
    },
    {
      "epoch": 0.018025235329461247,
      "grad_norm": 0.994133472442627,
      "learning_rate": 0.00019938384644612543,
      "loss": 2.0473,
      "step": 45
    },
    {
      "epoch": 0.018425796114560383,
      "grad_norm": 1.183368444442749,
      "learning_rate": 0.00019934817353485501,
      "loss": 1.6789,
      "step": 46
    },
    {
      "epoch": 0.018826356899659523,
      "grad_norm": 1.7817606925964355,
      "learning_rate": 0.00019931150018858012,
      "loss": 1.7251,
      "step": 47
    },
    {
      "epoch": 0.019226917684758663,
      "grad_norm": 1.1427100896835327,
      "learning_rate": 0.00019927382677660088,
      "loss": 1.7527,
      "step": 48
    },
    {
      "epoch": 0.019627478469857802,
      "grad_norm": 1.077853798866272,
      "learning_rate": 0.0001992351536782881,
      "loss": 2.4137,
      "step": 49
    },
    {
      "epoch": 0.02002803925495694,
      "grad_norm": 1.5589914321899414,
      "learning_rate": 0.00019919548128307954,
      "loss": 1.778,
      "step": 50
    },
    {
      "epoch": 0.020428600040056078,
      "grad_norm": 1.5429751873016357,
      "learning_rate": 0.00019915480999047573,
      "loss": 2.0978,
      "step": 51
    },
    {
      "epoch": 0.020829160825155218,
      "grad_norm": 1.2046990394592285,
      "learning_rate": 0.00019911314021003613,
      "loss": 1.9611,
      "step": 52
    },
    {
      "epoch": 0.021229721610254357,
      "grad_norm": 1.5479800701141357,
      "learning_rate": 0.00019907047236137498,
      "loss": 2.2686,
      "step": 53
    },
    {
      "epoch": 0.021630282395353494,
      "grad_norm": 1.1960687637329102,
      "learning_rate": 0.00019902680687415705,
      "loss": 1.8488,
      "step": 54
    },
    {
      "epoch": 0.022030843180452633,
      "grad_norm": 1.1699072122573853,
      "learning_rate": 0.0001989821441880933,
      "loss": 1.9985,
      "step": 55
    },
    {
      "epoch": 0.022431403965551773,
      "grad_norm": 1.3737468719482422,
      "learning_rate": 0.00019893648475293648,
      "loss": 1.9347,
      "step": 56
    },
    {
      "epoch": 0.022831964750650913,
      "grad_norm": 1.416020154953003,
      "learning_rate": 0.00019888982902847656,
      "loss": 2.1075,
      "step": 57
    },
    {
      "epoch": 0.02323252553575005,
      "grad_norm": 1.4748502969741821,
      "learning_rate": 0.00019884217748453623,
      "loss": 2.0286,
      "step": 58
    },
    {
      "epoch": 0.02363308632084919,
      "grad_norm": 1.2293422222137451,
      "learning_rate": 0.00019879353060096603,
      "loss": 2.0071,
      "step": 59
    },
    {
      "epoch": 0.024033647105948328,
      "grad_norm": 1.217598795890808,
      "learning_rate": 0.00019874388886763944,
      "loss": 1.6995,
      "step": 60
    },
    {
      "epoch": 0.024434207891047468,
      "grad_norm": 1.6704044342041016,
      "learning_rate": 0.00019869325278444824,
      "loss": 2.1929,
      "step": 61
    },
    {
      "epoch": 0.024834768676146604,
      "grad_norm": 1.6492418050765991,
      "learning_rate": 0.0001986416228612972,
      "loss": 2.2252,
      "step": 62
    },
    {
      "epoch": 0.025235329461245744,
      "grad_norm": 1.2590185403823853,
      "learning_rate": 0.00019858899961809905,
      "loss": 2.079,
      "step": 63
    },
    {
      "epoch": 0.025635890246344883,
      "grad_norm": 1.2326877117156982,
      "learning_rate": 0.00019853538358476932,
      "loss": 1.7934,
      "step": 64
    },
    {
      "epoch": 0.026036451031444023,
      "grad_norm": 1.123470425605774,
      "learning_rate": 0.00019848077530122083,
      "loss": 1.9044,
      "step": 65
    },
    {
      "epoch": 0.02643701181654316,
      "grad_norm": 1.474822998046875,
      "learning_rate": 0.00019842517531735838,
      "loss": 2.0033,
      "step": 66
    },
    {
      "epoch": 0.0268375726016423,
      "grad_norm": 1.433060646057129,
      "learning_rate": 0.00019836858419307324,
      "loss": 1.9392,
      "step": 67
    },
    {
      "epoch": 0.02723813338674144,
      "grad_norm": 1.131077527999878,
      "learning_rate": 0.00019831100249823733,
      "loss": 1.98,
      "step": 68
    },
    {
      "epoch": 0.02763869417184058,
      "grad_norm": 1.2101255655288696,
      "learning_rate": 0.00019825243081269774,
      "loss": 2.1136,
      "step": 69
    },
    {
      "epoch": 0.028039254956939715,
      "grad_norm": 1.6935054063796997,
      "learning_rate": 0.00019819286972627066,
      "loss": 1.9742,
      "step": 70
    },
    {
      "epoch": 0.028439815742038854,
      "grad_norm": 1.0219398736953735,
      "learning_rate": 0.0001981323198387356,
      "loss": 2.1198,
      "step": 71
    },
    {
      "epoch": 0.028840376527137994,
      "grad_norm": 1.2463573217391968,
      "learning_rate": 0.00019807078175982924,
      "loss": 2.2247,
      "step": 72
    },
    {
      "epoch": 0.029240937312237134,
      "grad_norm": 1.3553636074066162,
      "learning_rate": 0.00019800825610923934,
      "loss": 2.235,
      "step": 73
    },
    {
      "epoch": 0.02964149809733627,
      "grad_norm": 1.2697566747665405,
      "learning_rate": 0.00019794474351659852,
      "loss": 2.0797,
      "step": 74
    },
    {
      "epoch": 0.03004205888243541,
      "grad_norm": 1.2558093070983887,
      "learning_rate": 0.00019788024462147788,
      "loss": 1.5283,
      "step": 75
    },
    {
      "epoch": 0.03044261966753455,
      "grad_norm": 1.3813128471374512,
      "learning_rate": 0.00019781476007338058,
      "loss": 2.0476,
      "step": 76
    },
    {
      "epoch": 0.03084318045263369,
      "grad_norm": 1.2129108905792236,
      "learning_rate": 0.00019774829053173526,
      "loss": 2.0728,
      "step": 77
    },
    {
      "epoch": 0.031243741237732825,
      "grad_norm": 1.1871914863586426,
      "learning_rate": 0.00019768083666588953,
      "loss": 1.8535,
      "step": 78
    },
    {
      "epoch": 0.031644302022831965,
      "grad_norm": 1.6774837970733643,
      "learning_rate": 0.00019761239915510302,
      "loss": 2.3511,
      "step": 79
    },
    {
      "epoch": 0.032044862807931104,
      "grad_norm": 1.4928390979766846,
      "learning_rate": 0.00019754297868854073,
      "loss": 2.1108,
      "step": 80
    },
    {
      "epoch": 0.032445423593030244,
      "grad_norm": 1.1172386407852173,
      "learning_rate": 0.00019747257596526593,
      "loss": 2.1435,
      "step": 81
    },
    {
      "epoch": 0.032845984378129384,
      "grad_norm": 1.5492007732391357,
      "learning_rate": 0.00019740119169423337,
      "loss": 1.9114,
      "step": 82
    },
    {
      "epoch": 0.03324654516322852,
      "grad_norm": 1.2928754091262817,
      "learning_rate": 0.00019732882659428177,
      "loss": 2.0398,
      "step": 83
    },
    {
      "epoch": 0.033647105948327656,
      "grad_norm": 1.22319495677948,
      "learning_rate": 0.00019725548139412692,
      "loss": 1.6426,
      "step": 84
    },
    {
      "epoch": 0.034047666733426796,
      "grad_norm": 1.3174762725830078,
      "learning_rate": 0.00019718115683235417,
      "loss": 2.0354,
      "step": 85
    },
    {
      "epoch": 0.034448227518525935,
      "grad_norm": 0.8051577806472778,
      "learning_rate": 0.00019710585365741103,
      "loss": 2.3035,
      "step": 86
    },
    {
      "epoch": 0.034848788303625075,
      "grad_norm": 1.3085061311721802,
      "learning_rate": 0.00019702957262759965,
      "loss": 1.8958,
      "step": 87
    },
    {
      "epoch": 0.035249349088724215,
      "grad_norm": 1.5627758502960205,
      "learning_rate": 0.00019695231451106912,
      "loss": 2.0707,
      "step": 88
    },
    {
      "epoch": 0.035649909873823354,
      "grad_norm": 1.181349277496338,
      "learning_rate": 0.00019687408008580784,
      "loss": 1.761,
      "step": 89
    },
    {
      "epoch": 0.036050470658922494,
      "grad_norm": 1.2541050910949707,
      "learning_rate": 0.00019679487013963564,
      "loss": 1.9864,
      "step": 90
    },
    {
      "epoch": 0.03645103144402163,
      "grad_norm": 1.2387609481811523,
      "learning_rate": 0.00019671468547019573,
      "loss": 1.912,
      "step": 91
    },
    {
      "epoch": 0.036851592229120766,
      "grad_norm": 1.175809621810913,
      "learning_rate": 0.00019663352688494684,
      "loss": 2.0847,
      "step": 92
    },
    {
      "epoch": 0.037252153014219906,
      "grad_norm": 1.1415826082229614,
      "learning_rate": 0.0001965513952011551,
      "loss": 2.1211,
      "step": 93
    },
    {
      "epoch": 0.037652713799319046,
      "grad_norm": 1.2264269590377808,
      "learning_rate": 0.0001964682912458856,
      "loss": 1.8611,
      "step": 94
    },
    {
      "epoch": 0.038053274584418185,
      "grad_norm": 1.2230157852172852,
      "learning_rate": 0.00019638421585599423,
      "loss": 2.3288,
      "step": 95
    },
    {
      "epoch": 0.038453835369517325,
      "grad_norm": 0.8598111271858215,
      "learning_rate": 0.00019629916987811926,
      "loss": 1.8238,
      "step": 96
    },
    {
      "epoch": 0.038854396154616465,
      "grad_norm": 1.5519123077392578,
      "learning_rate": 0.00019621315416867274,
      "loss": 1.8526,
      "step": 97
    },
    {
      "epoch": 0.039254956939715605,
      "grad_norm": 1.3238226175308228,
      "learning_rate": 0.0001961261695938319,
      "loss": 2.278,
      "step": 98
    },
    {
      "epoch": 0.03965551772481474,
      "grad_norm": 1.6846929788589478,
      "learning_rate": 0.00019603821702953046,
      "loss": 2.0994,
      "step": 99
    },
    {
      "epoch": 0.04005607850991388,
      "grad_norm": 1.5321959257125854,
      "learning_rate": 0.00019594929736144976,
      "loss": 1.965,
      "step": 100
    },
    {
      "epoch": 0.04045663929501302,
      "grad_norm": 1.0697311162948608,
      "learning_rate": 0.00019585941148500985,
      "loss": 2.1486,
      "step": 101
    },
    {
      "epoch": 0.040857200080112156,
      "grad_norm": 1.4754281044006348,
      "learning_rate": 0.00019576856030536054,
      "loss": 1.7525,
      "step": 102
    },
    {
      "epoch": 0.041257760865211296,
      "grad_norm": 1.2071729898452759,
      "learning_rate": 0.00019567674473737218,
      "loss": 1.7292,
      "step": 103
    },
    {
      "epoch": 0.041658321650310436,
      "grad_norm": 1.14888596534729,
      "learning_rate": 0.0001955839657056265,
      "loss": 2.0676,
      "step": 104
    },
    {
      "epoch": 0.042058882435409575,
      "grad_norm": 1.120998501777649,
      "learning_rate": 0.0001954902241444074,
      "loss": 1.7143,
      "step": 105
    },
    {
      "epoch": 0.042459443220508715,
      "grad_norm": 1.1699857711791992,
      "learning_rate": 0.00019539552099769126,
      "loss": 2.0488,
      "step": 106
    },
    {
      "epoch": 0.04286000400560785,
      "grad_norm": 1.3505367040634155,
      "learning_rate": 0.00019529985721913778,
      "loss": 2.1923,
      "step": 107
    },
    {
      "epoch": 0.04326056479070699,
      "grad_norm": 1.2390555143356323,
      "learning_rate": 0.00019520323377208017,
      "loss": 2.0017,
      "step": 108
    },
    {
      "epoch": 0.04366112557580613,
      "grad_norm": 1.6898435354232788,
      "learning_rate": 0.00019510565162951537,
      "loss": 2.0393,
      "step": 109
    },
    {
      "epoch": 0.04406168636090527,
      "grad_norm": 1.3708515167236328,
      "learning_rate": 0.00019500711177409454,
      "loss": 1.6338,
      "step": 110
    },
    {
      "epoch": 0.044462247146004406,
      "grad_norm": 1.3160320520401,
      "learning_rate": 0.00019490761519811293,
      "loss": 1.9489,
      "step": 111
    },
    {
      "epoch": 0.044862807931103546,
      "grad_norm": 1.2316290140151978,
      "learning_rate": 0.00019480716290349995,
      "loss": 1.9994,
      "step": 112
    },
    {
      "epoch": 0.045263368716202686,
      "grad_norm": 1.2488354444503784,
      "learning_rate": 0.0001947057559018091,
      "loss": 1.9349,
      "step": 113
    },
    {
      "epoch": 0.045663929501301825,
      "grad_norm": 1.4348840713500977,
      "learning_rate": 0.00019460339521420772,
      "loss": 1.9112,
      "step": 114
    },
    {
      "epoch": 0.04606449028640096,
      "grad_norm": 1.389400839805603,
      "learning_rate": 0.00019450008187146684,
      "loss": 1.9432,
      "step": 115
    },
    {
      "epoch": 0.0464650510715001,
      "grad_norm": 1.025496006011963,
      "learning_rate": 0.00019439581691395067,
      "loss": 1.8639,
      "step": 116
    },
    {
      "epoch": 0.04686561185659924,
      "grad_norm": 0.9544436931610107,
      "learning_rate": 0.00019429060139160618,
      "loss": 2.0917,
      "step": 117
    },
    {
      "epoch": 0.04726617264169838,
      "grad_norm": 1.2288682460784912,
      "learning_rate": 0.00019418443636395248,
      "loss": 1.8996,
      "step": 118
    },
    {
      "epoch": 0.04766673342679752,
      "grad_norm": 1.1020634174346924,
      "learning_rate": 0.00019407732290007023,
      "loss": 2.236,
      "step": 119
    },
    {
      "epoch": 0.048067294211896656,
      "grad_norm": 1.2814069986343384,
      "learning_rate": 0.00019396926207859084,
      "loss": 2.2541,
      "step": 120
    },
    {
      "epoch": 0.048467854996995796,
      "grad_norm": 1.3004281520843506,
      "learning_rate": 0.00019386025498768558,
      "loss": 1.9218,
      "step": 121
    },
    {
      "epoch": 0.048868415782094936,
      "grad_norm": 1.1537413597106934,
      "learning_rate": 0.00019375030272505463,
      "loss": 1.8207,
      "step": 122
    },
    {
      "epoch": 0.04926897656719407,
      "grad_norm": 1.1501256227493286,
      "learning_rate": 0.00019363940639791606,
      "loss": 1.9654,
      "step": 123
    },
    {
      "epoch": 0.04966953735229321,
      "grad_norm": 1.511906385421753,
      "learning_rate": 0.00019352756712299468,
      "loss": 1.9795,
      "step": 124
    },
    {
      "epoch": 0.05007009813739235,
      "grad_norm": 1.2695921659469604,
      "learning_rate": 0.00019341478602651069,
      "loss": 1.8491,
      "step": 125
    },
    {
      "epoch": 0.05047065892249149,
      "grad_norm": 1.4410191774368286,
      "learning_rate": 0.00019330106424416852,
      "loss": 1.8925,
      "step": 126
    },
    {
      "epoch": 0.05087121970759063,
      "grad_norm": 1.254278302192688,
      "learning_rate": 0.00019318640292114524,
      "loss": 1.7206,
      "step": 127
    },
    {
      "epoch": 0.05127178049268977,
      "grad_norm": 1.2319607734680176,
      "learning_rate": 0.00019307080321207912,
      "loss": 1.7632,
      "step": 128
    },
    {
      "epoch": 0.05167234127778891,
      "grad_norm": 1.116660714149475,
      "learning_rate": 0.00019295426628105792,
      "loss": 1.8059,
      "step": 129
    },
    {
      "epoch": 0.052072902062888046,
      "grad_norm": 1.2714475393295288,
      "learning_rate": 0.00019283679330160726,
      "loss": 1.868,
      "step": 130
    },
    {
      "epoch": 0.05247346284798718,
      "grad_norm": 1.2375353574752808,
      "learning_rate": 0.00019271838545667876,
      "loss": 2.3638,
      "step": 131
    },
    {
      "epoch": 0.05287402363308632,
      "grad_norm": 1.0356881618499756,
      "learning_rate": 0.00019259904393863802,
      "loss": 1.9812,
      "step": 132
    },
    {
      "epoch": 0.05327458441818546,
      "grad_norm": 1.3008970022201538,
      "learning_rate": 0.00019247876994925292,
      "loss": 1.6867,
      "step": 133
    },
    {
      "epoch": 0.0536751452032846,
      "grad_norm": 1.1734683513641357,
      "learning_rate": 0.0001923575646996811,
      "loss": 2.1067,
      "step": 134
    },
    {
      "epoch": 0.05407570598838374,
      "grad_norm": 1.2169734239578247,
      "learning_rate": 0.00019223542941045817,
      "loss": 2.0488,
      "step": 135
    },
    {
      "epoch": 0.05447626677348288,
      "grad_norm": 1.3238193988800049,
      "learning_rate": 0.000192112365311485,
      "loss": 1.8312,
      "step": 136
    },
    {
      "epoch": 0.05487682755858202,
      "grad_norm": 1.255581021308899,
      "learning_rate": 0.00019198837364201585,
      "loss": 1.8126,
      "step": 137
    },
    {
      "epoch": 0.05527738834368116,
      "grad_norm": 1.1656538248062134,
      "learning_rate": 0.00019186345565064535,
      "loss": 1.8241,
      "step": 138
    },
    {
      "epoch": 0.05567794912878029,
      "grad_norm": 1.4386628866195679,
      "learning_rate": 0.00019173761259529633,
      "loss": 1.9037,
      "step": 139
    },
    {
      "epoch": 0.05607850991387943,
      "grad_norm": 1.1676979064941406,
      "learning_rate": 0.00019161084574320696,
      "loss": 1.9797,
      "step": 140
    },
    {
      "epoch": 0.05647907069897857,
      "grad_norm": 1.1129257678985596,
      "learning_rate": 0.00019148315637091803,
      "loss": 1.6362,
      "step": 141
    },
    {
      "epoch": 0.05687963148407771,
      "grad_norm": 1.295759677886963,
      "learning_rate": 0.0001913545457642601,
      "loss": 1.7671,
      "step": 142
    },
    {
      "epoch": 0.05728019226917685,
      "grad_norm": 1.3850711584091187,
      "learning_rate": 0.00019122501521834053,
      "loss": 2.0576,
      "step": 143
    },
    {
      "epoch": 0.05768075305427599,
      "grad_norm": 1.1702038049697876,
      "learning_rate": 0.0001910945660375305,
      "loss": 2.0554,
      "step": 144
    },
    {
      "epoch": 0.05808131383937513,
      "grad_norm": 1.0817047357559204,
      "learning_rate": 0.00019096319953545185,
      "loss": 2.0641,
      "step": 145
    },
    {
      "epoch": 0.05848187462447427,
      "grad_norm": 1.2310954332351685,
      "learning_rate": 0.0001908309170349637,
      "loss": 1.6907,
      "step": 146
    },
    {
      "epoch": 0.0588824354095734,
      "grad_norm": 1.5341321229934692,
      "learning_rate": 0.00019069771986814947,
      "loss": 2.3666,
      "step": 147
    },
    {
      "epoch": 0.05928299619467254,
      "grad_norm": 1.2268143892288208,
      "learning_rate": 0.0001905636093763031,
      "loss": 1.9265,
      "step": 148
    },
    {
      "epoch": 0.05968355697977168,
      "grad_norm": 0.9649202823638916,
      "learning_rate": 0.00019042858690991574,
      "loss": 2.1886,
      "step": 149
    },
    {
      "epoch": 0.06008411776487082,
      "grad_norm": 1.3439741134643555,
      "learning_rate": 0.00019029265382866214,
      "loss": 2.0593,
      "step": 150
    },
    {
      "epoch": 0.06048467854996996,
      "grad_norm": 1.1927765607833862,
      "learning_rate": 0.00019015581150138693,
      "loss": 2.1178,
      "step": 151
    },
    {
      "epoch": 0.0608852393350691,
      "grad_norm": 0.8950446844100952,
      "learning_rate": 0.0001900180613060908,
      "loss": 2.2191,
      "step": 152
    },
    {
      "epoch": 0.06128580012016824,
      "grad_norm": 1.1860698461532593,
      "learning_rate": 0.0001898794046299167,
      "loss": 2.0034,
      "step": 153
    },
    {
      "epoch": 0.06168636090526738,
      "grad_norm": 1.1506222486495972,
      "learning_rate": 0.00018973984286913584,
      "loss": 1.901,
      "step": 154
    },
    {
      "epoch": 0.06208692169036651,
      "grad_norm": 1.6920007467269897,
      "learning_rate": 0.00018959937742913359,
      "loss": 1.9474,
      "step": 155
    },
    {
      "epoch": 0.06248748247546565,
      "grad_norm": 1.2259491682052612,
      "learning_rate": 0.00018945800972439538,
      "loss": 2.31,
      "step": 156
    },
    {
      "epoch": 0.0628880432605648,
      "grad_norm": 1.2086715698242188,
      "learning_rate": 0.0001893157411784924,
      "loss": 1.86,
      "step": 157
    },
    {
      "epoch": 0.06328860404566393,
      "grad_norm": 1.2207906246185303,
      "learning_rate": 0.00018917257322406734,
      "loss": 1.8438,
      "step": 158
    },
    {
      "epoch": 0.06368916483076306,
      "grad_norm": 1.1944586038589478,
      "learning_rate": 0.00018902850730281992,
      "loss": 1.8793,
      "step": 159
    },
    {
      "epoch": 0.06408972561586221,
      "grad_norm": 1.4343067407608032,
      "learning_rate": 0.00018888354486549237,
      "loss": 1.9623,
      "step": 160
    },
    {
      "epoch": 0.06449028640096134,
      "grad_norm": 1.1235885620117188,
      "learning_rate": 0.0001887376873718548,
      "loss": 2.2875,
      "step": 161
    },
    {
      "epoch": 0.06489084718606049,
      "grad_norm": 1.3148598670959473,
      "learning_rate": 0.00018859093629069058,
      "loss": 1.7892,
      "step": 162
    },
    {
      "epoch": 0.06529140797115962,
      "grad_norm": 1.112668514251709,
      "learning_rate": 0.00018844329309978145,
      "loss": 2.1598,
      "step": 163
    },
    {
      "epoch": 0.06569196875625877,
      "grad_norm": 1.1902179718017578,
      "learning_rate": 0.00018829475928589271,
      "loss": 2.0822,
      "step": 164
    },
    {
      "epoch": 0.0660925295413579,
      "grad_norm": 1.3088462352752686,
      "learning_rate": 0.00018814533634475822,
      "loss": 2.2902,
      "step": 165
    },
    {
      "epoch": 0.06649309032645705,
      "grad_norm": 1.047739028930664,
      "learning_rate": 0.00018799502578106534,
      "loss": 2.1836,
      "step": 166
    },
    {
      "epoch": 0.06689365111155618,
      "grad_norm": 1.2094810009002686,
      "learning_rate": 0.00018784382910843976,
      "loss": 2.0445,
      "step": 167
    },
    {
      "epoch": 0.06729421189665531,
      "grad_norm": 1.3738199472427368,
      "learning_rate": 0.0001876917478494303,
      "loss": 1.7812,
      "step": 168
    },
    {
      "epoch": 0.06769477268175446,
      "grad_norm": 1.4020622968673706,
      "learning_rate": 0.00018753878353549357,
      "loss": 2.2706,
      "step": 169
    },
    {
      "epoch": 0.06809533346685359,
      "grad_norm": 1.0181434154510498,
      "learning_rate": 0.00018738493770697852,
      "loss": 1.759,
      "step": 170
    },
    {
      "epoch": 0.06849589425195274,
      "grad_norm": 1.2207024097442627,
      "learning_rate": 0.0001872302119131109,
      "loss": 1.9571,
      "step": 171
    },
    {
      "epoch": 0.06889645503705187,
      "grad_norm": 0.9442883729934692,
      "learning_rate": 0.00018707460771197774,
      "loss": 2.1751,
      "step": 172
    },
    {
      "epoch": 0.06929701582215102,
      "grad_norm": 1.2098711729049683,
      "learning_rate": 0.00018691812667051162,
      "loss": 1.952,
      "step": 173
    },
    {
      "epoch": 0.06969757660725015,
      "grad_norm": 1.3631396293640137,
      "learning_rate": 0.00018676077036447494,
      "loss": 2.0478,
      "step": 174
    },
    {
      "epoch": 0.07009813739234928,
      "grad_norm": 1.0731444358825684,
      "learning_rate": 0.00018660254037844388,
      "loss": 1.8923,
      "step": 175
    },
    {
      "epoch": 0.07049869817744843,
      "grad_norm": 1.225913166999817,
      "learning_rate": 0.0001864434383057927,
      "loss": 2.1391,
      "step": 176
    },
    {
      "epoch": 0.07089925896254756,
      "grad_norm": 1.1960365772247314,
      "learning_rate": 0.00018628346574867745,
      "loss": 2.2535,
      "step": 177
    },
    {
      "epoch": 0.07129981974764671,
      "grad_norm": 1.1839686632156372,
      "learning_rate": 0.00018612262431802007,
      "loss": 2.2424,
      "step": 178
    },
    {
      "epoch": 0.07170038053274584,
      "grad_norm": 1.1837133169174194,
      "learning_rate": 0.00018596091563349192,
      "loss": 1.9497,
      "step": 179
    },
    {
      "epoch": 0.07210094131784499,
      "grad_norm": 1.3902225494384766,
      "learning_rate": 0.00018579834132349772,
      "loss": 2.0473,
      "step": 180
    },
    {
      "epoch": 0.07250150210294412,
      "grad_norm": 1.141578197479248,
      "learning_rate": 0.0001856349030251589,
      "loss": 1.6572,
      "step": 181
    },
    {
      "epoch": 0.07290206288804325,
      "grad_norm": 1.1828128099441528,
      "learning_rate": 0.00018547060238429736,
      "loss": 1.958,
      "step": 182
    },
    {
      "epoch": 0.0733026236731424,
      "grad_norm": 1.3398916721343994,
      "learning_rate": 0.00018530544105541872,
      "loss": 2.0078,
      "step": 183
    },
    {
      "epoch": 0.07370318445824153,
      "grad_norm": 1.2211058139801025,
      "learning_rate": 0.0001851394207016957,
      "loss": 1.9945,
      "step": 184
    },
    {
      "epoch": 0.07410374524334068,
      "grad_norm": 1.2373664379119873,
      "learning_rate": 0.00018497254299495146,
      "loss": 1.8566,
      "step": 185
    },
    {
      "epoch": 0.07450430602843981,
      "grad_norm": 1.216086983680725,
      "learning_rate": 0.0001848048096156426,
      "loss": 1.7227,
      "step": 186
    },
    {
      "epoch": 0.07490486681353896,
      "grad_norm": 1.2878079414367676,
      "learning_rate": 0.00018463622225284242,
      "loss": 2.0206,
      "step": 187
    },
    {
      "epoch": 0.07530542759863809,
      "grad_norm": 0.9321051239967346,
      "learning_rate": 0.00018446678260422385,
      "loss": 1.784,
      "step": 188
    },
    {
      "epoch": 0.07570598838373724,
      "grad_norm": 1.0686324834823608,
      "learning_rate": 0.00018429649237604217,
      "loss": 1.8121,
      "step": 189
    },
    {
      "epoch": 0.07610654916883637,
      "grad_norm": 1.2810065746307373,
      "learning_rate": 0.00018412535328311814,
      "loss": 1.9002,
      "step": 190
    },
    {
      "epoch": 0.0765071099539355,
      "grad_norm": 1.3205995559692383,
      "learning_rate": 0.0001839533670488205,
      "loss": 2.1796,
      "step": 191
    },
    {
      "epoch": 0.07690767073903465,
      "grad_norm": 1.2351480722427368,
      "learning_rate": 0.00018378053540504873,
      "loss": 1.8489,
      "step": 192
    },
    {
      "epoch": 0.07730823152413378,
      "grad_norm": 1.2140512466430664,
      "learning_rate": 0.0001836068600922156,
      "loss": 1.9828,
      "step": 193
    },
    {
      "epoch": 0.07770879230923293,
      "grad_norm": 1.4807522296905518,
      "learning_rate": 0.00018343234285922953,
      "loss": 1.6552,
      "step": 194
    },
    {
      "epoch": 0.07810935309433206,
      "grad_norm": 1.23876953125,
      "learning_rate": 0.00018325698546347715,
      "loss": 2.0277,
      "step": 195
    },
    {
      "epoch": 0.07850991387943121,
      "grad_norm": 1.4449162483215332,
      "learning_rate": 0.00018308078967080546,
      "loss": 1.6708,
      "step": 196
    },
    {
      "epoch": 0.07891047466453034,
      "grad_norm": 1.2765625715255737,
      "learning_rate": 0.00018290375725550417,
      "loss": 1.8713,
      "step": 197
    },
    {
      "epoch": 0.07931103544962947,
      "grad_norm": 1.3653018474578857,
      "learning_rate": 0.00018272589000028772,
      "loss": 1.7254,
      "step": 198
    },
    {
      "epoch": 0.07971159623472862,
      "grad_norm": 1.4352061748504639,
      "learning_rate": 0.0001825471896962774,
      "loss": 2.1107,
      "step": 199
    },
    {
      "epoch": 0.08011215701982775,
      "grad_norm": 1.2856611013412476,
      "learning_rate": 0.0001823676581429833,
      "loss": 2.0722,
      "step": 200
    },
    {
      "epoch": 0.0805127178049269,
      "grad_norm": 1.2037805318832397,
      "learning_rate": 0.00018218729714828612,
      "loss": 1.8017,
      "step": 201
    },
    {
      "epoch": 0.08091327859002603,
      "grad_norm": 1.1805696487426758,
      "learning_rate": 0.00018200610852841913,
      "loss": 1.8137,
      "step": 202
    },
    {
      "epoch": 0.08131383937512518,
      "grad_norm": 1.0390084981918335,
      "learning_rate": 0.00018182409410794968,
      "loss": 2.0199,
      "step": 203
    },
    {
      "epoch": 0.08171440016022431,
      "grad_norm": 1.1457184553146362,
      "learning_rate": 0.00018164125571976098,
      "loss": 1.8555,
      "step": 204
    },
    {
      "epoch": 0.08211496094532346,
      "grad_norm": 1.3365423679351807,
      "learning_rate": 0.00018145759520503358,
      "loss": 2.2639,
      "step": 205
    },
    {
      "epoch": 0.08251552173042259,
      "grad_norm": 1.3933526277542114,
      "learning_rate": 0.0001812731144132268,
      "loss": 1.5607,
      "step": 206
    },
    {
      "epoch": 0.08291608251552172,
      "grad_norm": 1.458027720451355,
      "learning_rate": 0.0001810878152020602,
      "loss": 2.2164,
      "step": 207
    },
    {
      "epoch": 0.08331664330062087,
      "grad_norm": 1.6003340482711792,
      "learning_rate": 0.00018090169943749476,
      "loss": 1.9723,
      "step": 208
    },
    {
      "epoch": 0.08371720408572,
      "grad_norm": 0.9654092788696289,
      "learning_rate": 0.00018071476899371414,
      "loss": 2.3965,
      "step": 209
    },
    {
      "epoch": 0.08411776487081915,
      "grad_norm": 1.0213390588760376,
      "learning_rate": 0.00018052702575310588,
      "loss": 2.2219,
      "step": 210
    },
    {
      "epoch": 0.08451832565591828,
      "grad_norm": 1.5746159553527832,
      "learning_rate": 0.00018033847160624225,
      "loss": 1.9594,
      "step": 211
    },
    {
      "epoch": 0.08491888644101743,
      "grad_norm": 1.3370170593261719,
      "learning_rate": 0.00018014910845186153,
      "loss": 1.9862,
      "step": 212
    },
    {
      "epoch": 0.08531944722611656,
      "grad_norm": 1.2249865531921387,
      "learning_rate": 0.0001799589381968485,
      "loss": 2.0159,
      "step": 213
    },
    {
      "epoch": 0.0857200080112157,
      "grad_norm": 1.3740154504776,
      "learning_rate": 0.00017976796275621555,
      "loss": 2.1776,
      "step": 214
    },
    {
      "epoch": 0.08612056879631484,
      "grad_norm": 1.5516133308410645,
      "learning_rate": 0.00017957618405308324,
      "loss": 1.917,
      "step": 215
    },
    {
      "epoch": 0.08652112958141397,
      "grad_norm": 1.3436651229858398,
      "learning_rate": 0.00017938360401866093,
      "loss": 2.1363,
      "step": 216
    },
    {
      "epoch": 0.08692169036651312,
      "grad_norm": 1.111444115638733,
      "learning_rate": 0.00017919022459222752,
      "loss": 2.0363,
      "step": 217
    },
    {
      "epoch": 0.08732225115161225,
      "grad_norm": 1.0461078882217407,
      "learning_rate": 0.00017899604772111163,
      "loss": 2.0568,
      "step": 218
    },
    {
      "epoch": 0.0877228119367114,
      "grad_norm": 1.086348533630371,
      "learning_rate": 0.00017880107536067218,
      "loss": 2.2919,
      "step": 219
    },
    {
      "epoch": 0.08812337272181053,
      "grad_norm": 1.2152503728866577,
      "learning_rate": 0.00017860530947427875,
      "loss": 2.0589,
      "step": 220
    },
    {
      "epoch": 0.08852393350690968,
      "grad_norm": 1.3051732778549194,
      "learning_rate": 0.0001784087520332916,
      "loss": 2.1461,
      "step": 221
    },
    {
      "epoch": 0.08892449429200881,
      "grad_norm": 1.0947463512420654,
      "learning_rate": 0.00017821140501704194,
      "loss": 1.9385,
      "step": 222
    },
    {
      "epoch": 0.08932505507710795,
      "grad_norm": 1.056276559829712,
      "learning_rate": 0.00017801327041281207,
      "loss": 2.5425,
      "step": 223
    },
    {
      "epoch": 0.08972561586220709,
      "grad_norm": 1.5695077180862427,
      "learning_rate": 0.00017781435021581527,
      "loss": 2.1143,
      "step": 224
    },
    {
      "epoch": 0.09012617664730622,
      "grad_norm": 1.2888416051864624,
      "learning_rate": 0.0001776146464291757,
      "loss": 2.0032,
      "step": 225
    },
    {
      "epoch": 0.09052673743240537,
      "grad_norm": 1.258169412612915,
      "learning_rate": 0.00017741416106390826,
      "loss": 2.2619,
      "step": 226
    },
    {
      "epoch": 0.0909272982175045,
      "grad_norm": 1.469509482383728,
      "learning_rate": 0.00017721289613889835,
      "loss": 1.8764,
      "step": 227
    },
    {
      "epoch": 0.09132785900260365,
      "grad_norm": 1.2821156978607178,
      "learning_rate": 0.00017701085368088156,
      "loss": 2.0395,
      "step": 228
    },
    {
      "epoch": 0.09172841978770278,
      "grad_norm": 1.2879642248153687,
      "learning_rate": 0.00017680803572442318,
      "loss": 2.1896,
      "step": 229
    },
    {
      "epoch": 0.09212898057280192,
      "grad_norm": 1.5035954713821411,
      "learning_rate": 0.0001766044443118978,
      "loss": 2.0904,
      "step": 230
    },
    {
      "epoch": 0.09252954135790106,
      "grad_norm": 1.167608618736267,
      "learning_rate": 0.00017640008149346866,
      "loss": 2.0146,
      "step": 231
    },
    {
      "epoch": 0.0929301021430002,
      "grad_norm": 1.177674651145935,
      "learning_rate": 0.0001761949493270671,
      "loss": 1.9045,
      "step": 232
    },
    {
      "epoch": 0.09333066292809934,
      "grad_norm": 1.1968990564346313,
      "learning_rate": 0.0001759890498783717,
      "loss": 1.8802,
      "step": 233
    },
    {
      "epoch": 0.09373122371319847,
      "grad_norm": 1.1479514837265015,
      "learning_rate": 0.0001757823852207877,
      "loss": 1.9292,
      "step": 234
    },
    {
      "epoch": 0.09413178449829762,
      "grad_norm": 1.2045358419418335,
      "learning_rate": 0.00017557495743542585,
      "loss": 1.9526,
      "step": 235
    },
    {
      "epoch": 0.09453234528339675,
      "grad_norm": 1.094698190689087,
      "learning_rate": 0.00017536676861108164,
      "loss": 1.8112,
      "step": 236
    },
    {
      "epoch": 0.0949329060684959,
      "grad_norm": 1.5539491176605225,
      "learning_rate": 0.00017515782084421427,
      "loss": 2.2441,
      "step": 237
    },
    {
      "epoch": 0.09533346685359503,
      "grad_norm": 1.2758251428604126,
      "learning_rate": 0.0001749481162389254,
      "loss": 1.6361,
      "step": 238
    },
    {
      "epoch": 0.09573402763869417,
      "grad_norm": 0.9369722604751587,
      "learning_rate": 0.0001747376569069381,
      "loss": 1.8394,
      "step": 239
    },
    {
      "epoch": 0.09613458842379331,
      "grad_norm": 1.2912318706512451,
      "learning_rate": 0.0001745264449675755,
      "loss": 1.9176,
      "step": 240
    },
    {
      "epoch": 0.09653514920889245,
      "grad_norm": 1.3255847692489624,
      "learning_rate": 0.00017431448254773944,
      "loss": 2.5196,
      "step": 241
    },
    {
      "epoch": 0.09693570999399159,
      "grad_norm": 1.3182979822158813,
      "learning_rate": 0.00017410177178188918,
      "loss": 2.0764,
      "step": 242
    },
    {
      "epoch": 0.09733627077909073,
      "grad_norm": 1.131363034248352,
      "learning_rate": 0.00017388831481201977,
      "loss": 1.4795,
      "step": 243
    },
    {
      "epoch": 0.09773683156418987,
      "grad_norm": 1.3598371744155884,
      "learning_rate": 0.0001736741137876405,
      "loss": 1.9917,
      "step": 244
    },
    {
      "epoch": 0.098137392349289,
      "grad_norm": 1.2983320951461792,
      "learning_rate": 0.00017345917086575332,
      "loss": 1.8847,
      "step": 245
    },
    {
      "epoch": 0.09853795313438814,
      "grad_norm": 1.3627434968948364,
      "learning_rate": 0.0001732434882108311,
      "loss": 1.9239,
      "step": 246
    },
    {
      "epoch": 0.09893851391948728,
      "grad_norm": 1.4476374387741089,
      "learning_rate": 0.00017302706799479574,
      "loss": 1.9497,
      "step": 247
    },
    {
      "epoch": 0.09933907470458642,
      "grad_norm": 1.002682089805603,
      "learning_rate": 0.00017280991239699642,
      "loss": 2.3343,
      "step": 248
    },
    {
      "epoch": 0.09973963548968556,
      "grad_norm": 1.120917558670044,
      "learning_rate": 0.00017259202360418762,
      "loss": 1.9538,
      "step": 249
    },
    {
      "epoch": 0.1001401962747847,
      "grad_norm": 1.4410195350646973,
      "learning_rate": 0.00017237340381050703,
      "loss": 2.1967,
      "step": 250
    },
    {
      "epoch": 0.1001401962747847,
      "eval_loss": 1.95481538772583,
      "eval_runtime": 32.8702,
      "eval_samples_per_second": 32.005,
      "eval_steps_per_second": 16.002,
      "step": 250
    },
    {
      "epoch": 0.10054075705988384,
      "grad_norm": 1.3339630365371704,
      "learning_rate": 0.00017215405521745357,
      "loss": 2.1258,
      "step": 251
    },
    {
      "epoch": 0.10094131784498298,
      "grad_norm": 1.1717379093170166,
      "learning_rate": 0.0001719339800338651,
      "loss": 2.0449,
      "step": 252
    },
    {
      "epoch": 0.10134187863008212,
      "grad_norm": 1.1957095861434937,
      "learning_rate": 0.00017171318047589637,
      "loss": 1.9656,
      "step": 253
    },
    {
      "epoch": 0.10174243941518125,
      "grad_norm": 1.501266598701477,
      "learning_rate": 0.00017149165876699635,
      "loss": 2.459,
      "step": 254
    },
    {
      "epoch": 0.10214300020028039,
      "grad_norm": 0.8971200585365295,
      "learning_rate": 0.00017126941713788632,
      "loss": 1.7243,
      "step": 255
    },
    {
      "epoch": 0.10254356098537953,
      "grad_norm": 1.4776593446731567,
      "learning_rate": 0.0001710464578265369,
      "loss": 2.0676,
      "step": 256
    },
    {
      "epoch": 0.10294412177047867,
      "grad_norm": 0.9212047457695007,
      "learning_rate": 0.00017082278307814592,
      "loss": 1.9708,
      "step": 257
    },
    {
      "epoch": 0.10334468255557781,
      "grad_norm": 0.9782827496528625,
      "learning_rate": 0.00017059839514511565,
      "loss": 1.8311,
      "step": 258
    },
    {
      "epoch": 0.10374524334067695,
      "grad_norm": 1.4289268255233765,
      "learning_rate": 0.00017037329628703004,
      "loss": 2.2642,
      "step": 259
    },
    {
      "epoch": 0.10414580412577609,
      "grad_norm": 1.0863878726959229,
      "learning_rate": 0.00017014748877063214,
      "loss": 1.8184,
      "step": 260
    },
    {
      "epoch": 0.10454636491087523,
      "grad_norm": 1.4105286598205566,
      "learning_rate": 0.00016992097486980107,
      "loss": 1.7869,
      "step": 261
    },
    {
      "epoch": 0.10494692569597436,
      "grad_norm": 1.217506766319275,
      "learning_rate": 0.00016969375686552937,
      "loss": 2.188,
      "step": 262
    },
    {
      "epoch": 0.1053474864810735,
      "grad_norm": 1.2353211641311646,
      "learning_rate": 0.00016946583704589973,
      "loss": 1.9721,
      "step": 263
    },
    {
      "epoch": 0.10574804726617264,
      "grad_norm": 0.9625990390777588,
      "learning_rate": 0.00016923721770606228,
      "loss": 1.6519,
      "step": 264
    },
    {
      "epoch": 0.10614860805127178,
      "grad_norm": 1.0968215465545654,
      "learning_rate": 0.00016900790114821122,
      "loss": 2.0124,
      "step": 265
    },
    {
      "epoch": 0.10654916883637092,
      "grad_norm": 1.0629442930221558,
      "learning_rate": 0.0001687778896815617,
      "loss": 2.3657,
      "step": 266
    },
    {
      "epoch": 0.10694972962147006,
      "grad_norm": 1.5048670768737793,
      "learning_rate": 0.00016854718562232668,
      "loss": 2.0883,
      "step": 267
    },
    {
      "epoch": 0.1073502904065692,
      "grad_norm": 1.4587311744689941,
      "learning_rate": 0.00016831579129369346,
      "loss": 1.8842,
      "step": 268
    },
    {
      "epoch": 0.10775085119166834,
      "grad_norm": 1.3808192014694214,
      "learning_rate": 0.00016808370902580036,
      "loss": 1.9145,
      "step": 269
    },
    {
      "epoch": 0.10815141197676748,
      "grad_norm": 1.2429652214050293,
      "learning_rate": 0.00016785094115571322,
      "loss": 2.3074,
      "step": 270
    },
    {
      "epoch": 0.10855197276186661,
      "grad_norm": 1.3593460321426392,
      "learning_rate": 0.00016761749002740193,
      "loss": 1.8074,
      "step": 271
    },
    {
      "epoch": 0.10895253354696575,
      "grad_norm": 1.303536057472229,
      "learning_rate": 0.00016738335799171682,
      "loss": 1.9934,
      "step": 272
    },
    {
      "epoch": 0.10935309433206489,
      "grad_norm": 0.9886314272880554,
      "learning_rate": 0.00016714854740636478,
      "loss": 2.0478,
      "step": 273
    },
    {
      "epoch": 0.10975365511716403,
      "grad_norm": 1.2797132730484009,
      "learning_rate": 0.00016691306063588583,
      "loss": 1.6133,
      "step": 274
    },
    {
      "epoch": 0.11015421590226317,
      "grad_norm": 0.9066633582115173,
      "learning_rate": 0.00016667690005162916,
      "loss": 2.1399,
      "step": 275
    },
    {
      "epoch": 0.11055477668736231,
      "grad_norm": 1.3959397077560425,
      "learning_rate": 0.00016644006803172924,
      "loss": 2.0478,
      "step": 276
    },
    {
      "epoch": 0.11095533747246145,
      "grad_norm": 1.2920677661895752,
      "learning_rate": 0.00016620256696108188,
      "loss": 2.0572,
      "step": 277
    },
    {
      "epoch": 0.11135589825756058,
      "grad_norm": 1.048725962638855,
      "learning_rate": 0.00016596439923132017,
      "loss": 2.1255,
      "step": 278
    },
    {
      "epoch": 0.11175645904265973,
      "grad_norm": 1.2046992778778076,
      "learning_rate": 0.00016572556724079056,
      "loss": 2.0455,
      "step": 279
    },
    {
      "epoch": 0.11215701982775886,
      "grad_norm": 1.0558348894119263,
      "learning_rate": 0.00016548607339452853,
      "loss": 1.8228,
      "step": 280
    },
    {
      "epoch": 0.112557580612858,
      "grad_norm": 1.1831914186477661,
      "learning_rate": 0.00016524592010423443,
      "loss": 1.7431,
      "step": 281
    },
    {
      "epoch": 0.11295814139795714,
      "grad_norm": 1.540031909942627,
      "learning_rate": 0.00016500510978824926,
      "loss": 2.0272,
      "step": 282
    },
    {
      "epoch": 0.11335870218305628,
      "grad_norm": 1.2096210718154907,
      "learning_rate": 0.00016476364487153023,
      "loss": 1.77,
      "step": 283
    },
    {
      "epoch": 0.11375926296815542,
      "grad_norm": 0.9515264630317688,
      "learning_rate": 0.0001645215277856263,
      "loss": 1.9224,
      "step": 284
    },
    {
      "epoch": 0.11415982375325455,
      "grad_norm": 0.9934387803077698,
      "learning_rate": 0.00016427876096865394,
      "loss": 2.0196,
      "step": 285
    },
    {
      "epoch": 0.1145603845383537,
      "grad_norm": 1.2015880346298218,
      "learning_rate": 0.00016403534686527225,
      "loss": 1.6666,
      "step": 286
    },
    {
      "epoch": 0.11496094532345283,
      "grad_norm": 1.052933931350708,
      "learning_rate": 0.00016379128792665855,
      "loss": 2.2045,
      "step": 287
    },
    {
      "epoch": 0.11536150610855198,
      "grad_norm": 1.562372088432312,
      "learning_rate": 0.00016354658661048364,
      "loss": 2.3048,
      "step": 288
    },
    {
      "epoch": 0.11576206689365111,
      "grad_norm": 1.3387340307235718,
      "learning_rate": 0.00016330124538088705,
      "loss": 2.1904,
      "step": 289
    },
    {
      "epoch": 0.11616262767875025,
      "grad_norm": 0.9825554490089417,
      "learning_rate": 0.00016305526670845226,
      "loss": 2.1263,
      "step": 290
    },
    {
      "epoch": 0.11656318846384939,
      "grad_norm": 1.253036379814148,
      "learning_rate": 0.00016280865307018177,
      "loss": 1.7718,
      "step": 291
    },
    {
      "epoch": 0.11696374924894853,
      "grad_norm": 1.3797401189804077,
      "learning_rate": 0.00016256140694947217,
      "loss": 1.8047,
      "step": 292
    },
    {
      "epoch": 0.11736431003404767,
      "grad_norm": 1.1455391645431519,
      "learning_rate": 0.00016231353083608912,
      "loss": 2.0973,
      "step": 293
    },
    {
      "epoch": 0.1177648708191468,
      "grad_norm": 1.6505376100540161,
      "learning_rate": 0.00016206502722614238,
      "loss": 2.0752,
      "step": 294
    },
    {
      "epoch": 0.11816543160424595,
      "grad_norm": 1.3806008100509644,
      "learning_rate": 0.00016181589862206052,
      "loss": 2.0264,
      "step": 295
    },
    {
      "epoch": 0.11856599238934508,
      "grad_norm": 1.1952394247055054,
      "learning_rate": 0.0001615661475325658,
      "loss": 2.4044,
      "step": 296
    },
    {
      "epoch": 0.11896655317444423,
      "grad_norm": 1.2005327939987183,
      "learning_rate": 0.00016131577647264902,
      "loss": 1.9808,
      "step": 297
    },
    {
      "epoch": 0.11936711395954336,
      "grad_norm": 1.0058878660202026,
      "learning_rate": 0.00016106478796354382,
      "loss": 2.0834,
      "step": 298
    },
    {
      "epoch": 0.1197676747446425,
      "grad_norm": 1.2255983352661133,
      "learning_rate": 0.0001608131845327018,
      "loss": 1.7139,
      "step": 299
    },
    {
      "epoch": 0.12016823552974164,
      "grad_norm": 1.3495612144470215,
      "learning_rate": 0.00016056096871376667,
      "loss": 1.6639,
      "step": 300
    },
    {
      "epoch": 0.12056879631484077,
      "grad_norm": 1.1183319091796875,
      "learning_rate": 0.00016030814304654895,
      "loss": 1.9833,
      "step": 301
    },
    {
      "epoch": 0.12096935709993992,
      "grad_norm": 1.3301992416381836,
      "learning_rate": 0.00016005471007700031,
      "loss": 1.7918,
      "step": 302
    },
    {
      "epoch": 0.12136991788503905,
      "grad_norm": 1.4026867151260376,
      "learning_rate": 0.00015980067235718792,
      "loss": 2.1528,
      "step": 303
    },
    {
      "epoch": 0.1217704786701382,
      "grad_norm": 1.2631279230117798,
      "learning_rate": 0.0001595460324452688,
      "loss": 2.1449,
      "step": 304
    },
    {
      "epoch": 0.12217103945523733,
      "grad_norm": 1.7133634090423584,
      "learning_rate": 0.00015929079290546408,
      "loss": 2.0834,
      "step": 305
    },
    {
      "epoch": 0.12257160024033648,
      "grad_norm": 0.9135451912879944,
      "learning_rate": 0.000159034956308033,
      "loss": 2.4581,
      "step": 306
    },
    {
      "epoch": 0.12297216102543561,
      "grad_norm": 1.2533658742904663,
      "learning_rate": 0.00015877852522924732,
      "loss": 1.9334,
      "step": 307
    },
    {
      "epoch": 0.12337272181053476,
      "grad_norm": 1.1332594156265259,
      "learning_rate": 0.00015852150225136518,
      "loss": 2.1089,
      "step": 308
    },
    {
      "epoch": 0.12377328259563389,
      "grad_norm": 1.5345416069030762,
      "learning_rate": 0.00015826388996260503,
      "loss": 2.0917,
      "step": 309
    },
    {
      "epoch": 0.12417384338073302,
      "grad_norm": 0.9528497457504272,
      "learning_rate": 0.00015800569095711982,
      "loss": 2.2036,
      "step": 310
    },
    {
      "epoch": 0.12457440416583217,
      "grad_norm": 1.1167405843734741,
      "learning_rate": 0.00015774690783497067,
      "loss": 2.1452,
      "step": 311
    },
    {
      "epoch": 0.1249749649509313,
      "grad_norm": 1.2491390705108643,
      "learning_rate": 0.00015748754320210072,
      "loss": 1.85,
      "step": 312
    },
    {
      "epoch": 0.12537552573603045,
      "grad_norm": 0.943801760673523,
      "learning_rate": 0.00015722759967030898,
      "loss": 1.8039,
      "step": 313
    },
    {
      "epoch": 0.1257760865211296,
      "grad_norm": 1.1968666315078735,
      "learning_rate": 0.0001569670798572239,
      "loss": 1.8249,
      "step": 314
    },
    {
      "epoch": 0.1261766473062287,
      "grad_norm": 0.8715935349464417,
      "learning_rate": 0.00015670598638627706,
      "loss": 1.9182,
      "step": 315
    },
    {
      "epoch": 0.12657720809132786,
      "grad_norm": 1.3957470655441284,
      "learning_rate": 0.00015644432188667695,
      "loss": 2.0424,
      "step": 316
    },
    {
      "epoch": 0.126977768876427,
      "grad_norm": 1.0456676483154297,
      "learning_rate": 0.00015618208899338202,
      "loss": 1.821,
      "step": 317
    },
    {
      "epoch": 0.12737832966152612,
      "grad_norm": 0.8687244057655334,
      "learning_rate": 0.0001559192903470747,
      "loss": 2.2916,
      "step": 318
    },
    {
      "epoch": 0.12777889044662527,
      "grad_norm": 1.1597154140472412,
      "learning_rate": 0.0001556559285941344,
      "loss": 2.2943,
      "step": 319
    },
    {
      "epoch": 0.12817945123172442,
      "grad_norm": 1.4827574491500854,
      "learning_rate": 0.00015539200638661104,
      "loss": 2.2057,
      "step": 320
    },
    {
      "epoch": 0.12858001201682356,
      "grad_norm": 1.2822664976119995,
      "learning_rate": 0.00015512752638219835,
      "loss": 1.9975,
      "step": 321
    },
    {
      "epoch": 0.12898057280192268,
      "grad_norm": 1.2990927696228027,
      "learning_rate": 0.000154862491244207,
      "loss": 1.7885,
      "step": 322
    },
    {
      "epoch": 0.12938113358702183,
      "grad_norm": 1.2612892389297485,
      "learning_rate": 0.0001545969036415379,
      "loss": 2.1861,
      "step": 323
    },
    {
      "epoch": 0.12978169437212098,
      "grad_norm": 0.9105194211006165,
      "learning_rate": 0.00015433076624865531,
      "loss": 2.0607,
      "step": 324
    },
    {
      "epoch": 0.1301822551572201,
      "grad_norm": 1.3368383646011353,
      "learning_rate": 0.00015406408174555976,
      "loss": 2.0913,
      "step": 325
    },
    {
      "epoch": 0.13058281594231924,
      "grad_norm": 0.7987350225448608,
      "learning_rate": 0.00015379685281776125,
      "loss": 2.355,
      "step": 326
    },
    {
      "epoch": 0.1309833767274184,
      "grad_norm": 1.3039599657058716,
      "learning_rate": 0.00015352908215625214,
      "loss": 2.1028,
      "step": 327
    },
    {
      "epoch": 0.13138393751251753,
      "grad_norm": 1.3268717527389526,
      "learning_rate": 0.00015326077245747999,
      "loss": 1.7859,
      "step": 328
    },
    {
      "epoch": 0.13178449829761665,
      "grad_norm": 1.2853844165802002,
      "learning_rate": 0.0001529919264233205,
      "loss": 1.7469,
      "step": 329
    },
    {
      "epoch": 0.1321850590827158,
      "grad_norm": 1.4058549404144287,
      "learning_rate": 0.00015272254676105025,
      "loss": 1.9602,
      "step": 330
    },
    {
      "epoch": 0.13258561986781495,
      "grad_norm": 1.1850024461746216,
      "learning_rate": 0.00015245263618331945,
      "loss": 2.2856,
      "step": 331
    },
    {
      "epoch": 0.1329861806529141,
      "grad_norm": 1.2287150621414185,
      "learning_rate": 0.0001521821974081246,
      "loss": 2.0198,
      "step": 332
    },
    {
      "epoch": 0.1333867414380132,
      "grad_norm": 0.8819483518600464,
      "learning_rate": 0.00015191123315878123,
      "loss": 2.1921,
      "step": 333
    },
    {
      "epoch": 0.13378730222311236,
      "grad_norm": 1.0964730978012085,
      "learning_rate": 0.0001516397461638962,
      "loss": 1.7096,
      "step": 334
    },
    {
      "epoch": 0.1341878630082115,
      "grad_norm": 0.9685081839561462,
      "learning_rate": 0.00015136773915734066,
      "loss": 1.8356,
      "step": 335
    },
    {
      "epoch": 0.13458842379331062,
      "grad_norm": 1.3063654899597168,
      "learning_rate": 0.00015109521487822206,
      "loss": 1.7442,
      "step": 336
    },
    {
      "epoch": 0.13498898457840977,
      "grad_norm": 1.117842674255371,
      "learning_rate": 0.00015082217607085692,
      "loss": 2.0806,
      "step": 337
    },
    {
      "epoch": 0.13538954536350892,
      "grad_norm": 1.071869969367981,
      "learning_rate": 0.000150548625484743,
      "loss": 1.6267,
      "step": 338
    },
    {
      "epoch": 0.13579010614860806,
      "grad_norm": 1.2108855247497559,
      "learning_rate": 0.0001502745658745316,
      "loss": 1.9425,
      "step": 339
    },
    {
      "epoch": 0.13619066693370718,
      "grad_norm": 1.4408931732177734,
      "learning_rate": 0.00015000000000000001,
      "loss": 2.3276,
      "step": 340
    },
    {
      "epoch": 0.13659122771880633,
      "grad_norm": 1.092643141746521,
      "learning_rate": 0.00014972493062602354,
      "loss": 1.6824,
      "step": 341
    },
    {
      "epoch": 0.13699178850390548,
      "grad_norm": 1.1907880306243896,
      "learning_rate": 0.0001494493605225477,
      "loss": 2.1771,
      "step": 342
    },
    {
      "epoch": 0.1373923492890046,
      "grad_norm": 1.0870776176452637,
      "learning_rate": 0.0001491732924645604,
      "loss": 2.1814,
      "step": 343
    },
    {
      "epoch": 0.13779291007410374,
      "grad_norm": 1.4029686450958252,
      "learning_rate": 0.0001488967292320639,
      "loss": 1.882,
      "step": 344
    },
    {
      "epoch": 0.1381934708592029,
      "grad_norm": 0.9258529543876648,
      "learning_rate": 0.00014861967361004687,
      "loss": 2.2565,
      "step": 345
    },
    {
      "epoch": 0.13859403164430203,
      "grad_norm": 1.3441503047943115,
      "learning_rate": 0.00014834212838845637,
      "loss": 1.9838,
      "step": 346
    },
    {
      "epoch": 0.13899459242940115,
      "grad_norm": 1.1867705583572388,
      "learning_rate": 0.00014806409636216973,
      "loss": 1.6701,
      "step": 347
    },
    {
      "epoch": 0.1393951532145003,
      "grad_norm": 1.2288343906402588,
      "learning_rate": 0.00014778558033096633,
      "loss": 1.932,
      "step": 348
    },
    {
      "epoch": 0.13979571399959945,
      "grad_norm": 1.100757360458374,
      "learning_rate": 0.0001475065830994995,
      "loss": 1.6942,
      "step": 349
    },
    {
      "epoch": 0.14019627478469857,
      "grad_norm": 0.8742533326148987,
      "learning_rate": 0.0001472271074772683,
      "loss": 1.8398,
      "step": 350
    },
    {
      "epoch": 0.1405968355697977,
      "grad_norm": 1.299513578414917,
      "learning_rate": 0.00014694715627858908,
      "loss": 1.5597,
      "step": 351
    },
    {
      "epoch": 0.14099739635489686,
      "grad_norm": 1.0255597829818726,
      "learning_rate": 0.00014666673232256738,
      "loss": 2.1195,
      "step": 352
    },
    {
      "epoch": 0.141397957139996,
      "grad_norm": 0.9807868003845215,
      "learning_rate": 0.00014638583843306927,
      "loss": 2.0172,
      "step": 353
    },
    {
      "epoch": 0.14179851792509512,
      "grad_norm": 1.029623031616211,
      "learning_rate": 0.00014610447743869314,
      "loss": 2.1941,
      "step": 354
    },
    {
      "epoch": 0.14219907871019427,
      "grad_norm": 1.2617048025131226,
      "learning_rate": 0.00014582265217274104,
      "loss": 2.0539,
      "step": 355
    },
    {
      "epoch": 0.14259963949529342,
      "grad_norm": 1.2233887910842896,
      "learning_rate": 0.00014554036547319033,
      "loss": 2.1597,
      "step": 356
    },
    {
      "epoch": 0.14300020028039254,
      "grad_norm": 1.778652548789978,
      "learning_rate": 0.00014525762018266483,
      "loss": 2.6006,
      "step": 357
    },
    {
      "epoch": 0.14340076106549168,
      "grad_norm": 0.983271062374115,
      "learning_rate": 0.0001449744191484066,
      "loss": 1.764,
      "step": 358
    },
    {
      "epoch": 0.14380132185059083,
      "grad_norm": 1.3344851732254028,
      "learning_rate": 0.0001446907652222468,
      "loss": 1.8017,
      "step": 359
    },
    {
      "epoch": 0.14420188263568998,
      "grad_norm": 1.0751646757125854,
      "learning_rate": 0.00014440666126057744,
      "loss": 2.0915,
      "step": 360
    },
    {
      "epoch": 0.1446024434207891,
      "grad_norm": 1.1397452354431152,
      "learning_rate": 0.00014412211012432212,
      "loss": 2.232,
      "step": 361
    },
    {
      "epoch": 0.14500300420588824,
      "grad_norm": 1.3193897008895874,
      "learning_rate": 0.00014383711467890774,
      "loss": 2.4251,
      "step": 362
    },
    {
      "epoch": 0.1454035649909874,
      "grad_norm": 1.5369534492492676,
      "learning_rate": 0.00014355167779423524,
      "loss": 2.1641,
      "step": 363
    },
    {
      "epoch": 0.1458041257760865,
      "grad_norm": 1.2363543510437012,
      "learning_rate": 0.00014326580234465085,
      "loss": 2.2259,
      "step": 364
    },
    {
      "epoch": 0.14620468656118565,
      "grad_norm": 1.2943087816238403,
      "learning_rate": 0.00014297949120891718,
|
"loss": 2.0529, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.1466052473462848, |
|
"grad_norm": 1.405510663986206, |
|
"learning_rate": 0.0001426927472701842, |
|
"loss": 1.7095, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.14700580813138395, |
|
"grad_norm": 1.560726284980774, |
|
"learning_rate": 0.00014240557341596018, |
|
"loss": 1.8077, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.14740636891648307, |
|
"grad_norm": 1.2849904298782349, |
|
"learning_rate": 0.00014211797253808268, |
|
"loss": 1.9614, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.1478069297015822, |
|
"grad_norm": 1.205696940422058, |
|
"learning_rate": 0.00014182994753268927, |
|
"loss": 2.1995, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.14820749048668136, |
|
"grad_norm": 1.301679253578186, |
|
"learning_rate": 0.00014154150130018866, |
|
"loss": 2.0094, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1486080512717805, |
|
"grad_norm": 1.511448860168457, |
|
"learning_rate": 0.00014125263674523114, |
|
"loss": 1.9612, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.14900861205687962, |
|
"grad_norm": 1.176255226135254, |
|
"learning_rate": 0.00014096335677667954, |
|
"loss": 1.6863, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.14940917284197877, |
|
"grad_norm": 1.1221837997436523, |
|
"learning_rate": 0.00014067366430758004, |
|
"loss": 2.4452, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.14980973362707792, |
|
"grad_norm": 1.1222666501998901, |
|
"learning_rate": 0.00014038356225513248, |
|
"loss": 1.7168, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.15021029441217704, |
|
"grad_norm": 1.5494662523269653, |
|
"learning_rate": 0.00014009305354066137, |
|
"loss": 1.9266, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.15061085519727618, |
|
"grad_norm": 1.2402633428573608, |
|
"learning_rate": 0.00013980214108958624, |
|
"loss": 1.8818, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.15101141598237533, |
|
"grad_norm": 1.3550876379013062, |
|
"learning_rate": 0.0001395108278313922, |
|
"loss": 1.6726, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.15141197676747448, |
|
"grad_norm": 0.9974124431610107, |
|
"learning_rate": 0.00013921911669960055, |
|
"loss": 2.3311, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.1518125375525736, |
|
"grad_norm": 1.5511209964752197, |
|
"learning_rate": 0.00013892701063173918, |
|
"loss": 1.7031, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.15221309833767274, |
|
"grad_norm": 1.1340442895889282, |
|
"learning_rate": 0.00013863451256931287, |
|
"loss": 1.9426, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1526136591227719, |
|
"grad_norm": 1.4769186973571777, |
|
"learning_rate": 0.00013834162545777395, |
|
"loss": 1.9275, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.153014219907871, |
|
"grad_norm": 1.0888420343399048, |
|
"learning_rate": 0.0001380483522464923, |
|
"loss": 2.2289, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.15341478069297015, |
|
"grad_norm": 1.482541799545288, |
|
"learning_rate": 0.000137754695888726, |
|
"loss": 1.8827, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.1538153414780693, |
|
"grad_norm": 1.3306676149368286, |
|
"learning_rate": 0.00013746065934159123, |
|
"loss": 2.2292, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.15421590226316845, |
|
"grad_norm": 1.2816635370254517, |
|
"learning_rate": 0.00013716624556603274, |
|
"loss": 1.9847, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.15461646304826757, |
|
"grad_norm": 1.5046172142028809, |
|
"learning_rate": 0.0001368714575267941, |
|
"loss": 2.0836, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.1550170238333667, |
|
"grad_norm": 0.8114765882492065, |
|
"learning_rate": 0.00013657629819238746, |
|
"loss": 1.9699, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.15541758461846586, |
|
"grad_norm": 1.2298461198806763, |
|
"learning_rate": 0.0001362807705350641, |
|
"loss": 2.0324, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.15581814540356498, |
|
"grad_norm": 0.8663463592529297, |
|
"learning_rate": 0.00013598487753078425, |
|
"loss": 1.9759, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.15621870618866412, |
|
"grad_norm": 1.268539309501648, |
|
"learning_rate": 0.00013568862215918717, |
|
"loss": 1.7154, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.15661926697376327, |
|
"grad_norm": 1.3727500438690186, |
|
"learning_rate": 0.00013539200740356118, |
|
"loss": 1.9043, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.15701982775886242, |
|
"grad_norm": 1.1378331184387207, |
|
"learning_rate": 0.00013509503625081358, |
|
"loss": 1.7391, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.15742038854396154, |
|
"grad_norm": 1.4207416772842407, |
|
"learning_rate": 0.0001347977116914405, |
|
"loss": 2.0419, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.15782094932906068, |
|
"grad_norm": 1.185951828956604, |
|
"learning_rate": 0.00013450003671949706, |
|
"loss": 2.2408, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.15822151011415983, |
|
"grad_norm": 1.1980212926864624, |
|
"learning_rate": 0.00013420201433256689, |
|
"loss": 2.0313, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.15862207089925895, |
|
"grad_norm": 1.2082370519638062, |
|
"learning_rate": 0.00013390364753173206, |
|
"loss": 2.6664, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.1590226316843581, |
|
"grad_norm": 1.188747525215149, |
|
"learning_rate": 0.00013360493932154302, |
|
"loss": 1.9586, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.15942319246945724, |
|
"grad_norm": 1.4110333919525146, |
|
"learning_rate": 0.00013330589270998808, |
|
"loss": 2.0768, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.1598237532545564, |
|
"grad_norm": 1.0672770738601685, |
|
"learning_rate": 0.00013300651070846333, |
|
"loss": 1.8173, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.1602243140396555, |
|
"grad_norm": 1.4332971572875977, |
|
"learning_rate": 0.00013270679633174218, |
|
"loss": 2.2275, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.16062487482475465, |
|
"grad_norm": 1.1819405555725098, |
|
"learning_rate": 0.00013240675259794507, |
|
"loss": 1.949, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.1610254356098538, |
|
"grad_norm": 1.5723814964294434, |
|
"learning_rate": 0.00013210638252850908, |
|
"loss": 1.8285, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.16142599639495295, |
|
"grad_norm": 1.25686776638031, |
|
"learning_rate": 0.00013180568914815752, |
|
"loss": 1.5269, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.16182655718005207, |
|
"grad_norm": 0.9914525151252747, |
|
"learning_rate": 0.0001315046754848693, |
|
"loss": 1.7253, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.1622271179651512, |
|
"grad_norm": 1.1965891122817993, |
|
"learning_rate": 0.0001312033445698487, |
|
"loss": 1.9786, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.16262767875025036, |
|
"grad_norm": 1.2011706829071045, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 1.7513, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.16302823953534948, |
|
"grad_norm": 1.2285467386245728, |
|
"learning_rate": 0.00013059974312537053, |
|
"loss": 2.1979, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.16342880032044863, |
|
"grad_norm": 1.1085333824157715, |
|
"learning_rate": 0.00013029747867417276, |
|
"loss": 2.4344, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.16382936110554777, |
|
"grad_norm": 1.0605233907699585, |
|
"learning_rate": 0.00012999490912770107, |
|
"loss": 1.9061, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.16422992189064692, |
|
"grad_norm": 1.0893604755401611, |
|
"learning_rate": 0.0001296920375328275, |
|
"loss": 2.0987, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.16463048267574604, |
|
"grad_norm": 1.1180847883224487, |
|
"learning_rate": 0.0001293888669394656, |
|
"loss": 1.7212, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.16503104346084518, |
|
"grad_norm": 1.000549077987671, |
|
"learning_rate": 0.0001290854004005399, |
|
"loss": 1.8383, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.16543160424594433, |
|
"grad_norm": 1.3807919025421143, |
|
"learning_rate": 0.0001287816409719551, |
|
"loss": 2.2079, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.16583216503104345, |
|
"grad_norm": 0.9979914426803589, |
|
"learning_rate": 0.00012847759171256523, |
|
"loss": 2.0307, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.1662327258161426, |
|
"grad_norm": 1.1787306070327759, |
|
"learning_rate": 0.00012817325568414297, |
|
"loss": 1.9271, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.16663328660124174, |
|
"grad_norm": 1.1426745653152466, |
|
"learning_rate": 0.0001278686359513488, |
|
"loss": 2.2186, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.1670338473863409, |
|
"grad_norm": 1.1593252420425415, |
|
"learning_rate": 0.0001275637355816999, |
|
"loss": 1.8586, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.16743440817144, |
|
"grad_norm": 1.3415766954421997, |
|
"learning_rate": 0.0001272585576455398, |
|
"loss": 2.0948, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.16783496895653915, |
|
"grad_norm": 1.2215737104415894, |
|
"learning_rate": 0.0001269531052160068, |
|
"loss": 2.1967, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.1682355297416383, |
|
"grad_norm": 0.9574311375617981, |
|
"learning_rate": 0.00012664738136900348, |
|
"loss": 1.7765, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.16863609052673742, |
|
"grad_norm": 1.449532151222229, |
|
"learning_rate": 0.00012634138918316568, |
|
"loss": 2.0815, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.16903665131183657, |
|
"grad_norm": 1.1745095252990723, |
|
"learning_rate": 0.0001260351317398312, |
|
"loss": 1.9517, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.1694372120969357, |
|
"grad_norm": 1.4526337385177612, |
|
"learning_rate": 0.00012572861212300918, |
|
"loss": 2.3156, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.16983777288203486, |
|
"grad_norm": 1.08524751663208, |
|
"learning_rate": 0.00012542183341934872, |
|
"loss": 1.7148, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.17023833366713398, |
|
"grad_norm": 1.2243317365646362, |
|
"learning_rate": 0.0001251147987181079, |
|
"loss": 1.8422, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.17063889445223313, |
|
"grad_norm": 1.26092529296875, |
|
"learning_rate": 0.0001248075111111229, |
|
"loss": 1.7454, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.17103945523733227, |
|
"grad_norm": 1.1354914903640747, |
|
"learning_rate": 0.0001244999736927764, |
|
"loss": 1.9414, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.1714400160224314, |
|
"grad_norm": 1.1014437675476074, |
|
"learning_rate": 0.00012419218955996676, |
|
"loss": 2.2449, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.17184057680753054, |
|
"grad_norm": 0.9806610941886902, |
|
"learning_rate": 0.0001238841618120769, |
|
"loss": 1.9453, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.17224113759262968, |
|
"grad_norm": 1.0223315954208374, |
|
"learning_rate": 0.00012357589355094275, |
|
"loss": 1.9353, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.17264169837772883, |
|
"grad_norm": 1.0702928304672241, |
|
"learning_rate": 0.00012326738788082223, |
|
"loss": 2.0656, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.17304225916282795, |
|
"grad_norm": 1.4158331155776978, |
|
"learning_rate": 0.0001229586479083641, |
|
"loss": 2.2464, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.1734428199479271, |
|
"grad_norm": 1.3966501951217651, |
|
"learning_rate": 0.00012264967674257646, |
|
"loss": 1.6745, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.17384338073302624, |
|
"grad_norm": 1.1722941398620605, |
|
"learning_rate": 0.00012234047749479544, |
|
"loss": 1.8942, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.1742439415181254, |
|
"grad_norm": 1.2961608171463013, |
|
"learning_rate": 0.00012203105327865407, |
|
"loss": 1.5352, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.1746445023032245, |
|
"grad_norm": 1.529359221458435, |
|
"learning_rate": 0.00012172140721005079, |
|
"loss": 1.8913, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.17504506308832365, |
|
"grad_norm": 1.3748160600662231, |
|
"learning_rate": 0.00012141154240711805, |
|
"loss": 1.8422, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.1754456238734228, |
|
"grad_norm": 1.4000550508499146, |
|
"learning_rate": 0.000121101461990191, |
|
"loss": 2.3207, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.17584618465852192, |
|
"grad_norm": 1.587672472000122, |
|
"learning_rate": 0.00012079116908177593, |
|
"loss": 2.0086, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.17624674544362107, |
|
"grad_norm": 0.9797202348709106, |
|
"learning_rate": 0.00012048066680651908, |
|
"loss": 1.6867, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.1766473062287202, |
|
"grad_norm": 1.153014063835144, |
|
"learning_rate": 0.00012016995829117488, |
|
"loss": 1.8881, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.17704786701381936, |
|
"grad_norm": 1.408564805984497, |
|
"learning_rate": 0.00011985904666457455, |
|
"loss": 2.1962, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.17744842779891848, |
|
"grad_norm": 1.1497104167938232, |
|
"learning_rate": 0.00011954793505759483, |
|
"loss": 1.8189, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.17784898858401763, |
|
"grad_norm": 0.9838416576385498, |
|
"learning_rate": 0.00011923662660312611, |
|
"loss": 2.0812, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.17824954936911677, |
|
"grad_norm": 1.144004464149475, |
|
"learning_rate": 0.00011892512443604102, |
|
"loss": 1.9226, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.1786501101542159, |
|
"grad_norm": 1.2132439613342285, |
|
"learning_rate": 0.00011861343169316301, |
|
"loss": 2.155, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.17905067093931504, |
|
"grad_norm": 1.1353342533111572, |
|
"learning_rate": 0.00011830155151323446, |
|
"loss": 2.1583, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.17945123172441418, |
|
"grad_norm": 1.1002765893936157, |
|
"learning_rate": 0.00011798948703688539, |
|
"loss": 1.6904, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.17985179250951333, |
|
"grad_norm": 1.1224271059036255, |
|
"learning_rate": 0.00011767724140660157, |
|
"loss": 1.7376, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.18025235329461245, |
|
"grad_norm": 1.2441977262496948, |
|
"learning_rate": 0.00011736481776669306, |
|
"loss": 2.3133, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1806529140797116, |
|
"grad_norm": 0.9376555681228638, |
|
"learning_rate": 0.0001170522192632624, |
|
"loss": 1.7441, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.18105347486481074, |
|
"grad_norm": 1.1824123859405518, |
|
"learning_rate": 0.00011673944904417308, |
|
"loss": 1.8746, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.18145403564990986, |
|
"grad_norm": 1.1003307104110718, |
|
"learning_rate": 0.00011642651025901772, |
|
"loss": 1.7693, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.181854596435009, |
|
"grad_norm": 1.5860931873321533, |
|
"learning_rate": 0.00011611340605908642, |
|
"loss": 2.4127, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.18225515722010815, |
|
"grad_norm": 1.332067608833313, |
|
"learning_rate": 0.000115800139597335, |
|
"loss": 1.9801, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.1826557180052073, |
|
"grad_norm": 1.4606534242630005, |
|
"learning_rate": 0.00011548671402835325, |
|
"loss": 1.9786, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.18305627879030642, |
|
"grad_norm": 1.221954345703125, |
|
"learning_rate": 0.00011517313250833317, |
|
"loss": 1.7502, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.18345683957540557, |
|
"grad_norm": 1.2147043943405151, |
|
"learning_rate": 0.00011485939819503717, |
|
"loss": 1.8452, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.1838574003605047, |
|
"grad_norm": 0.9856990575790405, |
|
"learning_rate": 0.00011454551424776637, |
|
"loss": 2.2753, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.18425796114560383, |
|
"grad_norm": 1.21888267993927, |
|
"learning_rate": 0.00011423148382732853, |
|
"loss": 1.6949, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.18465852193070298, |
|
"grad_norm": 1.1283800601959229, |
|
"learning_rate": 0.00011391731009600654, |
|
"loss": 2.1301, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.18505908271580213, |
|
"grad_norm": 1.1772786378860474, |
|
"learning_rate": 0.00011360299621752644, |
|
"loss": 1.8366, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.18545964350090127, |
|
"grad_norm": 1.0780948400497437, |
|
"learning_rate": 0.00011328854535702543, |
|
"loss": 1.4893, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.1858602042860004, |
|
"grad_norm": 1.0804064273834229, |
|
"learning_rate": 0.00011297396068102017, |
|
"loss": 1.9614, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.18626076507109954, |
|
"grad_norm": 0.8003168106079102, |
|
"learning_rate": 0.00011265924535737493, |
|
"loss": 1.6996, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.18666132585619868, |
|
"grad_norm": 1.1143856048583984, |
|
"learning_rate": 0.00011234440255526948, |
|
"loss": 1.8898, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.1870618866412978, |
|
"grad_norm": 1.1231998205184937, |
|
"learning_rate": 0.00011202943544516736, |
|
"loss": 1.9706, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.18746244742639695, |
|
"grad_norm": 1.4141852855682373, |
|
"learning_rate": 0.00011171434719878384, |
|
"loss": 1.7378, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.1878630082114961, |
|
"grad_norm": 1.2133177518844604, |
|
"learning_rate": 0.00011139914098905406, |
|
"loss": 1.9353, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.18826356899659524, |
|
"grad_norm": 1.365179419517517, |
|
"learning_rate": 0.00011108381999010111, |
|
"loss": 2.4052, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.18866412978169436, |
|
"grad_norm": 1.2176018953323364, |
|
"learning_rate": 0.00011076838737720392, |
|
"loss": 1.7026, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.1890646905667935, |
|
"grad_norm": 1.1604026556015015, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 1.9168, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.18946525135189266, |
|
"grad_norm": 1.3729729652404785, |
|
"learning_rate": 0.00011013720001628035, |
|
"loss": 2.1561, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.1898658121369918, |
|
"grad_norm": 1.2137882709503174, |
|
"learning_rate": 0.00010982145162430373, |
|
"loss": 2.3844, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.19026637292209092, |
|
"grad_norm": 0.9215346574783325, |
|
"learning_rate": 0.00010950560433041826, |
|
"loss": 1.8599, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.19066693370719007, |
|
"grad_norm": 1.2497369050979614, |
|
"learning_rate": 0.00010918966131520277, |
|
"loss": 1.829, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.1910674944922892, |
|
"grad_norm": 1.2438724040985107, |
|
"learning_rate": 0.00010887362576019981, |
|
"loss": 1.907, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.19146805527738833, |
|
"grad_norm": 0.7997929453849792, |
|
"learning_rate": 0.00010855750084788398, |
|
"loss": 1.8881, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.19186861606248748, |
|
"grad_norm": 1.239675760269165, |
|
"learning_rate": 0.00010824128976162964, |
|
"loss": 2.1791, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.19226917684758663, |
|
"grad_norm": 1.1958426237106323, |
|
"learning_rate": 0.00010792499568567884, |
|
"loss": 2.1612, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.19266973763268577, |
|
"grad_norm": 1.2139252424240112, |
|
"learning_rate": 0.00010760862180510951, |
|
"loss": 1.7926, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.1930702984177849, |
|
"grad_norm": 1.43398118019104, |
|
"learning_rate": 0.0001072921713058031, |
|
"loss": 1.7103, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.19347085920288404, |
|
"grad_norm": 0.9710477590560913, |
|
"learning_rate": 0.00010697564737441252, |
|
"loss": 1.8695, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.19387141998798318, |
|
"grad_norm": 1.897813320159912, |
|
"learning_rate": 0.00010665905319833041, |
|
"loss": 2.1622, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.1942719807730823, |
|
"grad_norm": 1.0615317821502686, |
|
"learning_rate": 0.00010634239196565646, |
|
"loss": 2.4186, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.19467254155818145, |
|
"grad_norm": 1.06027090549469, |
|
"learning_rate": 0.00010602566686516586, |
|
"loss": 2.3492, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.1950731023432806, |
|
"grad_norm": 1.3258793354034424, |
|
"learning_rate": 0.00010570888108627681, |
|
"loss": 2.0268, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.19547366312837974, |
|
"grad_norm": 1.344404935836792, |
|
"learning_rate": 0.00010539203781901861, |
|
"loss": 2.1251, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.19587422391347886, |
|
"grad_norm": 1.5554450750350952, |
|
"learning_rate": 0.00010507514025399943, |
|
"loss": 1.6521, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.196274784698578, |
|
"grad_norm": 1.1709057092666626, |
|
"learning_rate": 0.00010475819158237425, |
|
"loss": 1.9683, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.19667534548367716, |
|
"grad_norm": 1.4332846403121948, |
|
"learning_rate": 0.00010444119499581261, |
|
"loss": 1.8998, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.19707590626877627, |
|
"grad_norm": 1.3177927732467651, |
|
"learning_rate": 0.00010412415368646673, |
|
"loss": 1.6543, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.19747646705387542, |
|
"grad_norm": 1.3712493181228638, |
|
"learning_rate": 0.00010380707084693901, |
|
"loss": 1.995, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.19787702783897457, |
|
"grad_norm": 1.2591536045074463, |
|
"learning_rate": 0.00010348994967025012, |
|
"loss": 1.8751, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.19827758862407371, |
|
"grad_norm": 1.3137924671173096, |
|
"learning_rate": 0.00010317279334980678, |
|
"loss": 1.8033, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.19867814940917283, |
|
"grad_norm": 1.0903714895248413, |
|
"learning_rate": 0.00010285560507936961, |
|
"loss": 1.7871, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.19907871019427198, |
|
"grad_norm": 1.3193492889404297, |
|
"learning_rate": 0.00010253838805302104, |
|
"loss": 2.0395, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.19947927097937113, |
|
"grad_norm": 1.2793503999710083, |
|
"learning_rate": 0.00010222114546513295, |
|
"loss": 2.0782, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.19987983176447024, |
|
"grad_norm": 1.292874813079834, |
|
"learning_rate": 0.00010190388051033466, |
|
"loss": 1.8531, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.2002803925495694, |
|
"grad_norm": 1.136939525604248, |
|
"learning_rate": 0.00010158659638348081, |
|
"loss": 2.0028, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2002803925495694, |
|
"eval_loss": 1.9258522987365723, |
|
"eval_runtime": 32.6934, |
|
"eval_samples_per_second": 32.178, |
|
"eval_steps_per_second": 16.089, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.20068095333466854, |
|
"grad_norm": 1.1813311576843262, |
|
"learning_rate": 0.00010126929627961896, |
|
"loss": 1.8187, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.20108151411976768, |
|
"grad_norm": 1.4287338256835938, |
|
"learning_rate": 0.00010095198339395769, |
|
"loss": 2.1428, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.2014820749048668, |
|
"grad_norm": 1.4133274555206299, |
|
"learning_rate": 0.0001006346609218342, |
|
"loss": 2.0651, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.20188263568996595, |
|
"grad_norm": 1.1729075908660889, |
|
"learning_rate": 0.00010031733205868224, |
|
"loss": 1.9114, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.2022831964750651, |
|
"grad_norm": 1.2264689207077026, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7613, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.20268375726016424, |
|
"grad_norm": 1.221653938293457, |
|
"learning_rate": 9.968266794131777e-05, |
|
"loss": 1.9397, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.20308431804526336, |
|
"grad_norm": 1.4370172023773193, |
|
"learning_rate": 9.936533907816584e-05, |
|
"loss": 1.7342, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.2034848788303625, |
|
"grad_norm": 1.2867745161056519, |
|
"learning_rate": 9.904801660604234e-05, |
|
"loss": 2.0417, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.20388543961546166, |
|
"grad_norm": 1.123342514038086, |
|
"learning_rate": 9.873070372038105e-05, |
|
"loss": 2.0177, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.20428600040056077, |
|
"grad_norm": 1.1308317184448242, |
|
"learning_rate": 9.84134036165192e-05, |
|
"loss": 1.6625, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.20468656118565992, |
|
"grad_norm": 1.1609257459640503, |
|
"learning_rate": 9.809611948966533e-05, |
|
"loss": 1.7069, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.20508712197075907, |
|
"grad_norm": 1.5887484550476074, |
|
"learning_rate": 9.777885453486706e-05, |
|
"loss": 1.9354, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.20548768275585821, |
|
"grad_norm": 1.1795055866241455, |
|
"learning_rate": 9.746161194697895e-05, |
|
"loss": 1.9799, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.20588824354095733, |
|
"grad_norm": 1.4398356676101685, |
|
"learning_rate": 9.71443949206304e-05, |
|
"loss": 2.0568, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.20628880432605648, |
|
"grad_norm": 1.0796585083007812, |
|
"learning_rate": 9.682720665019325e-05, |
|
"loss": 1.9268, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.20668936511115563, |
|
"grad_norm": 1.221709132194519, |
|
"learning_rate": 9.651005032974994e-05, |
|
"loss": 1.894, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.20708992589625475, |
|
"grad_norm": 1.182438850402832, |
|
"learning_rate": 9.619292915306101e-05, |
|
"loss": 2.5103, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.2074904866813539, |
|
"grad_norm": 1.056098222732544, |
|
"learning_rate": 9.587584631353329e-05, |
|
"loss": 1.6194, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.20789104746645304, |
|
"grad_norm": 1.1654489040374756, |
|
"learning_rate": 9.55588050041874e-05, |
|
"loss": 2.2766, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.20829160825155218, |
|
"grad_norm": 1.1800388097763062, |
|
"learning_rate": 9.524180841762577e-05, |
|
"loss": 1.8447, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2086921690366513, |
|
"grad_norm": 1.13406503200531, |
|
"learning_rate": 9.492485974600059e-05, |
|
"loss": 2.1673, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.20909272982175045, |
|
"grad_norm": 1.5622755289077759, |
|
"learning_rate": 9.460796218098143e-05, |
|
"loss": 1.8394, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.2094932906068496, |
|
"grad_norm": 0.9142590165138245, |
|
"learning_rate": 9.42911189137232e-05, |
|
"loss": 2.2018, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.20989385139194872, |
|
"grad_norm": 0.903015673160553, |
|
"learning_rate": 9.397433313483416e-05, |
|
"loss": 2.114, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.21029441217704786, |
|
"grad_norm": 0.8409507870674133, |
|
"learning_rate": 9.365760803434355e-05, |
|
"loss": 2.3348, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.210694972962147, |
|
"grad_norm": 0.9404178261756897, |
|
"learning_rate": 9.334094680166962e-05, |
|
"loss": 2.0019, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.21109553374724616, |
|
"grad_norm": 1.1843013763427734, |
|
"learning_rate": 9.302435262558747e-05, |
|
"loss": 2.2249, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.21149609453234527, |
|
"grad_norm": 1.0555171966552734, |
|
"learning_rate": 9.270782869419694e-05, |
|
"loss": 2.0795, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.21189665531744442, |
|
"grad_norm": 1.2614086866378784, |
|
"learning_rate": 9.239137819489047e-05, |
|
"loss": 1.9405, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.21229721610254357, |
|
"grad_norm": 1.073330044746399, |
|
"learning_rate": 9.207500431432115e-05, |
|
"loss": 1.8541, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2126977768876427, |
|
"grad_norm": 1.0380101203918457, |
|
"learning_rate": 9.175871023837042e-05, |
|
"loss": 2.0226, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.21309833767274183, |
|
"grad_norm": 1.1019260883331299, |
|
"learning_rate": 9.144249915211605e-05, |
|
"loss": 1.926, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.21349889845784098, |
|
"grad_norm": 0.8795924782752991, |
|
"learning_rate": 9.112637423980021e-05, |
|
"loss": 2.1929, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.21389945924294013, |
|
"grad_norm": 1.0507018566131592, |
|
"learning_rate": 9.081033868479727e-05, |
|
"loss": 2.4226, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.21430002002803925, |
|
"grad_norm": 1.4842091798782349, |
|
"learning_rate": 9.049439566958175e-05, |
|
"loss": 2.2066, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.2147005808131384, |
|
"grad_norm": 1.1068452596664429, |
|
"learning_rate": 9.01785483756963e-05, |
|
"loss": 1.7091, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.21510114159823754, |
|
"grad_norm": 1.0645016431808472, |
|
"learning_rate": 8.986279998371966e-05, |
|
"loss": 1.7829, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.21550170238333669, |
|
"grad_norm": 1.3148471117019653, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 1.7944, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.2159022631684358, |
|
"grad_norm": 1.3371472358703613, |
|
"learning_rate": 8.92316126227961e-05, |
|
"loss": 1.8282, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.21630282395353495, |
|
"grad_norm": 1.3502963781356812, |
|
"learning_rate": 8.891618000989891e-05, |
|
"loss": 1.7832, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.2167033847386341, |
|
"grad_norm": 1.118548035621643, |
|
"learning_rate": 8.860085901094595e-05, |
|
"loss": 1.8526, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.21710394552373322, |
|
"grad_norm": 1.3419970273971558, |
|
"learning_rate": 8.828565280121617e-05, |
|
"loss": 2.0795, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.21750450630883236, |
|
"grad_norm": 1.457339882850647, |
|
"learning_rate": 8.797056455483266e-05, |
|
"loss": 1.9845, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.2179050670939315, |
|
"grad_norm": 1.106742024421692, |
|
"learning_rate": 8.765559744473053e-05, |
|
"loss": 1.6308, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.21830562787903066, |
|
"grad_norm": 1.377302885055542, |
|
"learning_rate": 8.734075464262507e-05, |
|
"loss": 2.0252, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.21870618866412977, |
|
"grad_norm": 1.283003807067871, |
|
"learning_rate": 8.702603931897982e-05, |
|
"loss": 1.7937, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.21910674944922892, |
|
"grad_norm": 1.376645803451538, |
|
"learning_rate": 8.67114546429746e-05, |
|
"loss": 2.0404, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.21950731023432807, |
|
"grad_norm": 1.074034571647644, |
|
"learning_rate": 8.639700378247361e-05, |
|
"loss": 1.896, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.2199078710194272, |
|
"grad_norm": 1.4838452339172363, |
|
"learning_rate": 8.608268990399349e-05, |
|
"loss": 1.7861, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.22030843180452633, |
|
"grad_norm": 0.964583158493042, |
|
"learning_rate": 8.57685161726715e-05, |
|
"loss": 2.0498, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.22070899258962548, |
|
"grad_norm": 1.6249324083328247, |
|
"learning_rate": 8.545448575223368e-05, |
|
"loss": 2.1178, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.22110955337472463, |
|
"grad_norm": 1.135626196861267, |
|
"learning_rate": 8.514060180496285e-05, |
|
"loss": 2.0566, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.22151011415982375, |
|
"grad_norm": 1.0067975521087646, |
|
"learning_rate": 8.482686749166686e-05, |
|
"loss": 2.0416, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.2219106749449229, |
|
"grad_norm": 1.3207542896270752, |
|
"learning_rate": 8.451328597164679e-05, |
|
"loss": 2.0966, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.22231123573002204, |
|
"grad_norm": 1.0738505125045776, |
|
"learning_rate": 8.4199860402665e-05, |
|
"loss": 1.8144, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.22271179651512116, |
|
"grad_norm": 1.1242963075637817, |
|
"learning_rate": 8.38865939409136e-05, |
|
"loss": 2.0862, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.2231123573002203, |
|
"grad_norm": 1.2468533515930176, |
|
"learning_rate": 8.357348974098231e-05, |
|
"loss": 2.3297, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.22351291808531945, |
|
"grad_norm": 1.1325228214263916, |
|
"learning_rate": 8.326055095582694e-05, |
|
"loss": 1.9616, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.2239134788704186, |
|
"grad_norm": 1.0064939260482788, |
|
"learning_rate": 8.294778073673762e-05, |
|
"loss": 1.8283, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.22431403965551772, |
|
"grad_norm": 1.3716778755187988, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 2.1096, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.22471460044061686, |
|
"grad_norm": 1.0072152614593506, |
|
"learning_rate": 8.232275859339841e-05, |
|
"loss": 2.2765, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.225115161225716, |
|
"grad_norm": 1.425732135772705, |
|
"learning_rate": 8.201051296311462e-05, |
|
"loss": 2.0824, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.22551572201081513, |
|
"grad_norm": 1.3738203048706055, |
|
"learning_rate": 8.169844848676554e-05, |
|
"loss": 1.8802, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.22591628279591427, |
|
"grad_norm": 1.0857584476470947, |
|
"learning_rate": 8.1386568306837e-05, |
|
"loss": 1.5459, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.22631684358101342, |
|
"grad_norm": 1.4477343559265137, |
|
"learning_rate": 8.107487556395901e-05, |
|
"loss": 1.5841, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.22671740436611257, |
|
"grad_norm": 1.384574294090271, |
|
"learning_rate": 8.076337339687394e-05, |
|
"loss": 1.6207, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.2271179651512117, |
|
"grad_norm": 1.2884398698806763, |
|
"learning_rate": 8.045206494240521e-05, |
|
"loss": 2.282, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.22751852593631083, |
|
"grad_norm": 1.4047175645828247, |
|
"learning_rate": 8.014095333542548e-05, |
|
"loss": 1.8804, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.22791908672140998, |
|
"grad_norm": 1.2710424661636353, |
|
"learning_rate": 7.983004170882518e-05, |
|
"loss": 1.8136, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.2283196475065091, |
|
"grad_norm": 0.9432646036148071, |
|
"learning_rate": 7.951933319348095e-05, |
|
"loss": 1.728, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.22872020829160825, |
|
"grad_norm": 1.2438580989837646, |
|
"learning_rate": 7.920883091822408e-05, |
|
"loss": 1.6693, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.2291207690767074, |
|
"grad_norm": 1.2749730348587036, |
|
"learning_rate": 7.889853800980904e-05, |
|
"loss": 1.9896, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.22952132986180654, |
|
"grad_norm": 1.1855494976043701, |
|
"learning_rate": 7.858845759288198e-05, |
|
"loss": 1.9782, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.22992189064690566, |
|
"grad_norm": 1.0309183597564697, |
|
"learning_rate": 7.827859278994925e-05, |
|
"loss": 2.2048, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.2303224514320048, |
|
"grad_norm": 1.5033015012741089, |
|
"learning_rate": 7.796894672134594e-05, |
|
"loss": 1.8239, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.23072301221710395, |
|
"grad_norm": 1.5223544836044312, |
|
"learning_rate": 7.765952250520459e-05, |
|
"loss": 2.2928, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.2311235730022031, |
|
"grad_norm": 1.1659106016159058, |
|
"learning_rate": 7.735032325742355e-05, |
|
"loss": 2.1531, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.23152413378730222, |
|
"grad_norm": 1.3849096298217773, |
|
"learning_rate": 7.704135209163589e-05, |
|
"loss": 1.8511, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.23192469457240136, |
|
"grad_norm": 1.0666840076446533, |
|
"learning_rate": 7.673261211917776e-05, |
|
"loss": 1.6983, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.2323252553575005, |
|
"grad_norm": 1.1079344749450684, |
|
"learning_rate": 7.642410644905726e-05, |
|
"loss": 1.7489, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.23272581614259963, |
|
"grad_norm": 1.0207995176315308, |
|
"learning_rate": 7.611583818792311e-05, |
|
"loss": 1.5285, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.23312637692769878, |
|
"grad_norm": 1.23712158203125, |
|
"learning_rate": 7.580781044003324e-05, |
|
"loss": 1.8596, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.23352693771279792, |
|
"grad_norm": 1.4562586545944214, |
|
"learning_rate": 7.550002630722366e-05, |
|
"loss": 2.0293, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.23392749849789707, |
|
"grad_norm": 1.3066940307617188, |
|
"learning_rate": 7.519248888887716e-05, |
|
"loss": 2.2537, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.2343280592829962, |
|
"grad_norm": 1.1663978099822998, |
|
"learning_rate": 7.488520128189209e-05, |
|
"loss": 2.3066, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.23472862006809533, |
|
"grad_norm": 1.2235394716262817, |
|
"learning_rate": 7.457816658065134e-05, |
|
"loss": 2.4259, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.23512918085319448, |
|
"grad_norm": 1.4642695188522339, |
|
"learning_rate": 7.427138787699086e-05, |
|
"loss": 2.106, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.2355297416382936, |
|
"grad_norm": 1.1755717992782593, |
|
"learning_rate": 7.39648682601688e-05, |
|
"loss": 1.8165, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.23593030242339275, |
|
"grad_norm": 1.2367725372314453, |
|
"learning_rate": 7.365861081683433e-05, |
|
"loss": 2.2718, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.2363308632084919, |
|
"grad_norm": 1.0610246658325195, |
|
"learning_rate": 7.335261863099651e-05, |
|
"loss": 2.0477, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.23673142399359104, |
|
"grad_norm": 1.3351134061813354, |
|
"learning_rate": 7.304689478399323e-05, |
|
"loss": 2.0486, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.23713198477869016, |
|
"grad_norm": 1.0682804584503174, |
|
"learning_rate": 7.274144235446023e-05, |
|
"loss": 2.0759, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.2375325455637893, |
|
"grad_norm": 1.245468020439148, |
|
"learning_rate": 7.243626441830009e-05, |
|
"loss": 2.0305, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.23793310634888845, |
|
"grad_norm": 1.3784598112106323, |
|
"learning_rate": 7.213136404865124e-05, |
|
"loss": 2.1201, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.23833366713398757, |
|
"grad_norm": 1.378495693206787, |
|
"learning_rate": 7.182674431585704e-05, |
|
"loss": 1.9075, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.23873422791908672, |
|
"grad_norm": 1.284844994544983, |
|
"learning_rate": 7.152240828743477e-05, |
|
"loss": 2.0786, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.23913478870418586, |
|
"grad_norm": 1.0804252624511719, |
|
"learning_rate": 7.12183590280449e-05, |
|
"loss": 2.1232, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.239535349489285, |
|
"grad_norm": 1.2147008180618286, |
|
"learning_rate": 7.09145995994601e-05, |
|
"loss": 1.9367, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.23993591027438413, |
|
"grad_norm": 1.0713460445404053, |
|
"learning_rate": 7.061113306053443e-05, |
|
"loss": 1.8186, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.24033647105948328, |
|
"grad_norm": 1.237535834312439, |
|
"learning_rate": 7.030796246717255e-05, |
|
"loss": 2.1206, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24073703184458242, |
|
"grad_norm": 1.4354087114334106, |
|
"learning_rate": 7.000509087229895e-05, |
|
"loss": 2.1741, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.24113759262968154, |
|
"grad_norm": 1.2553120851516724, |
|
"learning_rate": 6.970252132582728e-05, |
|
"loss": 1.8268, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.2415381534147807, |
|
"grad_norm": 1.192887783050537, |
|
"learning_rate": 6.940025687462952e-05, |
|
"loss": 1.6772, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.24193871419987983, |
|
"grad_norm": 1.409682035446167, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 2.1226, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.24233927498497898, |
|
"grad_norm": 1.2333297729492188, |
|
"learning_rate": 6.87966554301513e-05, |
|
"loss": 2.201, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.2427398357700781, |
|
"grad_norm": 1.374571442604065, |
|
"learning_rate": 6.849532451513074e-05, |
|
"loss": 1.7252, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.24314039655517725, |
|
"grad_norm": 1.368024230003357, |
|
"learning_rate": 6.819431085184251e-05, |
|
"loss": 1.9868, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.2435409573402764, |
|
"grad_norm": 1.25948965549469, |
|
"learning_rate": 6.789361747149093e-05, |
|
"loss": 1.5042, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.24394151812537554, |
|
"grad_norm": 0.9880972504615784, |
|
"learning_rate": 6.759324740205495e-05, |
|
"loss": 1.8699, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.24434207891047466, |
|
"grad_norm": 1.016918420791626, |
|
"learning_rate": 6.729320366825784e-05, |
|
"loss": 1.7774, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2447426396955738, |
|
"grad_norm": 1.3218151330947876, |
|
"learning_rate": 6.699348929153668e-05, |
|
"loss": 2.0521, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.24514320048067295, |
|
"grad_norm": 1.3361955881118774, |
|
"learning_rate": 6.669410729001193e-05, |
|
"loss": 1.9939, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.24554376126577207, |
|
"grad_norm": 1.083174705505371, |
|
"learning_rate": 6.639506067845697e-05, |
|
"loss": 1.9702, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.24594432205087122, |
|
"grad_norm": 1.1985284090042114, |
|
"learning_rate": 6.609635246826794e-05, |
|
"loss": 1.3102, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.24634488283597036, |
|
"grad_norm": 0.9790166020393372, |
|
"learning_rate": 6.579798566743314e-05, |
|
"loss": 2.3672, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.2467454436210695, |
|
"grad_norm": 1.3507696390151978, |
|
"learning_rate": 6.549996328050296e-05, |
|
"loss": 2.1707, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.24714600440616863, |
|
"grad_norm": 1.266715168952942, |
|
"learning_rate": 6.52022883085595e-05, |
|
"loss": 2.0666, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.24754656519126778, |
|
"grad_norm": 1.0168043375015259, |
|
"learning_rate": 6.490496374918647e-05, |
|
"loss": 2.1277, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.24794712597636692, |
|
"grad_norm": 0.8772782683372498, |
|
"learning_rate": 6.460799259643884e-05, |
|
"loss": 2.0311, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.24834768676146604, |
|
"grad_norm": 1.4147906303405762, |
|
"learning_rate": 6.431137784081282e-05, |
|
"loss": 2.038, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2487482475465652, |
|
"grad_norm": 1.2074676752090454, |
|
"learning_rate": 6.401512246921576e-05, |
|
"loss": 2.0425, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.24914880833166433, |
|
"grad_norm": 1.3896255493164062, |
|
"learning_rate": 6.371922946493591e-05, |
|
"loss": 1.9922, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.24954936911676348, |
|
"grad_norm": 1.2395154237747192, |
|
"learning_rate": 6.342370180761256e-05, |
|
"loss": 1.7281, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.2499499299018626, |
|
"grad_norm": 1.0640504360198975, |
|
"learning_rate": 6.312854247320595e-05, |
|
"loss": 1.9935, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.25035049068696175, |
|
"grad_norm": 1.1218020915985107, |
|
"learning_rate": 6.283375443396726e-05, |
|
"loss": 2.1524, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.2507510514720609, |
|
"grad_norm": 1.1880977153778076, |
|
"learning_rate": 6.25393406584088e-05, |
|
"loss": 2.1081, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.25115161225716004, |
|
"grad_norm": 1.4129986763000488, |
|
"learning_rate": 6.224530411127403e-05, |
|
"loss": 1.6092, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.2515521730422592, |
|
"grad_norm": 0.9830234050750732, |
|
"learning_rate": 6.19516477535077e-05, |
|
"loss": 2.0331, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.2519527338273583, |
|
"grad_norm": 0.9556760787963867, |
|
"learning_rate": 6.165837454222608e-05, |
|
"loss": 1.9718, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.2523532946124574, |
|
"grad_norm": 1.383091926574707, |
|
"learning_rate": 6.136548743068713e-05, |
|
"loss": 1.8912, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.25275385539755657, |
|
"grad_norm": 1.5158681869506836, |
|
"learning_rate": 6.107298936826086e-05, |
|
"loss": 2.0875, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.2531544161826557, |
|
"grad_norm": 1.7562272548675537, |
|
"learning_rate": 6.078088330039945e-05, |
|
"loss": 2.3275, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.25355497696775486, |
|
"grad_norm": 0.8706815838813782, |
|
"learning_rate": 6.048917216860781e-05, |
|
"loss": 2.0608, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.253955537752854, |
|
"grad_norm": 1.0561360120773315, |
|
"learning_rate": 6.019785891041381e-05, |
|
"loss": 1.7948, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.25435609853795316, |
|
"grad_norm": 1.234235405921936, |
|
"learning_rate": 5.9906946459338656e-05, |
|
"loss": 1.8081, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.25475665932305225, |
|
"grad_norm": 1.2104214429855347, |
|
"learning_rate": 5.9616437744867535e-05, |
|
"loss": 1.9068, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.2551572201081514, |
|
"grad_norm": 1.253570556640625, |
|
"learning_rate": 5.9326335692419995e-05, |
|
"loss": 1.9251, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.25555778089325054, |
|
"grad_norm": 1.436930775642395, |
|
"learning_rate": 5.9036643223320475e-05, |
|
"loss": 2.2092, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.2559583416783497, |
|
"grad_norm": 1.1123769283294678, |
|
"learning_rate": 5.8747363254768894e-05, |
|
"loss": 1.9229, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.25635890246344883, |
|
"grad_norm": 1.2202420234680176, |
|
"learning_rate": 5.845849869981137e-05, |
|
"loss": 1.8872, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.256759463248548, |
|
"grad_norm": 1.117271900177002, |
|
"learning_rate": 5.817005246731073e-05, |
|
"loss": 1.9297, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.2571600240336471, |
|
"grad_norm": 1.3900742530822754, |
|
"learning_rate": 5.788202746191734e-05, |
|
"loss": 1.7656, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.2575605848187462, |
|
"grad_norm": 1.078987956047058, |
|
"learning_rate": 5.759442658403985e-05, |
|
"loss": 1.7871, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.25796114560384537, |
|
"grad_norm": 1.2144380807876587, |
|
"learning_rate": 5.7307252729815833e-05, |
|
"loss": 1.9427, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.2583617063889445, |
|
"grad_norm": 1.1583763360977173, |
|
"learning_rate": 5.702050879108284e-05, |
|
"loss": 1.9859, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.25876226717404366, |
|
"grad_norm": 1.332306146621704, |
|
"learning_rate": 5.6734197655349156e-05, |
|
"loss": 1.9659, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.2591628279591428, |
|
"grad_norm": 1.2533334493637085, |
|
"learning_rate": 5.6448322205764794e-05, |
|
"loss": 1.9083, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.25956338874424195, |
|
"grad_norm": 1.2647238969802856, |
|
"learning_rate": 5.616288532109225e-05, |
|
"loss": 1.9254, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.2599639495293411, |
|
"grad_norm": 1.1436094045639038, |
|
"learning_rate": 5.5877889875677845e-05, |
|
"loss": 1.5689, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.2603645103144402, |
|
"grad_norm": 1.4330624341964722, |
|
"learning_rate": 5.559333873942259e-05, |
|
"loss": 1.6846, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.26076507109953934, |
|
"grad_norm": 1.3361932039260864, |
|
"learning_rate": 5.530923477775323e-05, |
|
"loss": 1.7006, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.2611656318846385, |
|
"grad_norm": 1.4416998624801636, |
|
"learning_rate": 5.5025580851593436e-05, |
|
"loss": 1.8457, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.26156619266973763, |
|
"grad_norm": 1.2435954809188843, |
|
"learning_rate": 5.474237981733521e-05, |
|
"loss": 1.9747, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.2619667534548368, |
|
"grad_norm": 1.2450804710388184, |
|
"learning_rate": 5.445963452680973e-05, |
|
"loss": 2.5783, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.2623673142399359, |
|
"grad_norm": 1.004050374031067, |
|
"learning_rate": 5.417734782725896e-05, |
|
"loss": 2.3425, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.26276787502503507, |
|
"grad_norm": 1.2414133548736572, |
|
"learning_rate": 5.38955225613069e-05, |
|
"loss": 1.9871, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.26316843581013416, |
|
"grad_norm": 1.286872148513794, |
|
"learning_rate": 5.361416156693075e-05, |
|
"loss": 1.7411, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.2635689965952333, |
|
"grad_norm": 1.1407065391540527, |
|
"learning_rate": 5.333326767743263e-05, |
|
"loss": 1.9724, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.26396955738033245, |
|
"grad_norm": 1.1503363847732544, |
|
"learning_rate": 5.305284372141095e-05, |
|
"loss": 2.1432, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.2643701181654316, |
|
"grad_norm": 1.2722294330596924, |
|
"learning_rate": 5.277289252273174e-05, |
|
"loss": 1.5037, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.26477067895053075, |
|
"grad_norm": 1.1083940267562866, |
|
"learning_rate": 5.249341690050051e-05, |
|
"loss": 2.3438, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.2651712397356299, |
|
"grad_norm": 1.5369712114334106, |
|
"learning_rate": 5.221441966903371e-05, |
|
"loss": 2.329, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.26557180052072904, |
|
"grad_norm": 1.0785249471664429, |
|
"learning_rate": 5.193590363783028e-05, |
|
"loss": 1.7842, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.2659723613058282, |
|
"grad_norm": 1.3311983346939087, |
|
"learning_rate": 5.1657871611543605e-05, |
|
"loss": 2.1037, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.2663729220909273, |
|
"grad_norm": 1.1631267070770264, |
|
"learning_rate": 5.138032638995315e-05, |
|
"loss": 1.7456, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.2667734828760264, |
|
"grad_norm": 1.1494488716125488, |
|
"learning_rate": 5.110327076793613e-05, |
|
"loss": 1.8558, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.26717404366112557, |
|
"grad_norm": 1.537941575050354, |
|
"learning_rate": 5.082670753543961e-05, |
|
"loss": 2.1186, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.2675746044462247, |
|
"grad_norm": 1.0077687501907349, |
|
"learning_rate": 5.055063947745233e-05, |
|
"loss": 1.9591, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.26797516523132386, |
|
"grad_norm": 1.5373225212097168, |
|
"learning_rate": 5.027506937397652e-05, |
|
"loss": 1.9692, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.268375726016423, |
|
"grad_norm": 1.1492283344268799, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 1.8725, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.26877628680152216, |
|
"grad_norm": 1.2667278051376343, |
|
"learning_rate": 4.972543412546842e-05, |
|
"loss": 1.73, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.26917684758662125, |
|
"grad_norm": 1.2694602012634277, |
|
"learning_rate": 4.945137451525707e-05, |
|
"loss": 1.8786, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.2695774083717204, |
|
"grad_norm": 1.0184561014175415, |
|
"learning_rate": 4.9177823929143106e-05, |
|
"loss": 1.8922, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.26997796915681954, |
|
"grad_norm": 1.1676651239395142, |
|
"learning_rate": 4.890478512177795e-05, |
|
"loss": 1.8485, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.2703785299419187, |
|
"grad_norm": 1.315293550491333, |
|
"learning_rate": 4.8632260842659393e-05, |
|
"loss": 2.0489, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.27077909072701783, |
|
"grad_norm": 1.356357455253601, |
|
"learning_rate": 4.836025383610382e-05, |
|
"loss": 1.8123, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.271179651512117, |
|
"grad_norm": 0.9495698809623718, |
|
"learning_rate": 4.808876684121881e-05, |
|
"loss": 2.4272, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.27158021229721613, |
|
"grad_norm": 1.4226418733596802, |
|
"learning_rate": 4.7817802591875426e-05, |
|
"loss": 1.7676, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.2719807730823152, |
|
"grad_norm": 1.260791301727295, |
|
"learning_rate": 4.754736381668057e-05, |
|
"loss": 1.8301, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.27238133386741437, |
|
"grad_norm": 1.0678155422210693, |
|
"learning_rate": 4.727745323894976e-05, |
|
"loss": 1.9342, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2727818946525135, |
|
"grad_norm": 1.5142219066619873, |
|
"learning_rate": 4.700807357667952e-05, |
|
"loss": 1.8998, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.27318245543761266, |
|
"grad_norm": 1.2509609460830688, |
|
"learning_rate": 4.673922754252002e-05, |
|
"loss": 2.0971, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.2735830162227118, |
|
"grad_norm": 1.1951498985290527, |
|
"learning_rate": 4.647091784374785e-05, |
|
"loss": 2.3055, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.27398357700781095, |
|
"grad_norm": 0.9153628945350647, |
|
"learning_rate": 4.620314718223876e-05, |
|
"loss": 1.591, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.2743841377929101, |
|
"grad_norm": 1.1153326034545898, |
|
"learning_rate": 4.593591825444028e-05, |
|
"loss": 1.9545, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.2747846985780092, |
|
"grad_norm": 1.1780824661254883, |
|
"learning_rate": 4.566923375134472e-05, |
|
"loss": 1.9943, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.27518525936310834, |
|
"grad_norm": 1.0134261846542358, |
|
"learning_rate": 4.5403096358462095e-05, |
|
"loss": 2.4141, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.2755858201482075, |
|
"grad_norm": 1.2110122442245483, |
|
"learning_rate": 4.513750875579303e-05, |
|
"loss": 1.7741, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.27598638093330663, |
|
"grad_norm": 1.648859977722168, |
|
"learning_rate": 4.487247361780169e-05, |
|
"loss": 1.9172, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.2763869417184058, |
|
"grad_norm": 1.2855784893035889, |
|
"learning_rate": 4.4607993613388976e-05, |
|
"loss": 2.0266, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2767875025035049, |
|
"grad_norm": 1.065869927406311, |
|
"learning_rate": 4.434407140586565e-05, |
|
"loss": 1.9958, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.27718806328860407, |
|
"grad_norm": 1.305423617362976, |
|
"learning_rate": 4.4080709652925336e-05, |
|
"loss": 1.5812, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.27758862407370316, |
|
"grad_norm": 1.1796246767044067, |
|
"learning_rate": 4.3817911006617986e-05, |
|
"loss": 1.9753, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.2779891848588023, |
|
"grad_norm": 1.5826653242111206, |
|
"learning_rate": 4.355567811332311e-05, |
|
"loss": 1.8844, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.27838974564390145, |
|
"grad_norm": 1.1710503101348877, |
|
"learning_rate": 4.329401361372294e-05, |
|
"loss": 1.8832, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.2787903064290006, |
|
"grad_norm": 1.4755582809448242, |
|
"learning_rate": 4.3032920142776125e-05, |
|
"loss": 1.7222, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.27919086721409975, |
|
"grad_norm": 1.193444013595581, |
|
"learning_rate": 4.277240032969105e-05, |
|
"loss": 1.9375, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.2795914279991989, |
|
"grad_norm": 1.4020665884017944, |
|
"learning_rate": 4.251245679789928e-05, |
|
"loss": 1.5222, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.27999198878429804, |
|
"grad_norm": 1.6775052547454834, |
|
"learning_rate": 4.225309216502933e-05, |
|
"loss": 2.3145, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.28039254956939713, |
|
"grad_norm": 1.5346165895462036, |
|
"learning_rate": 4.19943090428802e-05, |
|
"loss": 1.8626, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2807931103544963, |
|
"grad_norm": 1.1919195652008057, |
|
"learning_rate": 4.173611003739498e-05, |
|
"loss": 1.8406, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.2811936711395954, |
|
"grad_norm": 1.2349470853805542, |
|
"learning_rate": 4.147849774863488e-05, |
|
"loss": 1.5201, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.28159423192469457, |
|
"grad_norm": 1.1646391153335571, |
|
"learning_rate": 4.12214747707527e-05, |
|
"loss": 2.0479, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.2819947927097937, |
|
"grad_norm": 1.2893829345703125, |
|
"learning_rate": 4.096504369196704e-05, |
|
"loss": 1.9092, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.28239535349489286, |
|
"grad_norm": 0.9188634753227234, |
|
"learning_rate": 4.070920709453597e-05, |
|
"loss": 2.0369, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.282795914279992, |
|
"grad_norm": 1.1880841255187988, |
|
"learning_rate": 4.045396755473121e-05, |
|
"loss": 2.2927, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.2831964750650911, |
|
"grad_norm": 1.1953880786895752, |
|
"learning_rate": 4.019932764281211e-05, |
|
"loss": 1.9961, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.28359703585019025, |
|
"grad_norm": 1.1551967859268188, |
|
"learning_rate": 3.994528992299971e-05, |
|
"loss": 2.1265, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.2839975966352894, |
|
"grad_norm": 1.254237413406372, |
|
"learning_rate": 3.969185695345105e-05, |
|
"loss": 1.621, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.28439815742038854, |
|
"grad_norm": 1.0534310340881348, |
|
"learning_rate": 3.943903128623335e-05, |
|
"loss": 1.9361, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.2847987182054877, |
|
"grad_norm": 1.0625948905944824, |
|
"learning_rate": 3.918681546729822e-05, |
|
"loss": 1.5379, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.28519927899058684, |
|
"grad_norm": 1.3185557126998901, |
|
"learning_rate": 3.893521203645618e-05, |
|
"loss": 1.6286, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.285599839775686, |
|
"grad_norm": 1.3816808462142944, |
|
"learning_rate": 3.8684223527351025e-05, |
|
"loss": 1.7597, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.2860004005607851, |
|
"grad_norm": 1.2613497972488403, |
|
"learning_rate": 3.843385246743417e-05, |
|
"loss": 1.7095, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.2864009613458842, |
|
"grad_norm": 1.2452985048294067, |
|
"learning_rate": 3.8184101377939476e-05, |
|
"loss": 1.8584, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.28680152213098337, |
|
"grad_norm": 1.6921788454055786, |
|
"learning_rate": 3.7934972773857634e-05, |
|
"loss": 2.0267, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.2872020829160825, |
|
"grad_norm": 1.1087079048156738, |
|
"learning_rate": 3.7686469163910885e-05, |
|
"loss": 1.7955, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.28760264370118166, |
|
"grad_norm": 1.2214350700378418, |
|
"learning_rate": 3.7438593050527845e-05, |
|
"loss": 1.9203, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.2880032044862808, |
|
"grad_norm": 1.0154850482940674, |
|
"learning_rate": 3.719134692981826e-05, |
|
"loss": 2.2155, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.28840376527137995, |
|
"grad_norm": 1.018591046333313, |
|
"learning_rate": 3.694473329154778e-05, |
|
"loss": 1.7213, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.28880432605647904, |
|
"grad_norm": 1.3071078062057495, |
|
"learning_rate": 3.669875461911297e-05, |
|
"loss": 2.0372, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.2892048868415782, |
|
"grad_norm": 1.1150556802749634, |
|
"learning_rate": 3.645341338951639e-05, |
|
"loss": 2.0046, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.28960544762667734, |
|
"grad_norm": 1.0924551486968994, |
|
"learning_rate": 3.62087120733415e-05, |
|
"loss": 1.8254, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.2900060084117765, |
|
"grad_norm": 1.1697030067443848, |
|
"learning_rate": 3.5964653134727776e-05, |
|
"loss": 1.9609, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.29040656919687563, |
|
"grad_norm": 0.9879043698310852, |
|
"learning_rate": 3.5721239031346066e-05, |
|
"loss": 1.9276, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.2908071299819748, |
|
"grad_norm": 1.258763313293457, |
|
"learning_rate": 3.547847221437372e-05, |
|
"loss": 1.6585, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.2912076907670739, |
|
"grad_norm": 1.2708990573883057, |
|
"learning_rate": 3.523635512846981e-05, |
|
"loss": 1.9371, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.291608251552173, |
|
"grad_norm": 0.940768301486969, |
|
"learning_rate": 3.4994890211750754e-05, |
|
"loss": 2.1374, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.29200881233727216, |
|
"grad_norm": 1.2373859882354736, |
|
"learning_rate": 3.47540798957656e-05, |
|
"loss": 2.1544, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.2924093731223713, |
|
"grad_norm": 1.1635286808013916, |
|
"learning_rate": 3.45139266054715e-05, |
|
"loss": 1.668, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.29280993390747045, |
|
"grad_norm": 1.1330822706222534, |
|
"learning_rate": 3.4274432759209453e-05, |
|
"loss": 1.9928, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.2932104946925696, |
|
"grad_norm": 1.0662771463394165, |
|
"learning_rate": 3.4035600768679855e-05, |
|
"loss": 1.4355, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.29361105547766875, |
|
"grad_norm": 1.1696702241897583, |
|
"learning_rate": 3.379743303891815e-05, |
|
"loss": 1.8776, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.2940116162627679, |
|
"grad_norm": 1.2918500900268555, |
|
"learning_rate": 3.3559931968270753e-05, |
|
"loss": 2.0556, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.29441217704786704, |
|
"grad_norm": 1.015869379043579, |
|
"learning_rate": 3.332309994837085e-05, |
|
"loss": 1.7284, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.29481273783296613, |
|
"grad_norm": 1.0896291732788086, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 2.1261, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.2952132986180653, |
|
"grad_norm": 1.082911729812622, |
|
"learning_rate": 3.2851452593635266e-05, |
|
"loss": 2.0187, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.2956138594031644, |
|
"grad_norm": 1.2486083507537842, |
|
"learning_rate": 3.2616642008283213e-05, |
|
"loss": 1.7642, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.29601442018826357, |
|
"grad_norm": 1.1814159154891968, |
|
"learning_rate": 3.238250997259808e-05, |
|
"loss": 2.1014, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.2964149809733627, |
|
"grad_norm": 1.3076002597808838, |
|
"learning_rate": 3.21490588442868e-05, |
|
"loss": 1.7214, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.29681554175846186, |
|
"grad_norm": 1.1683399677276611, |
|
"learning_rate": 3.191629097419966e-05, |
|
"loss": 1.9912, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.297216102543561, |
|
"grad_norm": 1.4244028329849243, |
|
"learning_rate": 3.1684208706306574e-05, |
|
"loss": 2.1868, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.2976166633286601, |
|
"grad_norm": 1.08429753780365, |
|
"learning_rate": 3.1452814377673346e-05, |
|
"loss": 1.876, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.29801722411375925, |
|
"grad_norm": 1.1976194381713867, |
|
"learning_rate": 3.1222110318438304e-05, |
|
"loss": 1.9932, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.2984177848988584, |
|
"grad_norm": 0.9979033470153809, |
|
"learning_rate": 3.099209885178882e-05, |
|
"loss": 1.9214, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.29881834568395754, |
|
"grad_norm": 1.1685912609100342, |
|
"learning_rate": 3.076278229393773e-05, |
|
"loss": 2.2045, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.2992189064690567, |
|
"grad_norm": 1.0432329177856445, |
|
"learning_rate": 3.053416295410026e-05, |
|
"loss": 1.9744, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.29961946725415584, |
|
"grad_norm": 1.3380476236343384, |
|
"learning_rate": 3.030624313447067e-05, |
|
"loss": 2.2597, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.300020028039255, |
|
"grad_norm": 1.0045922994613647, |
|
"learning_rate": 3.0079025130198935e-05, |
|
"loss": 1.844, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.3004205888243541, |
|
"grad_norm": 1.2213352918624878, |
|
"learning_rate": 2.9852511229367865e-05, |
|
"loss": 1.7077, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3004205888243541, |
|
"eval_loss": 1.9114018678665161, |
|
"eval_runtime": 32.8226, |
|
"eval_samples_per_second": 32.051, |
|
"eval_steps_per_second": 16.026, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3008211496094532, |
|
"grad_norm": 1.2960726022720337, |
|
"learning_rate": 2.962670371296996e-05, |
|
"loss": 2.397, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.30122171039455237, |
|
"grad_norm": 1.0980877876281738, |
|
"learning_rate": 2.9401604854884357e-05, |
|
"loss": 2.2021, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.3016222711796515, |
|
"grad_norm": 1.016575813293457, |
|
"learning_rate": 2.91772169218541e-05, |
|
"loss": 1.7718, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.30202283196475066, |
|
"grad_norm": 1.2587579488754272, |
|
"learning_rate": 2.8953542173463133e-05, |
|
"loss": 1.8161, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.3024233927498498, |
|
"grad_norm": 1.3101757764816284, |
|
"learning_rate": 2.8730582862113742e-05, |
|
"loss": 1.6577, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.30282395353494895, |
|
"grad_norm": 1.2363008260726929, |
|
"learning_rate": 2.8508341233003654e-05, |
|
"loss": 2.0945, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.30322451432004804, |
|
"grad_norm": 1.2293903827667236, |
|
"learning_rate": 2.828681952410366e-05, |
|
"loss": 1.8652, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.3036250751051472, |
|
"grad_norm": 1.2226698398590088, |
|
"learning_rate": 2.8066019966134904e-05, |
|
"loss": 2.0198, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.30402563589024634, |
|
"grad_norm": 1.4481924772262573, |
|
"learning_rate": 2.7845944782546453e-05, |
|
"loss": 1.861, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.3044261966753455, |
|
"grad_norm": 1.2004791498184204, |
|
"learning_rate": 2.7626596189492983e-05, |
|
"loss": 2.192, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.30482675746044463, |
|
"grad_norm": 1.1224644184112549, |
|
"learning_rate": 2.7407976395812418e-05, |
|
"loss": 2.108, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.3052273182455438, |
|
"grad_norm": 1.3192898035049438, |
|
"learning_rate": 2.719008760300359e-05, |
|
"loss": 1.7614, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.3056278790306429, |
|
"grad_norm": 1.3838907480239868, |
|
"learning_rate": 2.6972932005204267e-05, |
|
"loss": 1.9876, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.306028439815742, |
|
"grad_norm": 1.4712343215942383, |
|
"learning_rate": 2.6756511789168925e-05, |
|
"loss": 2.0119, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.30642900060084116, |
|
"grad_norm": 1.0120660066604614, |
|
"learning_rate": 2.654082913424668e-05, |
|
"loss": 2.0213, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.3068295613859403, |
|
"grad_norm": 1.2688238620758057, |
|
"learning_rate": 2.6325886212359498e-05, |
|
"loss": 1.6258, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.30723012217103945, |
|
"grad_norm": 1.1377239227294922, |
|
"learning_rate": 2.6111685187980262e-05, |
|
"loss": 1.7486, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.3076306829561386, |
|
"grad_norm": 1.1769511699676514, |
|
"learning_rate": 2.589822821811083e-05, |
|
"loss": 1.6603, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.30803124374123775, |
|
"grad_norm": 0.9931359887123108, |
|
"learning_rate": 2.5685517452260567e-05, |
|
"loss": 2.4301, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.3084318045263369, |
|
"grad_norm": 1.1136318445205688, |
|
"learning_rate": 2.5473555032424533e-05, |
|
"loss": 1.8987, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.308832365311436, |
|
"grad_norm": 0.8127551674842834, |
|
"learning_rate": 2.5262343093061936e-05, |
|
"loss": 1.9156, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.30923292609653513, |
|
"grad_norm": 1.2461220026016235, |
|
"learning_rate": 2.5051883761074614e-05, |
|
"loss": 1.9488, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.3096334868816343, |
|
"grad_norm": 1.610859990119934, |
|
"learning_rate": 2.4842179155785737e-05, |
|
"loss": 2.0289, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.3100340476667334, |
|
"grad_norm": 1.442642092704773, |
|
"learning_rate": 2.4633231388918378e-05, |
|
"loss": 1.9204, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.31043460845183257, |
|
"grad_norm": 0.9093045592308044, |
|
"learning_rate": 2.4425042564574184e-05, |
|
"loss": 1.9993, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.3108351692369317, |
|
"grad_norm": 0.9235540628433228, |
|
"learning_rate": 2.4217614779212315e-05, |
|
"loss": 1.7345, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.31123573002203087, |
|
"grad_norm": 1.202626347541809, |
|
"learning_rate": 2.4010950121628318e-05, |
|
"loss": 1.8141, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.31163629080712996, |
|
"grad_norm": 0.9984288811683655, |
|
"learning_rate": 2.3805050672932928e-05, |
|
"loss": 1.8233, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.3120368515922291, |
|
"grad_norm": 1.394755482673645, |
|
"learning_rate": 2.3599918506531337e-05, |
|
"loss": 1.8879, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.31243741237732825, |
|
"grad_norm": 1.5760648250579834, |
|
"learning_rate": 2.339555568810221e-05, |
|
"loss": 1.9124, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.3128379731624274, |
|
"grad_norm": 0.9450803995132446, |
|
"learning_rate": 2.3191964275576805e-05, |
|
"loss": 2.068, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.31323853394752654, |
|
"grad_norm": 1.023253083229065, |
|
"learning_rate": 2.2989146319118425e-05, |
|
"loss": 2.166, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.3136390947326257, |
|
"grad_norm": 1.1726493835449219, |
|
"learning_rate": 2.2787103861101655e-05, |
|
"loss": 1.9661, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.31403965551772484, |
|
"grad_norm": 1.1509053707122803, |
|
"learning_rate": 2.2585838936091754e-05, |
|
"loss": 2.0738, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.3144402163028239, |
|
"grad_norm": 1.0587100982666016, |
|
"learning_rate": 2.2385353570824308e-05, |
|
"loss": 1.8896, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.3148407770879231, |
|
"grad_norm": 1.2842707633972168, |
|
"learning_rate": 2.2185649784184746e-05, |
|
"loss": 1.9896, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.3152413378730222, |
|
"grad_norm": 1.2250515222549438, |
|
"learning_rate": 2.198672958718796e-05, |
|
"loss": 2.0292, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.31564189865812137, |
|
"grad_norm": 0.9199315309524536, |
|
"learning_rate": 2.178859498295809e-05, |
|
"loss": 2.0993, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.3160424594432205, |
|
"grad_norm": 1.379782795906067, |
|
"learning_rate": 2.159124796670843e-05, |
|
"loss": 1.9658, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.31644302022831966, |
|
"grad_norm": 1.297568440437317, |
|
"learning_rate": 2.139469052572127e-05, |
|
"loss": 1.8385, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.3168435810134188, |
|
"grad_norm": 1.2404309511184692, |
|
"learning_rate": 2.119892463932781e-05, |
|
"loss": 2.0786, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.3172441417985179, |
|
"grad_norm": 0.8843573331832886, |
|
"learning_rate": 2.1003952278888382e-05, |
|
"loss": 2.226, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.31764470258361704, |
|
"grad_norm": 1.3021501302719116, |
|
"learning_rate": 2.0809775407772503e-05, |
|
"loss": 2.0633, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.3180452633687162, |
|
"grad_norm": 1.5391861200332642, |
|
"learning_rate": 2.0616395981339075e-05, |
|
"loss": 2.0367, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.31844582415381534, |
|
"grad_norm": 1.118991732597351, |
|
"learning_rate": 2.042381594691678e-05, |
|
"loss": 1.94, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.3188463849389145, |
|
"grad_norm": 1.1585220098495483, |
|
"learning_rate": 2.0232037243784475e-05, |
|
"loss": 1.8611, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.31924694572401363, |
|
"grad_norm": 1.1401135921478271, |
|
"learning_rate": 2.0041061803151508e-05, |
|
"loss": 1.8363, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.3196475065091128, |
|
"grad_norm": 1.2198765277862549, |
|
"learning_rate": 1.985089154813846e-05, |
|
"loss": 2.0256, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.3200480672942119, |
|
"grad_norm": 1.0418367385864258, |
|
"learning_rate": 1.9661528393757744e-05, |
|
"loss": 1.9346, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.320448628079311, |
|
"grad_norm": 1.1914174556732178, |
|
"learning_rate": 1.947297424689414e-05, |
|
"loss": 2.2865, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.32084918886441016, |
|
"grad_norm": 1.3711671829223633, |
|
"learning_rate": 1.9285231006285853e-05, |
|
"loss": 2.423, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.3212497496495093, |
|
"grad_norm": 1.17362380027771, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 1.5378, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.32165031043460846, |
|
"grad_norm": 0.9338995814323425, |
|
"learning_rate": 1.8912184797939803e-05, |
|
"loss": 2.2084, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.3220508712197076, |
|
"grad_norm": 1.059409499168396, |
|
"learning_rate": 1.8726885586773212e-05, |
|
"loss": 2.3115, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.32245143200480675, |
|
"grad_norm": 1.0587588548660278, |
|
"learning_rate": 1.854240479496643e-05, |
|
"loss": 1.6772, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.3228519927899059, |
|
"grad_norm": 1.5739694833755493, |
|
"learning_rate": 1.835874428023905e-05, |
|
"loss": 1.8971, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.323252553575005, |
|
"grad_norm": 1.096549391746521, |
|
"learning_rate": 1.817590589205035e-05, |
|
"loss": 1.9728, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.32365311436010413, |
|
"grad_norm": 1.1896045207977295, |
|
"learning_rate": 1.7993891471580893e-05, |
|
"loss": 1.4642, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.3240536751452033, |
|
"grad_norm": 1.2677874565124512, |
|
"learning_rate": 1.7812702851713904e-05, |
|
"loss": 1.9893, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.3244542359303024, |
|
"grad_norm": 1.2322319746017456, |
|
"learning_rate": 1.763234185701673e-05, |
|
"loss": 1.6648, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.3248547967154016, |
|
"grad_norm": 1.4150607585906982, |
|
"learning_rate": 1.74528103037226e-05, |
|
"loss": 1.8747, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.3252553575005007, |
|
"grad_norm": 1.1502705812454224, |
|
"learning_rate": 1.7274109999712295e-05, |
|
"loss": 1.8867, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.32565591828559987, |
|
"grad_norm": 1.0201531648635864, |
|
"learning_rate": 1.7096242744495837e-05, |
|
"loss": 1.9702, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.32605647907069896, |
|
"grad_norm": 1.109731912612915, |
|
"learning_rate": 1.6919210329194533e-05, |
|
"loss": 1.852, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.3264570398557981, |
|
"grad_norm": 1.1922898292541504, |
|
"learning_rate": 1.6743014536522873e-05, |
|
"loss": 1.7001, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.32685760064089725, |
|
"grad_norm": 0.8632221221923828, |
|
"learning_rate": 1.6567657140770475e-05, |
|
"loss": 1.7701, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.3272581614259964, |
|
"grad_norm": 1.106311321258545, |
|
"learning_rate": 1.6393139907784404e-05, |
|
"loss": 2.1824, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.32765872221109554, |
|
"grad_norm": 1.2513147592544556, |
|
"learning_rate": 1.621946459495127e-05, |
|
"loss": 2.1743, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.3280592829961947, |
|
"grad_norm": 1.183262825012207, |
|
"learning_rate": 1.6046632951179508e-05, |
|
"loss": 1.7933, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.32845984378129384, |
|
"grad_norm": 1.4637755155563354, |
|
"learning_rate": 1.587464671688187e-05, |
|
"loss": 1.4244, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.3288604045663929, |
|
"grad_norm": 1.0762394666671753, |
|
"learning_rate": 1.5703507623957848e-05, |
|
"loss": 1.9548, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.3292609653514921, |
|
"grad_norm": 1.5148048400878906, |
|
"learning_rate": 1.553321739577619e-05, |
|
"loss": 1.8027, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.3296615261365912, |
|
"grad_norm": 1.4003595113754272, |
|
"learning_rate": 1.5363777747157572e-05, |
|
"loss": 1.6786, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.33006208692169037, |
|
"grad_norm": 1.1532552242279053, |
|
"learning_rate": 1.5195190384357404e-05, |
|
"loss": 2.0791, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.3304626477067895, |
|
"grad_norm": 1.2315119504928589, |
|
"learning_rate": 1.5027457005048573e-05, |
|
"loss": 1.8975, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.33086320849188866, |
|
"grad_norm": 1.5469486713409424, |
|
"learning_rate": 1.4860579298304312e-05, |
|
"loss": 1.9729, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.3312637692769878, |
|
"grad_norm": 1.1972731351852417, |
|
"learning_rate": 1.4694558944581293e-05, |
|
"loss": 1.7436, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.3316643300620869, |
|
"grad_norm": 1.2163808345794678, |
|
"learning_rate": 1.4529397615702656e-05, |
|
"loss": 1.9608, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.33206489084718604, |
|
"grad_norm": 1.1661227941513062, |
|
"learning_rate": 1.4365096974841108e-05, |
|
"loss": 1.9195, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.3324654516322852, |
|
"grad_norm": 1.3404620885849, |
|
"learning_rate": 1.4201658676502294e-05, |
|
"loss": 1.9545, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.33286601241738434, |
|
"grad_norm": 1.3196473121643066, |
|
"learning_rate": 1.4039084366508092e-05, |
|
"loss": 1.789, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.3332665732024835, |
|
"grad_norm": 1.4525930881500244, |
|
"learning_rate": 1.3877375681979943e-05, |
|
"loss": 1.9036, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.33366713398758263, |
|
"grad_norm": 0.9184648990631104, |
|
"learning_rate": 1.3716534251322544e-05, |
|
"loss": 1.9158, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.3340676947726818, |
|
"grad_norm": 1.1989598274230957, |
|
"learning_rate": 1.3556561694207338e-05, |
|
"loss": 1.6822, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.33446825555778087, |
|
"grad_norm": 0.9898660182952881, |
|
"learning_rate": 1.339745962155613e-05, |
|
"loss": 2.1256, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.33486881634288, |
|
"grad_norm": 1.368600606918335, |
|
"learning_rate": 1.3239229635525074e-05, |
|
"loss": 1.592, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.33526937712797916, |
|
"grad_norm": 1.3661975860595703, |
|
"learning_rate": 1.3081873329488392e-05, |
|
"loss": 1.865, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.3356699379130783, |
|
"grad_norm": 0.9782090187072754, |
|
"learning_rate": 1.2925392288022298e-05, |
|
"loss": 1.8389, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.33607049869817746, |
|
"grad_norm": 1.5394399166107178, |
|
"learning_rate": 1.2769788086889134e-05, |
|
"loss": 2.0711, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.3364710594832766, |
|
"grad_norm": 1.2607556581497192, |
|
"learning_rate": 1.2615062293021507e-05, |
|
"loss": 2.0338, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.33687162026837575, |
|
"grad_norm": 1.4436510801315308, |
|
"learning_rate": 1.2461216464506454e-05, |
|
"loss": 2.074, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.33727218105347484, |
|
"grad_norm": 1.4884815216064453, |
|
"learning_rate": 1.230825215056971e-05, |
|
"loss": 2.0801, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.337672741838574, |
|
"grad_norm": 0.985197126865387, |
|
"learning_rate": 1.2156170891560258e-05, |
|
"loss": 2.1941, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.33807330262367313, |
|
"grad_norm": 1.5094271898269653, |
|
"learning_rate": 1.2004974218934695e-05, |
|
"loss": 2.1544, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.3384738634087723, |
|
"grad_norm": 1.4975275993347168, |
|
"learning_rate": 1.1854663655241805e-05, |
|
"loss": 2.3323, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.3388744241938714, |
|
"grad_norm": 1.178804636001587, |
|
"learning_rate": 1.1705240714107302e-05, |
|
"loss": 2.0121, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.3392749849789706, |
|
"grad_norm": 1.1911643743515015, |
|
"learning_rate": 1.1556706900218572e-05, |
|
"loss": 2.2518, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.3396755457640697, |
|
"grad_norm": 1.2257444858551025, |
|
"learning_rate": 1.1409063709309442e-05, |
|
"loss": 1.9825, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.3400761065491688, |
|
"grad_norm": 1.2943917512893677, |
|
"learning_rate": 1.126231262814521e-05, |
|
"loss": 1.9012, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.34047666733426796, |
|
"grad_norm": 1.4267241954803467, |
|
"learning_rate": 1.1116455134507664e-05, |
|
"loss": 1.9565, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3408772281193671, |
|
"grad_norm": 1.4165103435516357, |
|
"learning_rate": 1.0971492697180096e-05, |
|
"loss": 1.9873, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.34127778890446625, |
|
"grad_norm": 0.963735044002533, |
|
"learning_rate": 1.0827426775932658e-05, |
|
"loss": 1.9228, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.3416783496895654, |
|
"grad_norm": 1.302394986152649, |
|
"learning_rate": 1.068425882150762e-05, |
|
"loss": 1.8196, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.34207891047466454, |
|
"grad_norm": 1.6307578086853027, |
|
"learning_rate": 1.054199027560463e-05, |
|
"loss": 2.2207, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.3424794712597637, |
|
"grad_norm": 1.1663156747817993, |
|
"learning_rate": 1.0400622570866425e-05, |
|
"loss": 1.3852, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.3428800320448628, |
|
"grad_norm": 1.391180157661438, |
|
"learning_rate": 1.026015713086418e-05, |
|
"loss": 2.0258, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.34328059282996193, |
|
"grad_norm": 0.9974930882453918, |
|
"learning_rate": 1.0120595370083318e-05, |
|
"loss": 2.1458, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.3436811536150611, |
|
"grad_norm": 1.4504176378250122, |
|
"learning_rate": 9.98193869390922e-06, |
|
"loss": 2.5077, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.3440817144001602, |
|
"grad_norm": 1.6607308387756348, |
|
"learning_rate": 9.844188498613116e-06, |
|
"loss": 1.9936, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.34448227518525937, |
|
"grad_norm": 1.0695178508758545, |
|
"learning_rate": 9.707346171337894e-06, |
|
"loss": 1.5378, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3448828359703585, |
|
"grad_norm": 1.1689550876617432, |
|
"learning_rate": 9.57141309008428e-06, |
|
"loss": 2.1762, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.34528339675545766, |
|
"grad_norm": 1.247942566871643, |
|
"learning_rate": 9.436390623696911e-06, |
|
"loss": 2.2111, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.34568395754055675, |
|
"grad_norm": 1.3530837297439575, |
|
"learning_rate": 9.302280131850539e-06, |
|
"loss": 2.2161, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.3460845183256559, |
|
"grad_norm": 0.9715630412101746, |
|
"learning_rate": 9.16908296503628e-06, |
|
"loss": 1.8675, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.34648507911075505, |
|
"grad_norm": 1.2928553819656372, |
|
"learning_rate": 9.036800464548157e-06, |
|
"loss": 1.98, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.3468856398958542, |
|
"grad_norm": 1.3303308486938477, |
|
"learning_rate": 8.905433962469489e-06, |
|
"loss": 2.0134, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.34728620068095334, |
|
"grad_norm": 1.1572000980377197, |
|
"learning_rate": 8.774984781659467e-06, |
|
"loss": 1.9953, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.3476867614660525, |
|
"grad_norm": 1.1844559907913208, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 2.4239, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.34808732225115163, |
|
"grad_norm": 0.9763182401657104, |
|
"learning_rate": 8.516843629081984e-06, |
|
"loss": 2.2392, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.3484878830362508, |
|
"grad_norm": 1.408148169517517, |
|
"learning_rate": 8.38915425679304e-06, |
|
"loss": 1.792, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.34888844382134987, |
|
"grad_norm": 1.2217282056808472, |
|
"learning_rate": 8.262387404703653e-06, |
|
"loss": 1.5025, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.349289004606449, |
|
"grad_norm": 1.0182693004608154, |
|
"learning_rate": 8.13654434935467e-06, |
|
"loss": 1.684, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.34968956539154816, |
|
"grad_norm": 1.0316119194030762, |
|
"learning_rate": 8.011626357984181e-06, |
|
"loss": 1.9877, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.3500901261766473, |
|
"grad_norm": 1.3248041868209839, |
|
"learning_rate": 7.887634688515e-06, |
|
"loss": 2.0565, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.35049068696174646, |
|
"grad_norm": 1.2190947532653809, |
|
"learning_rate": 7.764570589541875e-06, |
|
"loss": 1.9459, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.3508912477468456, |
|
"grad_norm": 1.182137131690979, |
|
"learning_rate": 7.642435300318907e-06, |
|
"loss": 2.0345, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.35129180853194475, |
|
"grad_norm": 1.1659443378448486, |
|
"learning_rate": 7.521230050747086e-06, |
|
"loss": 2.1023, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.35169236931704384, |
|
"grad_norm": 1.0156196355819702, |
|
"learning_rate": 7.400956061361974e-06, |
|
"loss": 1.8653, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.352092930102143, |
|
"grad_norm": 1.0992286205291748, |
|
"learning_rate": 7.281614543321269e-06, |
|
"loss": 1.8927, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.35249349088724213, |
|
"grad_norm": 1.1435526609420776, |
|
"learning_rate": 7.163206698392744e-06, |
|
"loss": 2.099, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3528940516723413, |
|
"grad_norm": 1.0202122926712036, |
|
"learning_rate": 7.045733718942094e-06, |
|
"loss": 1.8585, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.3532946124574404, |
|
"grad_norm": 1.4951303005218506, |
|
"learning_rate": 6.929196787920899e-06, |
|
"loss": 1.695, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.3536951732425396, |
|
"grad_norm": 1.1155850887298584, |
|
"learning_rate": 6.813597078854772e-06, |
|
"loss": 1.8532, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.3540957340276387, |
|
"grad_norm": 1.260799527168274, |
|
"learning_rate": 6.698935755831492e-06, |
|
"loss": 1.9453, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.3544962948127378, |
|
"grad_norm": 1.116297960281372, |
|
"learning_rate": 6.585213973489335e-06, |
|
"loss": 2.0739, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.35489685559783696, |
|
"grad_norm": 1.355909824371338, |
|
"learning_rate": 6.472432877005341e-06, |
|
"loss": 2.0201, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.3552974163829361, |
|
"grad_norm": 1.2766674757003784, |
|
"learning_rate": 6.360593602083942e-06, |
|
"loss": 1.8345, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.35569797716803525, |
|
"grad_norm": 1.0762203931808472, |
|
"learning_rate": 6.2496972749453766e-06, |
|
"loss": 1.8632, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.3560985379531344, |
|
"grad_norm": 1.4779655933380127, |
|
"learning_rate": 6.139745012314424e-06, |
|
"loss": 1.8136, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.35649909873823354, |
|
"grad_norm": 1.2446835041046143, |
|
"learning_rate": 6.030737921409169e-06, |
|
"loss": 2.1419, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3568996595233327, |
|
"grad_norm": 1.3997869491577148, |
|
"learning_rate": 5.922677099929786e-06, |
|
"loss": 1.5943, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.3573002203084318, |
|
"grad_norm": 1.0760163068771362, |
|
"learning_rate": 5.8155636360475385e-06, |
|
"loss": 1.7411, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.35770078109353093, |
|
"grad_norm": 1.466942548751831, |
|
"learning_rate": 5.709398608393835e-06, |
|
"loss": 1.5269, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.3581013418786301, |
|
"grad_norm": 1.3610737323760986, |
|
"learning_rate": 5.604183086049342e-06, |
|
"loss": 2.1299, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.3585019026637292, |
|
"grad_norm": 1.5304806232452393, |
|
"learning_rate": 5.499918128533155e-06, |
|
"loss": 2.0253, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.35890246344882837, |
|
"grad_norm": 0.9878894090652466, |
|
"learning_rate": 5.396604785792281e-06, |
|
"loss": 1.9011, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.3593030242339275, |
|
"grad_norm": 1.012338638305664, |
|
"learning_rate": 5.294244098190926e-06, |
|
"loss": 1.8002, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.35970358501902666, |
|
"grad_norm": 1.3276349306106567, |
|
"learning_rate": 5.192837096500058e-06, |
|
"loss": 2.0386, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.36010414580412575, |
|
"grad_norm": 1.223771572113037, |
|
"learning_rate": 5.092384801887074e-06, |
|
"loss": 2.1836, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.3605047065892249, |
|
"grad_norm": 0.9753492474555969, |
|
"learning_rate": 4.992888225905468e-06, |
|
"loss": 1.6111, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.36090526737432405, |
|
"grad_norm": 0.7910905480384827, |
|
"learning_rate": 4.8943483704846475e-06, |
|
"loss": 1.903, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.3613058281594232, |
|
"grad_norm": 1.2467719316482544, |
|
"learning_rate": 4.796766227919857e-06, |
|
"loss": 1.9438, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.36170638894452234, |
|
"grad_norm": 1.1500917673110962, |
|
"learning_rate": 4.700142780862205e-06, |
|
"loss": 1.9579, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.3621069497296215, |
|
"grad_norm": 1.1933996677398682, |
|
"learning_rate": 4.604479002308737e-06, |
|
"loss": 2.3006, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.36250751051472063, |
|
"grad_norm": 1.1259969472885132, |
|
"learning_rate": 4.509775855592613e-06, |
|
"loss": 1.998, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.3629080712998197, |
|
"grad_norm": 1.130823016166687, |
|
"learning_rate": 4.416034294373472e-06, |
|
"loss": 1.8769, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.36330863208491887, |
|
"grad_norm": 1.02981698513031, |
|
"learning_rate": 4.323255262627846e-06, |
|
"loss": 1.9655, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.363709192870018, |
|
"grad_norm": 0.9289757609367371, |
|
"learning_rate": 4.231439694639483e-06, |
|
"loss": 2.0099, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.36410975365511716, |
|
"grad_norm": 1.203212857246399, |
|
"learning_rate": 4.140588514990162e-06, |
|
"loss": 1.9931, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.3645103144402163, |
|
"grad_norm": 1.1724556684494019, |
|
"learning_rate": 4.050702638550275e-06, |
|
"loss": 1.662, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.36491087522531546, |
|
"grad_norm": 1.212730050086975, |
|
"learning_rate": 3.961782970469563e-06, |
|
"loss": 2.0693, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.3653114360104146, |
|
"grad_norm": 0.8744038939476013, |
|
"learning_rate": 3.873830406168111e-06, |
|
"loss": 1.9265, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.3657119967955137, |
|
"grad_norm": 1.2729175090789795, |
|
"learning_rate": 3.7868458313272904e-06, |
|
"loss": 2.1908, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.36611255758061284, |
|
"grad_norm": 1.3766783475875854, |
|
"learning_rate": 3.7008301218807716e-06, |
|
"loss": 1.9221, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.366513118365712, |
|
"grad_norm": 1.1976795196533203, |
|
"learning_rate": 3.615784144005796e-06, |
|
"loss": 2.0681, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.36691367915081113, |
|
"grad_norm": 1.2433587312698364, |
|
"learning_rate": 3.5317087541144377e-06, |
|
"loss": 1.7831, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.3673142399359103, |
|
"grad_norm": 1.328249216079712, |
|
"learning_rate": 3.448604798844912e-06, |
|
"loss": 1.9766, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.3677148007210094, |
|
"grad_norm": 0.9622407555580139, |
|
"learning_rate": 3.3664731150531482e-06, |
|
"loss": 1.8986, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.3681153615061086, |
|
"grad_norm": 1.2278088331222534, |
|
"learning_rate": 3.2853145298042953e-06, |
|
"loss": 1.9074, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.36851592229120766, |
|
"grad_norm": 1.2879400253295898, |
|
"learning_rate": 3.2051298603643753e-06, |
|
"loss": 2.0471, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3689164830763068, |
|
"grad_norm": 1.2228416204452515, |
|
"learning_rate": 3.1259199141921435e-06, |
|
"loss": 1.8044, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.36931704386140596, |
|
"grad_norm": 1.045020341873169, |
|
"learning_rate": 3.047685488930874e-06, |
|
"loss": 1.8997, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.3697176046465051, |
|
"grad_norm": 0.9093934893608093, |
|
"learning_rate": 2.970427372400353e-06, |
|
"loss": 1.651, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.37011816543160425, |
|
"grad_norm": 1.3145874738693237, |
|
"learning_rate": 2.894146342588977e-06, |
|
"loss": 2.1705, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.3705187262167034, |
|
"grad_norm": 1.227515697479248, |
|
"learning_rate": 2.818843167645835e-06, |
|
"loss": 2.4005, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.37091928700180254, |
|
"grad_norm": 0.9281034469604492, |
|
"learning_rate": 2.744518605873092e-06, |
|
"loss": 2.142, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.37131984778690164, |
|
"grad_norm": 1.3893688917160034, |
|
"learning_rate": 2.6711734057182415e-06, |
|
"loss": 1.9674, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.3717204085720008, |
|
"grad_norm": 1.1843217611312866, |
|
"learning_rate": 2.5988083057666533e-06, |
|
"loss": 2.0467, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.37212096935709993, |
|
"grad_norm": 1.1903239488601685, |
|
"learning_rate": 2.5274240347340717e-06, |
|
"loss": 1.9037, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.3725215301421991, |
|
"grad_norm": 1.1729291677474976, |
|
"learning_rate": 2.4570213114592954e-06, |
|
"loss": 1.9407, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3729220909272982, |
|
"grad_norm": 1.4715547561645508, |
|
"learning_rate": 2.3876008448969976e-06, |
|
"loss": 2.1013, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.37332265171239737, |
|
"grad_norm": 1.2586127519607544, |
|
"learning_rate": 2.3191633341104856e-06, |
|
"loss": 1.8211, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.3737232124974965, |
|
"grad_norm": 1.3188387155532837, |
|
"learning_rate": 2.2517094682647397e-06, |
|
"loss": 1.7527, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.3741237732825956, |
|
"grad_norm": 1.0343830585479736, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 1.4433, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.37452433406769475, |
|
"grad_norm": 1.2205039262771606, |
|
"learning_rate": 2.119755378522137e-06, |
|
"loss": 1.6247, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.3749248948527939, |
|
"grad_norm": 1.367773175239563, |
|
"learning_rate": 2.05525648340148e-06, |
|
"loss": 1.9733, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.37532545563789305, |
|
"grad_norm": 1.1995794773101807, |
|
"learning_rate": 1.9917438907606556e-06, |
|
"loss": 2.2017, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.3757260164229922, |
|
"grad_norm": 1.0902953147888184, |
|
"learning_rate": 1.9292182401707603e-06, |
|
"loss": 1.6807, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.37612657720809134, |
|
"grad_norm": 1.0587186813354492, |
|
"learning_rate": 1.8676801612643957e-06, |
|
"loss": 1.7358, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.3765271379931905, |
|
"grad_norm": 1.0900659561157227, |
|
"learning_rate": 1.8071302737293295e-06, |
|
"loss": 1.9882, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.37692769877828963, |
|
"grad_norm": 1.2947819232940674, |
|
"learning_rate": 1.747569187302267e-06, |
|
"loss": 1.8446, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.3773282595633887, |
|
"grad_norm": 0.9651957750320435, |
|
"learning_rate": 1.6889975017626903e-06, |
|
"loss": 1.9641, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.37772882034848787, |
|
"grad_norm": 1.0325901508331299, |
|
"learning_rate": 1.6314158069267948e-06, |
|
"loss": 1.9663, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.378129381133587, |
|
"grad_norm": 1.0935128927230835, |
|
"learning_rate": 1.574824682641629e-06, |
|
"loss": 1.6413, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.37852994191868616, |
|
"grad_norm": 1.0227198600769043, |
|
"learning_rate": 1.5192246987791981e-06, |
|
"loss": 1.8915, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.3789305027037853, |
|
"grad_norm": 1.019073486328125, |
|
"learning_rate": 1.4646164152307018e-06, |
|
"loss": 1.8609, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.37933106348888446, |
|
"grad_norm": 1.2001268863677979, |
|
"learning_rate": 1.411000381900951e-06, |
|
"loss": 1.95, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.3797316242739836, |
|
"grad_norm": 1.1757829189300537, |
|
"learning_rate": 1.3583771387028265e-06, |
|
"loss": 1.6127, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.3801321850590827, |
|
"grad_norm": 0.9995384216308594, |
|
"learning_rate": 1.3067472155517735e-06, |
|
"loss": 1.7688, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.38053274584418184, |
|
"grad_norm": 0.9961532354354858, |
|
"learning_rate": 1.2561111323605712e-06, |
|
"loss": 1.7373, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.380933306629281, |
|
"grad_norm": 1.0140630006790161, |
|
"learning_rate": 1.2064693990339936e-06, |
|
"loss": 1.901, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.38133386741438013, |
|
"grad_norm": 1.1691038608551025, |
|
"learning_rate": 1.157822515463758e-06, |
|
"loss": 1.8546, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.3817344281994793, |
|
"grad_norm": 1.1697040796279907, |
|
"learning_rate": 1.1101709715234386e-06, |
|
"loss": 1.7615, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.3821349889845784, |
|
"grad_norm": 1.594868540763855, |
|
"learning_rate": 1.0635152470635512e-06, |
|
"loss": 1.9847, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.3825355497696776, |
|
"grad_norm": 1.151361107826233, |
|
"learning_rate": 1.0178558119067315e-06, |
|
"loss": 2.078, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.38293611055477667, |
|
"grad_norm": 0.9073551297187805, |
|
"learning_rate": 9.731931258429638e-07, |
|
"loss": 1.9805, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.3833366713398758, |
|
"grad_norm": 1.2325596809387207, |
|
"learning_rate": 9.295276386250274e-07, |
|
"loss": 2.0565, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.38373723212497496, |
|
"grad_norm": 1.184550166130066, |
|
"learning_rate": 8.868597899638898e-07, |
|
"loss": 2.1695, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.3841377929100741, |
|
"grad_norm": 1.1788114309310913, |
|
"learning_rate": 8.451900095242881e-07, |
|
"loss": 2.0578, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.38453835369517325, |
|
"grad_norm": 1.1700098514556885, |
|
"learning_rate": 8.04518716920466e-07, |
|
"loss": 1.8347, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3849389144802724, |
|
"grad_norm": 0.935607373714447, |
|
"learning_rate": 7.648463217118984e-07, |
|
"loss": 1.8536, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.38533947526537154, |
|
"grad_norm": 0.9701693058013916, |
|
"learning_rate": 7.261732233991513e-07, |
|
"loss": 1.6937, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.38574003605047064, |
|
"grad_norm": 1.195351243019104, |
|
"learning_rate": 6.884998114198959e-07, |
|
"loss": 2.1418, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.3861405968355698, |
|
"grad_norm": 1.6363762617111206, |
|
"learning_rate": 6.518264651449779e-07, |
|
"loss": 2.2872, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.38654115762066893, |
|
"grad_norm": 1.2114075422286987, |
|
"learning_rate": 6.161535538745878e-07, |
|
"loss": 2.1418, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.3869417184057681, |
|
"grad_norm": 1.7314434051513672, |
|
"learning_rate": 5.814814368345412e-07, |
|
"loss": 2.0097, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.3873422791908672, |
|
"grad_norm": 1.5273650884628296, |
|
"learning_rate": 5.478104631726711e-07, |
|
"loss": 2.0629, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.38774283997596637, |
|
"grad_norm": 1.0178067684173584, |
|
"learning_rate": 5.151409719553079e-07, |
|
"loss": 1.5887, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.3881434007610655, |
|
"grad_norm": 0.971847653388977, |
|
"learning_rate": 4.834732921638719e-07, |
|
"loss": 1.8772, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.3885439615461646, |
|
"grad_norm": 1.0141364336013794, |
|
"learning_rate": 4.5280774269154115e-07, |
|
"loss": 1.7437, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.38894452233126375, |
|
"grad_norm": 1.1142873764038086, |
|
"learning_rate": 4.2314463234005565e-07, |
|
"loss": 2.1158, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.3893450831163629, |
|
"grad_norm": 0.7133710384368896, |
|
"learning_rate": 3.9448425981661876e-07, |
|
"loss": 1.8973, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.38974564390146205, |
|
"grad_norm": 1.245099663734436, |
|
"learning_rate": 3.6682691373086665e-07, |
|
"loss": 1.9652, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.3901462046865612, |
|
"grad_norm": 1.2889657020568848, |
|
"learning_rate": 3.401728725919373e-07, |
|
"loss": 1.7035, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.39054676547166034, |
|
"grad_norm": 1.1345562934875488, |
|
"learning_rate": 3.145224048057727e-07, |
|
"loss": 1.6353, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.3909473262567595, |
|
"grad_norm": 1.1244771480560303, |
|
"learning_rate": 2.898757686722542e-07, |
|
"loss": 1.9095, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.3913478870418586, |
|
"grad_norm": 1.1727226972579956, |
|
"learning_rate": 2.6623321238277157e-07, |
|
"loss": 1.8767, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.3917484478269577, |
|
"grad_norm": 1.2802989482879639, |
|
"learning_rate": 2.4359497401758024e-07, |
|
"loss": 1.5799, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.39214900861205687, |
|
"grad_norm": 1.4607635736465454, |
|
"learning_rate": 2.219612815434924e-07, |
|
"loss": 2.0009, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.392549569397156, |
|
"grad_norm": 1.410239815711975, |
|
"learning_rate": 2.0133235281156736e-07, |
|
"loss": 2.0732, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.39295013018225516, |
|
"grad_norm": 0.9666495323181152, |
|
"learning_rate": 1.817083955548693e-07, |
|
"loss": 1.8358, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.3933506909673543, |
|
"grad_norm": 1.4496511220932007, |
|
"learning_rate": 1.630896073864352e-07, |
|
"loss": 1.8643, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.39375125175245346, |
|
"grad_norm": 1.2983746528625488, |
|
"learning_rate": 1.4547617579725449e-07, |
|
"loss": 1.9004, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.39415181253755255, |
|
"grad_norm": 1.286615014076233, |
|
"learning_rate": 1.2886827815440372e-07, |
|
"loss": 1.8282, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.3945523733226517, |
|
"grad_norm": 1.1125391721725464, |
|
"learning_rate": 1.1326608169920372e-07, |
|
"loss": 1.9587, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.39495293410775084, |
|
"grad_norm": 1.1754589080810547, |
|
"learning_rate": 9.866974354560965e-08, |
|
"loss": 1.8011, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.39535349489285, |
|
"grad_norm": 1.0687789916992188, |
|
"learning_rate": 8.507941067859016e-08, |
|
"loss": 1.8824, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.39575405567794913, |
|
"grad_norm": 1.156052589416504, |
|
"learning_rate": 7.249521995263964e-08, |
|
"loss": 1.8151, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.3961546164630483, |
|
"grad_norm": 1.0500197410583496, |
|
"learning_rate": 6.09172980904238e-08, |
|
"loss": 1.6616, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.39655517724814743, |
|
"grad_norm": 0.9670491218566895, |
|
"learning_rate": 5.0345761681491746e-08, |
|
"loss": 1.671, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3969557380332465, |
|
"grad_norm": 1.1478677988052368, |
|
"learning_rate": 4.078071718107701e-08, |
|
"loss": 1.779, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.39735629881834567, |
|
"grad_norm": 1.2338886260986328, |
|
"learning_rate": 3.2222260909087196e-08, |
|
"loss": 1.8303, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.3977568596034448, |
|
"grad_norm": 0.9074128270149231, |
|
"learning_rate": 2.4670479049082597e-08, |
|
"loss": 1.8302, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.39815742038854396, |
|
"grad_norm": 1.1111611127853394, |
|
"learning_rate": 1.81254476474213e-08, |
|
"loss": 1.9118, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.3985579811736431, |
|
"grad_norm": 1.4943230152130127, |
|
"learning_rate": 1.2587232612493172e-08, |
|
"loss": 2.1329, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.39895854195874225, |
|
"grad_norm": 0.8442416191101074, |
|
"learning_rate": 8.055889714064791e-09, |
|
"loss": 1.832, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.3993591027438414, |
|
"grad_norm": 1.6363670825958252, |
|
"learning_rate": 4.531464582713252e-09, |
|
"loss": 2.3091, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.3997596635289405, |
|
"grad_norm": 1.186084270477295, |
|
"learning_rate": 2.0139927093487664e-09, |
|
"loss": 1.5448, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.40016022431403964, |
|
"grad_norm": 1.3950694799423218, |
|
"learning_rate": 5.034994448926967e-10, |
|
"loss": 1.846, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.4005607850991388, |
|
"grad_norm": 1.2405130863189697, |
|
"learning_rate": 0.0, |
|
"loss": 1.6719, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4005607850991388, |
|
"eval_loss": 1.906521201133728, |
|
"eval_runtime": 32.8778, |
|
"eval_samples_per_second": 31.997, |
|
"eval_steps_per_second": 15.999, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.08473960997847e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |