|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.99876492383697, |
|
"eval_steps": 500, |
|
"global_step": 5463, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005489227391244683, |
|
"grad_norm": 1.3054485321044922, |
|
"learning_rate": 9.140767824497258e-07, |
|
"loss": 1.7028, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010978454782489365, |
|
"grad_norm": 1.6326860189437866, |
|
"learning_rate": 1.8281535648994516e-06, |
|
"loss": 1.6575, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.016467682173734045, |
|
"grad_norm": 0.9034647345542908, |
|
"learning_rate": 2.7422303473491773e-06, |
|
"loss": 1.3904, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02195690956497873, |
|
"grad_norm": 0.8317720293998718, |
|
"learning_rate": 3.6563071297989032e-06, |
|
"loss": 1.2568, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.027446136956223412, |
|
"grad_norm": 0.8646258115768433, |
|
"learning_rate": 4.570383912248629e-06, |
|
"loss": 1.2404, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03293536434746809, |
|
"grad_norm": 0.7493156790733337, |
|
"learning_rate": 5.484460694698355e-06, |
|
"loss": 1.1597, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03842459173871278, |
|
"grad_norm": 0.7537096738815308, |
|
"learning_rate": 6.398537477148081e-06, |
|
"loss": 1.1168, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04391381912995746, |
|
"grad_norm": 0.6768060922622681, |
|
"learning_rate": 7.3126142595978065e-06, |
|
"loss": 1.0531, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04940304652120214, |
|
"grad_norm": 0.8540539145469666, |
|
"learning_rate": 8.226691042047533e-06, |
|
"loss": 1.0974, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.054892273912446825, |
|
"grad_norm": 0.7654123306274414, |
|
"learning_rate": 9.140767824497258e-06, |
|
"loss": 1.0468, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06038150130369151, |
|
"grad_norm": 0.838114857673645, |
|
"learning_rate": 1.0054844606946984e-05, |
|
"loss": 1.0481, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06587072869493618, |
|
"grad_norm": 0.7839793562889099, |
|
"learning_rate": 1.096892138939671e-05, |
|
"loss": 1.0444, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07135995608618087, |
|
"grad_norm": 1.0483232736587524, |
|
"learning_rate": 1.1882998171846435e-05, |
|
"loss": 1.005, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07684918347742556, |
|
"grad_norm": 0.9476339221000671, |
|
"learning_rate": 1.2797074954296162e-05, |
|
"loss": 1.0538, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08233841086867023, |
|
"grad_norm": 0.8280003070831299, |
|
"learning_rate": 1.3711151736745886e-05, |
|
"loss": 1.0122, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08782763825991492, |
|
"grad_norm": 0.8112940788269043, |
|
"learning_rate": 1.4625228519195613e-05, |
|
"loss": 0.9613, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0933168656511596, |
|
"grad_norm": 0.9424939155578613, |
|
"learning_rate": 1.553930530164534e-05, |
|
"loss": 0.9637, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09880609304240429, |
|
"grad_norm": 0.781250536441803, |
|
"learning_rate": 1.6453382084095066e-05, |
|
"loss": 0.9984, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10429532043364896, |
|
"grad_norm": 0.9252836108207703, |
|
"learning_rate": 1.7367458866544793e-05, |
|
"loss": 0.9984, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10978454782489365, |
|
"grad_norm": 0.9257864356040955, |
|
"learning_rate": 1.8281535648994517e-05, |
|
"loss": 0.949, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 1.042043924331665, |
|
"learning_rate": 1.9195612431444244e-05, |
|
"loss": 1.0031, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12076300260738301, |
|
"grad_norm": 1.0521234273910522, |
|
"learning_rate": 2.0109689213893968e-05, |
|
"loss": 0.9751, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1262522299986277, |
|
"grad_norm": 0.865064263343811, |
|
"learning_rate": 2.1023765996343695e-05, |
|
"loss": 0.9564, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13174145738987236, |
|
"grad_norm": 0.8879236578941345, |
|
"learning_rate": 2.193784277879342e-05, |
|
"loss": 0.9182, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13723068478111705, |
|
"grad_norm": 0.9224317669868469, |
|
"learning_rate": 2.2851919561243146e-05, |
|
"loss": 0.9037, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14271991217236174, |
|
"grad_norm": 0.8295108675956726, |
|
"learning_rate": 2.376599634369287e-05, |
|
"loss": 0.9708, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14820913956360643, |
|
"grad_norm": 0.7987868785858154, |
|
"learning_rate": 2.4680073126142597e-05, |
|
"loss": 0.9611, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15369836695485112, |
|
"grad_norm": 0.774760901927948, |
|
"learning_rate": 2.5594149908592324e-05, |
|
"loss": 0.9872, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15918759434609578, |
|
"grad_norm": 0.7601301670074463, |
|
"learning_rate": 2.6508226691042048e-05, |
|
"loss": 0.9, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16467682173734047, |
|
"grad_norm": 0.9270791411399841, |
|
"learning_rate": 2.742230347349177e-05, |
|
"loss": 0.8798, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17016604912858516, |
|
"grad_norm": 0.873102605342865, |
|
"learning_rate": 2.8336380255941502e-05, |
|
"loss": 0.8962, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17565527651982984, |
|
"grad_norm": 0.9427269101142883, |
|
"learning_rate": 2.9250457038391226e-05, |
|
"loss": 0.886, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1811445039110745, |
|
"grad_norm": 0.8019095659255981, |
|
"learning_rate": 3.016453382084095e-05, |
|
"loss": 0.8335, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1866337313023192, |
|
"grad_norm": 0.9028713703155518, |
|
"learning_rate": 3.107861060329068e-05, |
|
"loss": 0.8946, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19212295869356388, |
|
"grad_norm": 1.0009723901748657, |
|
"learning_rate": 3.1992687385740404e-05, |
|
"loss": 0.9274, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19761218608480857, |
|
"grad_norm": 0.7785693407058716, |
|
"learning_rate": 3.290676416819013e-05, |
|
"loss": 0.8001, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.20310141347605323, |
|
"grad_norm": 0.9450286030769348, |
|
"learning_rate": 3.382084095063985e-05, |
|
"loss": 0.9036, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.20859064086729792, |
|
"grad_norm": 0.899732232093811, |
|
"learning_rate": 3.4734917733089586e-05, |
|
"loss": 0.8944, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2140798682585426, |
|
"grad_norm": 1.35003662109375, |
|
"learning_rate": 3.5648994515539306e-05, |
|
"loss": 0.8581, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2195690956497873, |
|
"grad_norm": 1.1555213928222656, |
|
"learning_rate": 3.656307129798903e-05, |
|
"loss": 0.8412, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.225058323041032, |
|
"grad_norm": 0.8920039534568787, |
|
"learning_rate": 3.7477148080438754e-05, |
|
"loss": 0.9105, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 0.8022063970565796, |
|
"learning_rate": 3.839122486288849e-05, |
|
"loss": 0.9217, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23603677782352134, |
|
"grad_norm": 1.1498247385025024, |
|
"learning_rate": 3.930530164533821e-05, |
|
"loss": 0.9259, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.24152600521476603, |
|
"grad_norm": 1.0198287963867188, |
|
"learning_rate": 4.0219378427787935e-05, |
|
"loss": 0.8857, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24701523260601072, |
|
"grad_norm": 0.9331903457641602, |
|
"learning_rate": 4.113345521023766e-05, |
|
"loss": 0.8754, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2525044599972554, |
|
"grad_norm": 0.9897291660308838, |
|
"learning_rate": 4.204753199268739e-05, |
|
"loss": 0.8769, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2579936873885001, |
|
"grad_norm": 1.6721230745315552, |
|
"learning_rate": 4.296160877513711e-05, |
|
"loss": 0.8992, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2634829147797447, |
|
"grad_norm": 1.1787182092666626, |
|
"learning_rate": 4.387568555758684e-05, |
|
"loss": 0.8918, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2689721421709894, |
|
"grad_norm": 1.0543595552444458, |
|
"learning_rate": 4.4789762340036564e-05, |
|
"loss": 0.8167, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2744613695622341, |
|
"grad_norm": 0.9777544140815735, |
|
"learning_rate": 4.570383912248629e-05, |
|
"loss": 0.8888, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2799505969534788, |
|
"grad_norm": 0.9173258543014526, |
|
"learning_rate": 4.661791590493602e-05, |
|
"loss": 0.8296, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2854398243447235, |
|
"grad_norm": 1.0830740928649902, |
|
"learning_rate": 4.753199268738574e-05, |
|
"loss": 0.8703, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.29092905173596817, |
|
"grad_norm": 1.115646243095398, |
|
"learning_rate": 4.844606946983547e-05, |
|
"loss": 0.9081, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.29641827912721286, |
|
"grad_norm": 1.219681739807129, |
|
"learning_rate": 4.936014625228519e-05, |
|
"loss": 0.9165, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.30190750651845755, |
|
"grad_norm": 1.178253173828125, |
|
"learning_rate": 4.999995405604411e-05, |
|
"loss": 0.8977, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.30739673390970224, |
|
"grad_norm": 1.3987079858779907, |
|
"learning_rate": 4.999913727930364e-05, |
|
"loss": 0.8527, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.31288596130094687, |
|
"grad_norm": 1.095534324645996, |
|
"learning_rate": 4.999729956415998e-05, |
|
"loss": 0.8716, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.31837518869219156, |
|
"grad_norm": 1.1343433856964111, |
|
"learning_rate": 4.9994440985663475e-05, |
|
"loss": 0.8402, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.32386441608343625, |
|
"grad_norm": 1.177049994468689, |
|
"learning_rate": 4.9990561660555454e-05, |
|
"loss": 0.8629, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.32935364347468093, |
|
"grad_norm": 1.091205358505249, |
|
"learning_rate": 4.998566174726347e-05, |
|
"loss": 0.7973, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3348428708659256, |
|
"grad_norm": 1.3899606466293335, |
|
"learning_rate": 4.997974144589481e-05, |
|
"loss": 0.8956, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3403320982571703, |
|
"grad_norm": 1.2220797538757324, |
|
"learning_rate": 4.997280099822833e-05, |
|
"loss": 0.794, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 1.0186364650726318, |
|
"learning_rate": 4.996484068770461e-05, |
|
"loss": 0.7641, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3513105530396597, |
|
"grad_norm": 1.3602491617202759, |
|
"learning_rate": 4.9955860839414324e-05, |
|
"loss": 0.8582, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3567997804309043, |
|
"grad_norm": 1.2544053792953491, |
|
"learning_rate": 4.994586182008501e-05, |
|
"loss": 0.8087, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.362289007822149, |
|
"grad_norm": 1.25338876247406, |
|
"learning_rate": 4.993484403806609e-05, |
|
"loss": 0.8814, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3677782352133937, |
|
"grad_norm": 1.6701184511184692, |
|
"learning_rate": 4.9922807943312135e-05, |
|
"loss": 0.8039, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3732674626046384, |
|
"grad_norm": 1.2474104166030884, |
|
"learning_rate": 4.990975402736457e-05, |
|
"loss": 0.8411, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3787566899958831, |
|
"grad_norm": 1.0203585624694824, |
|
"learning_rate": 4.9895682823331564e-05, |
|
"loss": 0.7838, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.38424591738712777, |
|
"grad_norm": 1.2643638849258423, |
|
"learning_rate": 4.988059490586624e-05, |
|
"loss": 0.7802, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.38973514477837246, |
|
"grad_norm": 1.2018098831176758, |
|
"learning_rate": 4.986449089114325e-05, |
|
"loss": 0.8049, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.39522437216961714, |
|
"grad_norm": 1.5600682497024536, |
|
"learning_rate": 4.984737143683356e-05, |
|
"loss": 0.864, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.40071359956086183, |
|
"grad_norm": 1.1996121406555176, |
|
"learning_rate": 4.982923724207764e-05, |
|
"loss": 0.8222, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.40620282695210647, |
|
"grad_norm": 1.2239071130752563, |
|
"learning_rate": 4.9810089047456873e-05, |
|
"loss": 0.7757, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.41169205434335115, |
|
"grad_norm": 1.278192162513733, |
|
"learning_rate": 4.978992763496334e-05, |
|
"loss": 0.7693, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.41718128173459584, |
|
"grad_norm": 1.3768647909164429, |
|
"learning_rate": 4.976875382796786e-05, |
|
"loss": 0.7927, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.42267050912584053, |
|
"grad_norm": 1.3257420063018799, |
|
"learning_rate": 4.974656849118638e-05, |
|
"loss": 0.7997, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4281597365170852, |
|
"grad_norm": 1.4355076551437378, |
|
"learning_rate": 4.972337253064466e-05, |
|
"loss": 0.7719, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4336489639083299, |
|
"grad_norm": 1.0469034910202026, |
|
"learning_rate": 4.969916689364128e-05, |
|
"loss": 0.8203, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4391381912995746, |
|
"grad_norm": 1.4641021490097046, |
|
"learning_rate": 4.9673952568708906e-05, |
|
"loss": 0.8303, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4446274186908193, |
|
"grad_norm": 1.6394554376602173, |
|
"learning_rate": 4.964773058557399e-05, |
|
"loss": 0.8693, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.450116646082064, |
|
"grad_norm": 1.777869462966919, |
|
"learning_rate": 4.9620502015114675e-05, |
|
"loss": 0.7929, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4556058734733086, |
|
"grad_norm": 1.161238670349121, |
|
"learning_rate": 4.959226796931706e-05, |
|
"loss": 0.8393, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 1.5231930017471313, |
|
"learning_rate": 4.95630296012298e-05, |
|
"loss": 0.8195, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.466584328255798, |
|
"grad_norm": 1.446094274520874, |
|
"learning_rate": 4.953278810491701e-05, |
|
"loss": 0.8157, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4720735556470427, |
|
"grad_norm": 1.702967882156372, |
|
"learning_rate": 4.950154471540951e-05, |
|
"loss": 0.7932, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.47756278303828736, |
|
"grad_norm": 1.3679907321929932, |
|
"learning_rate": 4.9469300708654385e-05, |
|
"loss": 0.7741, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.48305201042953205, |
|
"grad_norm": 1.1557847261428833, |
|
"learning_rate": 4.943605740146286e-05, |
|
"loss": 0.8406, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.48854123782077674, |
|
"grad_norm": 1.4791802167892456, |
|
"learning_rate": 4.940181615145655e-05, |
|
"loss": 0.7731, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.49403046521202143, |
|
"grad_norm": 1.3994717597961426, |
|
"learning_rate": 4.936657835701198e-05, |
|
"loss": 0.7903, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.49951969260326606, |
|
"grad_norm": 1.2580246925354004, |
|
"learning_rate": 4.933034545720354e-05, |
|
"loss": 0.7601, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5050089199945108, |
|
"grad_norm": 1.4461493492126465, |
|
"learning_rate": 4.9293118931744624e-05, |
|
"loss": 0.8246, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5104981473857555, |
|
"grad_norm": 1.9255192279815674, |
|
"learning_rate": 4.925490030092729e-05, |
|
"loss": 0.7729, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5159873747770002, |
|
"grad_norm": 1.2568154335021973, |
|
"learning_rate": 4.9215691125560104e-05, |
|
"loss": 0.7711, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5214766021682449, |
|
"grad_norm": 1.2998193502426147, |
|
"learning_rate": 4.917549300690445e-05, |
|
"loss": 0.7897, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5269658295594895, |
|
"grad_norm": 1.712433099746704, |
|
"learning_rate": 4.9134307586609104e-05, |
|
"loss": 0.7356, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5324550569507341, |
|
"grad_norm": 1.4403119087219238, |
|
"learning_rate": 4.9092136546643184e-05, |
|
"loss": 0.7599, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5379442843419788, |
|
"grad_norm": 1.2811603546142578, |
|
"learning_rate": 4.9048981609227504e-05, |
|
"loss": 0.7572, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5434335117332235, |
|
"grad_norm": 1.6650887727737427, |
|
"learning_rate": 4.9004844536764185e-05, |
|
"loss": 0.7726, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5489227391244682, |
|
"grad_norm": 1.4498590230941772, |
|
"learning_rate": 4.8959727131764735e-05, |
|
"loss": 0.7772, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5544119665157129, |
|
"grad_norm": 1.38353431224823, |
|
"learning_rate": 4.891363123677638e-05, |
|
"loss": 0.7954, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5599011939069576, |
|
"grad_norm": 1.5972951650619507, |
|
"learning_rate": 4.886655873430687e-05, |
|
"loss": 0.759, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5653904212982023, |
|
"grad_norm": 1.409515380859375, |
|
"learning_rate": 4.881851154674757e-05, |
|
"loss": 0.675, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.570879648689447, |
|
"grad_norm": 1.2562367916107178, |
|
"learning_rate": 4.876949163629494e-05, |
|
"loss": 0.8194, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 1.520317554473877, |
|
"learning_rate": 4.871950100487043e-05, |
|
"loss": 0.7587, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5818581034719363, |
|
"grad_norm": 1.5166853666305542, |
|
"learning_rate": 4.866854169403871e-05, |
|
"loss": 0.6909, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.587347330863181, |
|
"grad_norm": 1.4219826459884644, |
|
"learning_rate": 4.861661578492429e-05, |
|
"loss": 0.7907, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5928365582544257, |
|
"grad_norm": 1.449629545211792, |
|
"learning_rate": 4.856372539812655e-05, |
|
"loss": 0.7512, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5983257856456704, |
|
"grad_norm": 1.715462565422058, |
|
"learning_rate": 4.850987269363311e-05, |
|
"loss": 0.7171, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6038150130369151, |
|
"grad_norm": 1.6240124702453613, |
|
"learning_rate": 4.845505987073161e-05, |
|
"loss": 0.763, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6093042404281598, |
|
"grad_norm": 1.3949427604675293, |
|
"learning_rate": 4.839928916791996e-05, |
|
"loss": 0.7513, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6147934678194045, |
|
"grad_norm": 1.491368293762207, |
|
"learning_rate": 4.834256286281482e-05, |
|
"loss": 0.6982, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.620282695210649, |
|
"grad_norm": 1.2943052053451538, |
|
"learning_rate": 4.82848832720587e-05, |
|
"loss": 0.8051, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6257719226018937, |
|
"grad_norm": 1.7091878652572632, |
|
"learning_rate": 4.8226252751225245e-05, |
|
"loss": 0.7914, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6312611499931384, |
|
"grad_norm": 1.2987576723098755, |
|
"learning_rate": 4.816667369472309e-05, |
|
"loss": 0.7705, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6367503773843831, |
|
"grad_norm": 1.4213101863861084, |
|
"learning_rate": 4.810614853569807e-05, |
|
"loss": 0.7916, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6422396047756278, |
|
"grad_norm": 1.4974167346954346, |
|
"learning_rate": 4.804467974593387e-05, |
|
"loss": 0.7628, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6477288321668725, |
|
"grad_norm": 1.729684591293335, |
|
"learning_rate": 4.798226983575103e-05, |
|
"loss": 0.7393, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6532180595581172, |
|
"grad_norm": 1.765308141708374, |
|
"learning_rate": 4.7918921353904464e-05, |
|
"loss": 0.7251, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6587072869493619, |
|
"grad_norm": 1.7703893184661865, |
|
"learning_rate": 4.785463688747937e-05, |
|
"loss": 0.7329, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6641965143406066, |
|
"grad_norm": 2.700155258178711, |
|
"learning_rate": 4.778941906178556e-05, |
|
"loss": 0.6967, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6696857417318512, |
|
"grad_norm": 1.3553398847579956, |
|
"learning_rate": 4.772327054025027e-05, |
|
"loss": 0.7221, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6751749691230959, |
|
"grad_norm": 1.2455166578292847, |
|
"learning_rate": 4.765619402430934e-05, |
|
"loss": 0.6925, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6806641965143406, |
|
"grad_norm": 1.7047752141952515, |
|
"learning_rate": 4.758819225329696e-05, |
|
"loss": 0.7373, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6861534239055853, |
|
"grad_norm": 1.5384269952774048, |
|
"learning_rate": 4.751926800433374e-05, |
|
"loss": 0.7348, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 1.491666316986084, |
|
"learning_rate": 4.744942409221333e-05, |
|
"loss": 0.7121, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6971318786880747, |
|
"grad_norm": 1.4360090494155884, |
|
"learning_rate": 4.7378663369287445e-05, |
|
"loss": 0.6728, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7026211060793194, |
|
"grad_norm": 1.3977197408676147, |
|
"learning_rate": 4.730698872534938e-05, |
|
"loss": 0.7617, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7081103334705641, |
|
"grad_norm": 1.7353872060775757, |
|
"learning_rate": 4.723440308751601e-05, |
|
"loss": 0.6887, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7135995608618086, |
|
"grad_norm": 1.3200151920318604, |
|
"learning_rate": 4.716090942010823e-05, |
|
"loss": 0.752, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7190887882530533, |
|
"grad_norm": 1.333355188369751, |
|
"learning_rate": 4.708651072452993e-05, |
|
"loss": 0.7336, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.724578015644298, |
|
"grad_norm": 1.6440070867538452, |
|
"learning_rate": 4.701121003914537e-05, |
|
"loss": 0.7333, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7300672430355427, |
|
"grad_norm": 1.848791480064392, |
|
"learning_rate": 4.693501043915514e-05, |
|
"loss": 0.7648, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7355564704267874, |
|
"grad_norm": 1.593891978263855, |
|
"learning_rate": 4.685791503647052e-05, |
|
"loss": 0.787, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7410456978180321, |
|
"grad_norm": 1.6957751512527466, |
|
"learning_rate": 4.6779926979586475e-05, |
|
"loss": 0.7212, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7465349252092768, |
|
"grad_norm": 1.3588330745697021, |
|
"learning_rate": 4.6701049453453e-05, |
|
"loss": 0.7175, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7520241526005215, |
|
"grad_norm": 1.462112307548523, |
|
"learning_rate": 4.662128567934509e-05, |
|
"loss": 0.7133, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7575133799917662, |
|
"grad_norm": 1.5633749961853027, |
|
"learning_rate": 4.654063891473115e-05, |
|
"loss": 0.6978, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7630026073830108, |
|
"grad_norm": 1.7605217695236206, |
|
"learning_rate": 4.645911245314e-05, |
|
"loss": 0.7019, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7684918347742555, |
|
"grad_norm": 1.6843842267990112, |
|
"learning_rate": 4.637670962402636e-05, |
|
"loss": 0.7483, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7739810621655002, |
|
"grad_norm": 2.015845537185669, |
|
"learning_rate": 4.629343379263487e-05, |
|
"loss": 0.7208, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7794702895567449, |
|
"grad_norm": 1.8158447742462158, |
|
"learning_rate": 4.620928835986267e-05, |
|
"loss": 0.7733, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7849595169479896, |
|
"grad_norm": 1.7793387174606323, |
|
"learning_rate": 4.6124276762120485e-05, |
|
"loss": 0.7111, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7904487443392343, |
|
"grad_norm": 1.6674373149871826, |
|
"learning_rate": 4.603840247119233e-05, |
|
"loss": 0.6663, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.795937971730479, |
|
"grad_norm": 1.4028520584106445, |
|
"learning_rate": 4.595166899409368e-05, |
|
"loss": 0.7692, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8014271991217237, |
|
"grad_norm": 1.6022142171859741, |
|
"learning_rate": 4.5864079872928265e-05, |
|
"loss": 0.7305, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 1.4971508979797363, |
|
"learning_rate": 4.577563868474344e-05, |
|
"loss": 0.6875, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8124056539042129, |
|
"grad_norm": 1.8490726947784424, |
|
"learning_rate": 4.5686349041384055e-05, |
|
"loss": 0.6849, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8178948812954576, |
|
"grad_norm": 1.9100017547607422, |
|
"learning_rate": 4.559621458934498e-05, |
|
"loss": 0.6506, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8233841086867023, |
|
"grad_norm": 1.6782461404800415, |
|
"learning_rate": 4.550523900962219e-05, |
|
"loss": 0.704, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.828873336077947, |
|
"grad_norm": 2.1226425170898438, |
|
"learning_rate": 4.541342601756242e-05, |
|
"loss": 0.6988, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8343625634691917, |
|
"grad_norm": 1.658097267150879, |
|
"learning_rate": 4.532077936271144e-05, |
|
"loss": 0.705, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8398517908604364, |
|
"grad_norm": 1.8850988149642944, |
|
"learning_rate": 4.522730282866093e-05, |
|
"loss": 0.6801, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8453410182516811, |
|
"grad_norm": 1.5480940341949463, |
|
"learning_rate": 4.513300023289397e-05, |
|
"loss": 0.6308, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8508302456429258, |
|
"grad_norm": 1.7652947902679443, |
|
"learning_rate": 4.503787542662912e-05, |
|
"loss": 0.6731, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8563194730341704, |
|
"grad_norm": 1.902155876159668, |
|
"learning_rate": 4.494193229466314e-05, |
|
"loss": 0.7404, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8618087004254151, |
|
"grad_norm": 2.1436920166015625, |
|
"learning_rate": 4.4845174755212385e-05, |
|
"loss": 0.6884, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8672979278166598, |
|
"grad_norm": 1.6139538288116455, |
|
"learning_rate": 4.47476067597527e-05, |
|
"loss": 0.6947, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8727871552079045, |
|
"grad_norm": 1.5919870138168335, |
|
"learning_rate": 4.464923229285816e-05, |
|
"loss": 0.6982, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8782763825991492, |
|
"grad_norm": 1.6209038496017456, |
|
"learning_rate": 4.4550055372038225e-05, |
|
"loss": 0.7124, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8837656099903939, |
|
"grad_norm": 1.631515383720398, |
|
"learning_rate": 4.445008004757376e-05, |
|
"loss": 0.6771, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8892548373816386, |
|
"grad_norm": 1.4836645126342773, |
|
"learning_rate": 4.434931040235159e-05, |
|
"loss": 0.6272, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8947440647728833, |
|
"grad_norm": 1.3640625476837158, |
|
"learning_rate": 4.4247750551697756e-05, |
|
"loss": 0.6477, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.900233292164128, |
|
"grad_norm": 1.5562537908554077, |
|
"learning_rate": 4.414540464320945e-05, |
|
"loss": 0.7128, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9057225195553725, |
|
"grad_norm": 1.548048973083496, |
|
"learning_rate": 4.404227685658565e-05, |
|
"loss": 0.7098, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9112117469466172, |
|
"grad_norm": 1.613368034362793, |
|
"learning_rate": 4.39383714034564e-05, |
|
"loss": 0.6926, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9167009743378619, |
|
"grad_norm": 1.789654016494751, |
|
"learning_rate": 4.383369252721084e-05, |
|
"loss": 0.6398, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 1.625928282737732, |
|
"learning_rate": 4.372824450282388e-05, |
|
"loss": 0.7087, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9276794291203513, |
|
"grad_norm": 1.686936855316162, |
|
"learning_rate": 4.362203163668164e-05, |
|
"loss": 0.6764, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.933168656511596, |
|
"grad_norm": 1.6460559368133545, |
|
"learning_rate": 4.351505826640555e-05, |
|
"loss": 0.6969, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9386578839028407, |
|
"grad_norm": 1.6267837285995483, |
|
"learning_rate": 4.3407328760675245e-05, |
|
"loss": 0.672, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9441471112940854, |
|
"grad_norm": 1.5070548057556152, |
|
"learning_rate": 4.329884751905014e-05, |
|
"loss": 0.6586, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.94963633868533, |
|
"grad_norm": 1.8759193420410156, |
|
"learning_rate": 4.3189618971789747e-05, |
|
"loss": 0.6601, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9551255660765747, |
|
"grad_norm": 1.6111549139022827, |
|
"learning_rate": 4.307964757967273e-05, |
|
"loss": 0.7042, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9606147934678194, |
|
"grad_norm": 1.3748118877410889, |
|
"learning_rate": 4.2968937833814784e-05, |
|
"loss": 0.6573, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9661040208590641, |
|
"grad_norm": 1.7284533977508545, |
|
"learning_rate": 4.285749425548518e-05, |
|
"loss": 0.619, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9715932482503088, |
|
"grad_norm": 1.5528743267059326, |
|
"learning_rate": 4.274532139592211e-05, |
|
"loss": 0.6601, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9770824756415535, |
|
"grad_norm": 1.6220190525054932, |
|
"learning_rate": 4.2632423836146885e-05, |
|
"loss": 0.6449, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9825717030327982, |
|
"grad_norm": 2.00435471534729, |
|
"learning_rate": 4.251880618677678e-05, |
|
"loss": 0.6404, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9880609304240429, |
|
"grad_norm": 1.8456660509109497, |
|
"learning_rate": 4.240447308783679e-05, |
|
"loss": 0.7124, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9935501578152875, |
|
"grad_norm": 1.8724040985107422, |
|
"learning_rate": 4.2289429208570094e-05, |
|
"loss": 0.7138, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9990393852065321, |
|
"grad_norm": 1.441105842590332, |
|
"learning_rate": 4.217367924724741e-05, |
|
"loss": 0.7439, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.0045286125977768, |
|
"grad_norm": 1.392276406288147, |
|
"learning_rate": 4.2057227930975066e-05, |
|
"loss": 0.4876, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.0100178399890216, |
|
"grad_norm": 1.4682689905166626, |
|
"learning_rate": 4.194008001550204e-05, |
|
"loss": 0.4949, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.0155070673802662, |
|
"grad_norm": 1.7317707538604736, |
|
"learning_rate": 4.1822240285025635e-05, |
|
"loss": 0.5329, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.020996294771511, |
|
"grad_norm": 1.9328278303146362, |
|
"learning_rate": 4.170371355199621e-05, |
|
"loss": 0.5068, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0264855221627556, |
|
"grad_norm": 1.7879178524017334, |
|
"learning_rate": 4.158450465692051e-05, |
|
"loss": 0.5112, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0319747495540004, |
|
"grad_norm": 1.6801658868789673, |
|
"learning_rate": 4.146461846816411e-05, |
|
"loss": 0.4826, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 1.6541537046432495, |
|
"learning_rate": 4.1344059881752534e-05, |
|
"loss": 0.4522, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0429532043364897, |
|
"grad_norm": 2.27681303024292, |
|
"learning_rate": 4.1222833821171315e-05, |
|
"loss": 0.4726, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0484424317277343, |
|
"grad_norm": 1.6335279941558838, |
|
"learning_rate": 4.110094523716492e-05, |
|
"loss": 0.469, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.053931659118979, |
|
"grad_norm": 1.730760931968689, |
|
"learning_rate": 4.0978399107534584e-05, |
|
"loss": 0.4554, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.0594208865102237, |
|
"grad_norm": 1.636106014251709, |
|
"learning_rate": 4.0855200436935e-05, |
|
"loss": 0.4914, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0649101139014683, |
|
"grad_norm": 1.855231523513794, |
|
"learning_rate": 4.073135425666997e-05, |
|
"loss": 0.4609, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.070399341292713, |
|
"grad_norm": 2.0908730030059814, |
|
"learning_rate": 4.0606865624486875e-05, |
|
"loss": 0.472, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0758885686839577, |
|
"grad_norm": 1.7960741519927979, |
|
"learning_rate": 4.048173962437019e-05, |
|
"loss": 0.5072, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0813777960752025, |
|
"grad_norm": 1.6274662017822266, |
|
"learning_rate": 4.035598136633378e-05, |
|
"loss": 0.455, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.086867023466447, |
|
"grad_norm": 1.898768663406372, |
|
"learning_rate": 4.0229595986212304e-05, |
|
"loss": 0.5023, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0923562508576918, |
|
"grad_norm": 1.6245406866073608, |
|
"learning_rate": 4.0102588645451396e-05, |
|
"loss": 0.4863, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0978454782489364, |
|
"grad_norm": 1.440356731414795, |
|
"learning_rate": 3.997496453089692e-05, |
|
"loss": 0.4912, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1033347056401812, |
|
"grad_norm": 1.9108120203018188, |
|
"learning_rate": 3.984672885458312e-05, |
|
"loss": 0.4691, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.1088239330314258, |
|
"grad_norm": 1.7355122566223145, |
|
"learning_rate": 3.971788685351978e-05, |
|
"loss": 0.4965, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.1143131604226706, |
|
"grad_norm": 1.7125989198684692, |
|
"learning_rate": 3.9588443789478366e-05, |
|
"loss": 0.468, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.1198023878139152, |
|
"grad_norm": 1.8434703350067139, |
|
"learning_rate": 3.945840494877709e-05, |
|
"loss": 0.4886, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.12529161520516, |
|
"grad_norm": 2.302004337310791, |
|
"learning_rate": 3.934086499185402e-05, |
|
"loss": 0.4932, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.1307808425964045, |
|
"grad_norm": 1.931429147720337, |
|
"learning_rate": 3.9209708826272075e-05, |
|
"loss": 0.5121, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.1362700699876491, |
|
"grad_norm": 1.889414668083191, |
|
"learning_rate": 3.907797235116677e-05, |
|
"loss": 0.5094, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.141759297378894, |
|
"grad_norm": 2.243352174758911, |
|
"learning_rate": 3.894566094651682e-05, |
|
"loss": 0.488, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.1472485247701387, |
|
"grad_norm": 1.6855474710464478, |
|
"learning_rate": 3.881278001578046e-05, |
|
"loss": 0.531, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 2.328468084335327, |
|
"learning_rate": 3.8679334985674786e-05, |
|
"loss": 0.5397, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.1582269795526279, |
|
"grad_norm": 1.8057246208190918, |
|
"learning_rate": 3.854533130595408e-05, |
|
"loss": 0.4964, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.1637162069438727, |
|
"grad_norm": 1.6702812910079956, |
|
"learning_rate": 3.8410774449187315e-05, |
|
"loss": 0.5011, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1692054343351173, |
|
"grad_norm": 1.4972355365753174, |
|
"learning_rate": 3.827566991053461e-05, |
|
"loss": 0.4922, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.174694661726362, |
|
"grad_norm": 1.739022970199585, |
|
"learning_rate": 3.814002320752287e-05, |
|
"loss": 0.4309, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.1801838891176066, |
|
"grad_norm": 1.8909087181091309, |
|
"learning_rate": 3.8003839879820377e-05, |
|
"loss": 0.4761, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.1856731165088514, |
|
"grad_norm": 1.9765682220458984, |
|
"learning_rate": 3.786712548901064e-05, |
|
"loss": 0.4895, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.191162343900096, |
|
"grad_norm": 2.1266307830810547, |
|
"learning_rate": 3.772988561836517e-05, |
|
"loss": 0.4894, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1966515712913408, |
|
"grad_norm": 1.7856028079986572, |
|
"learning_rate": 3.759212587261559e-05, |
|
"loss": 0.4812, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.2021407986825854, |
|
"grad_norm": 1.8546531200408936, |
|
"learning_rate": 3.745385187772463e-05, |
|
"loss": 0.4928, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.2076300260738302, |
|
"grad_norm": 1.8596118688583374, |
|
"learning_rate": 3.731506928065641e-05, |
|
"loss": 0.512, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.2131192534650748, |
|
"grad_norm": 2.024635076522827, |
|
"learning_rate": 3.717578374914585e-05, |
|
"loss": 0.4715, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.2186084808563196, |
|
"grad_norm": 2.1620028018951416, |
|
"learning_rate": 3.703600097146718e-05, |
|
"loss": 0.4754, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.2240977082475641, |
|
"grad_norm": 1.9437251091003418, |
|
"learning_rate": 3.68957266562016e-05, |
|
"loss": 0.475, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.229586935638809, |
|
"grad_norm": 2.7284131050109863, |
|
"learning_rate": 3.675496653200425e-05, |
|
"loss": 0.4901, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.2350761630300535, |
|
"grad_norm": 2.2666921615600586, |
|
"learning_rate": 3.661372634737013e-05, |
|
"loss": 0.4694, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.240565390421298, |
|
"grad_norm": 1.5657079219818115, |
|
"learning_rate": 3.647201187039946e-05, |
|
"loss": 0.4809, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.246054617812543, |
|
"grad_norm": 2.3592708110809326, |
|
"learning_rate": 3.632982888856202e-05, |
|
"loss": 0.4539, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.2515438452037877, |
|
"grad_norm": 1.7647560834884644, |
|
"learning_rate": 3.6187183208460844e-05, |
|
"loss": 0.4945, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.2570330725950323, |
|
"grad_norm": 2.0273566246032715, |
|
"learning_rate": 3.604408065559508e-05, |
|
"loss": 0.4853, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.2625222999862769, |
|
"grad_norm": 2.0692555904388428, |
|
"learning_rate": 3.590052707412208e-05, |
|
"loss": 0.498, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 2.35859751701355, |
|
"learning_rate": 3.575652832661872e-05, |
|
"loss": 0.5287, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2735007547687662, |
|
"grad_norm": 1.8455514907836914, |
|
"learning_rate": 3.5612090293841994e-05, |
|
"loss": 0.5035, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.278989982160011, |
|
"grad_norm": 2.233416795730591, |
|
"learning_rate": 3.5467218874488837e-05, |
|
"loss": 0.5078, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.2844792095512556, |
|
"grad_norm": 1.7934064865112305, |
|
"learning_rate": 3.5321919984955244e-05, |
|
"loss": 0.5015, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.2899684369425004, |
|
"grad_norm": 1.753578543663025, |
|
"learning_rate": 3.517619955909463e-05, |
|
"loss": 0.4556, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.295457664333745, |
|
"grad_norm": 1.9207135438919067, |
|
"learning_rate": 3.5030063547975525e-05, |
|
"loss": 0.4417, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.3009468917249898, |
|
"grad_norm": 1.77664315700531, |
|
"learning_rate": 3.488351791963849e-05, |
|
"loss": 0.435, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.3064361191162344, |
|
"grad_norm": 1.5567264556884766, |
|
"learning_rate": 3.473656865885248e-05, |
|
"loss": 0.4872, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.3119253465074792, |
|
"grad_norm": 1.9232813119888306, |
|
"learning_rate": 3.4589221766870306e-05, |
|
"loss": 0.479, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.3174145738987237, |
|
"grad_norm": 1.6090134382247925, |
|
"learning_rate": 3.444148326118366e-05, |
|
"loss": 0.577, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.3229038012899683, |
|
"grad_norm": 1.7962336540222168, |
|
"learning_rate": 3.4293359175277314e-05, |
|
"loss": 0.4801, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.3283930286812131, |
|
"grad_norm": 2.1019630432128906, |
|
"learning_rate": 3.414485555838273e-05, |
|
"loss": 0.4884, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.333882256072458, |
|
"grad_norm": 1.8056087493896484, |
|
"learning_rate": 3.3995978475231024e-05, |
|
"loss": 0.4527, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.3393714834637025, |
|
"grad_norm": 1.7557107210159302, |
|
"learning_rate": 3.3846734005805254e-05, |
|
"loss": 0.4831, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.344860710854947, |
|
"grad_norm": 1.7773773670196533, |
|
"learning_rate": 3.369712824509217e-05, |
|
"loss": 0.4994, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.3503499382461919, |
|
"grad_norm": 1.7856857776641846, |
|
"learning_rate": 3.354716730283327e-05, |
|
"loss": 0.4761, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.3558391656374364, |
|
"grad_norm": 2.119858980178833, |
|
"learning_rate": 3.3396857303275296e-05, |
|
"loss": 0.4891, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.3613283930286812, |
|
"grad_norm": 1.9912039041519165, |
|
"learning_rate": 3.324620438492011e-05, |
|
"loss": 0.4415, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.3668176204199258, |
|
"grad_norm": 2.347066879272461, |
|
"learning_rate": 3.309521470027403e-05, |
|
"loss": 0.4733, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.3723068478111706, |
|
"grad_norm": 1.963139533996582, |
|
"learning_rate": 3.294389441559655e-05, |
|
"loss": 0.4626, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.3777960752024152, |
|
"grad_norm": 2.103672742843628, |
|
"learning_rate": 3.279224971064851e-05, |
|
"loss": 0.5168, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 1.741493821144104, |
|
"learning_rate": 3.2640286778439746e-05, |
|
"loss": 0.4687, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.3887745299849046, |
|
"grad_norm": 1.9451817274093628, |
|
"learning_rate": 3.248801182497615e-05, |
|
"loss": 0.454, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.3942637573761494, |
|
"grad_norm": 2.4190995693206787, |
|
"learning_rate": 3.233543106900624e-05, |
|
"loss": 0.4594, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.399752984767394, |
|
"grad_norm": 1.543632984161377, |
|
"learning_rate": 3.21825507417672e-05, |
|
"loss": 0.4408, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.4052422121586385, |
|
"grad_norm": 2.006373882293701, |
|
"learning_rate": 3.202937708673033e-05, |
|
"loss": 0.4802, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.4107314395498833, |
|
"grad_norm": 1.912208914756775, |
|
"learning_rate": 3.1875916359346214e-05, |
|
"loss": 0.4731, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.4162206669411281, |
|
"grad_norm": 1.6737933158874512, |
|
"learning_rate": 3.17221748267891e-05, |
|
"loss": 0.454, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.4217098943323727, |
|
"grad_norm": 1.9672836065292358, |
|
"learning_rate": 3.156815876770105e-05, |
|
"loss": 0.4229, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.4271991217236173, |
|
"grad_norm": 1.512810230255127, |
|
"learning_rate": 3.1413874471935496e-05, |
|
"loss": 0.4896, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.432688349114862, |
|
"grad_norm": 1.8552961349487305, |
|
"learning_rate": 3.125932824030037e-05, |
|
"loss": 0.4808, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.438177576506107, |
|
"grad_norm": 1.7483348846435547, |
|
"learning_rate": 3.110452638430081e-05, |
|
"loss": 0.4271, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.4436668038973515, |
|
"grad_norm": 1.7746537923812866, |
|
"learning_rate": 3.094947522588135e-05, |
|
"loss": 0.4618, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.449156031288596, |
|
"grad_norm": 2.1067216396331787, |
|
"learning_rate": 3.079418109716778e-05, |
|
"loss": 0.4765, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.4546452586798408, |
|
"grad_norm": 1.6052168607711792, |
|
"learning_rate": 3.063865034020857e-05, |
|
"loss": 0.4596, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.4601344860710854, |
|
"grad_norm": 1.6968189477920532, |
|
"learning_rate": 3.0482889306715813e-05, |
|
"loss": 0.4384, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.4656237134623302, |
|
"grad_norm": 1.869379997253418, |
|
"learning_rate": 3.032690435780584e-05, |
|
"loss": 0.4872, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.4711129408535748, |
|
"grad_norm": 1.8812456130981445, |
|
"learning_rate": 3.017070186373949e-05, |
|
"loss": 0.4581, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.4766021682448196, |
|
"grad_norm": 1.99275803565979, |
|
"learning_rate": 3.001428820366187e-05, |
|
"loss": 0.49, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.4820913956360642, |
|
"grad_norm": 1.6111352443695068, |
|
"learning_rate": 2.9857669765341928e-05, |
|
"loss": 0.4262, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.487580623027309, |
|
"grad_norm": 2.6115357875823975, |
|
"learning_rate": 2.9700852944911512e-05, |
|
"loss": 0.4774, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.4930698504185536, |
|
"grad_norm": 1.8144830465316772, |
|
"learning_rate": 2.9543844146604195e-05, |
|
"loss": 0.4618, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 1.7375366687774658, |
|
"learning_rate": 2.938664978249372e-05, |
|
"loss": 0.4278, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.504048305201043, |
|
"grad_norm": 1.914023756980896, |
|
"learning_rate": 2.9229276272232146e-05, |
|
"loss": 0.4706, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.5095375325922875, |
|
"grad_norm": 1.7386229038238525, |
|
"learning_rate": 2.907173004278768e-05, |
|
"loss": 0.4308, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.5150267599835323, |
|
"grad_norm": 1.5574982166290283, |
|
"learning_rate": 2.8914017528182185e-05, |
|
"loss": 0.4487, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.5205159873747771, |
|
"grad_norm": 2.144409418106079, |
|
"learning_rate": 2.8756145169228432e-05, |
|
"loss": 0.4232, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.5260052147660217, |
|
"grad_norm": 2.5904343128204346, |
|
"learning_rate": 2.859811941326709e-05, |
|
"loss": 0.4603, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.5314944421572663, |
|
"grad_norm": 2.3824493885040283, |
|
"learning_rate": 2.8439946713903354e-05, |
|
"loss": 0.4649, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.536983669548511, |
|
"grad_norm": 2.1540448665618896, |
|
"learning_rate": 2.8281633530743497e-05, |
|
"loss": 0.4988, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.5424728969397559, |
|
"grad_norm": 2.121973752975464, |
|
"learning_rate": 2.8123186329130942e-05, |
|
"loss": 0.4795, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.5479621243310004, |
|
"grad_norm": 1.8560881614685059, |
|
"learning_rate": 2.7964611579882317e-05, |
|
"loss": 0.427, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.553451351722245, |
|
"grad_norm": 2.625507354736328, |
|
"learning_rate": 2.7805915759023153e-05, |
|
"loss": 0.4982, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.5589405791134898, |
|
"grad_norm": 1.8594845533370972, |
|
"learning_rate": 2.764710534752342e-05, |
|
"loss": 0.4489, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.5644298065047346, |
|
"grad_norm": 1.9746872186660767, |
|
"learning_rate": 2.748818683103285e-05, |
|
"loss": 0.4177, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.569919033895979, |
|
"grad_norm": 1.9741085767745972, |
|
"learning_rate": 2.7329166699616064e-05, |
|
"loss": 0.4816, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.5754082612872238, |
|
"grad_norm": 1.9904859066009521, |
|
"learning_rate": 2.7170051447487532e-05, |
|
"loss": 0.4392, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.5808974886784686, |
|
"grad_norm": 1.9376888275146484, |
|
"learning_rate": 2.7010847572746356e-05, |
|
"loss": 0.5002, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.5863867160697132, |
|
"grad_norm": 1.6673862934112549, |
|
"learning_rate": 2.6851561577110874e-05, |
|
"loss": 0.437, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5918759434609577, |
|
"grad_norm": 1.8041437864303589, |
|
"learning_rate": 2.6692199965653185e-05, |
|
"loss": 0.4565, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5973651708522025, |
|
"grad_norm": 1.6648041009902954, |
|
"learning_rate": 2.6532769246533435e-05, |
|
"loss": 0.4755, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.6028543982434473, |
|
"grad_norm": 2.290234088897705, |
|
"learning_rate": 2.6373275930734075e-05, |
|
"loss": 0.4603, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.608343625634692, |
|
"grad_norm": 2.1882123947143555, |
|
"learning_rate": 2.621372653179391e-05, |
|
"loss": 0.4551, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 1.7908653020858765, |
|
"learning_rate": 2.6054127565542146e-05, |
|
"loss": 0.5062, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.6193220804171813, |
|
"grad_norm": 2.1407206058502197, |
|
"learning_rate": 2.5894485549832254e-05, |
|
"loss": 0.5046, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.624811307808426, |
|
"grad_norm": 1.8676074743270874, |
|
"learning_rate": 2.57348070042758e-05, |
|
"loss": 0.4685, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.6303005351996707, |
|
"grad_norm": 2.0238535404205322, |
|
"learning_rate": 2.5575098449976204e-05, |
|
"loss": 0.4836, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.6357897625909152, |
|
"grad_norm": 2.1416938304901123, |
|
"learning_rate": 2.541536640926238e-05, |
|
"loss": 0.4146, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.64127898998216, |
|
"grad_norm": 2.006524085998535, |
|
"learning_rate": 2.5255617405422443e-05, |
|
"loss": 0.441, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.6467682173734048, |
|
"grad_norm": 2.2434608936309814, |
|
"learning_rate": 2.5095857962437226e-05, |
|
"loss": 0.4932, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6522574447646494, |
|
"grad_norm": 2.113938093185425, |
|
"learning_rate": 2.4936094604713918e-05, |
|
"loss": 0.4324, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.657746672155894, |
|
"grad_norm": 1.8772289752960205, |
|
"learning_rate": 2.4776333856819565e-05, |
|
"loss": 0.4655, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.6632358995471388, |
|
"grad_norm": 2.1245956420898438, |
|
"learning_rate": 2.4616582243214623e-05, |
|
"loss": 0.4631, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.6687251269383834, |
|
"grad_norm": 2.2539162635803223, |
|
"learning_rate": 2.4456846287986525e-05, |
|
"loss": 0.4492, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.674214354329628, |
|
"grad_norm": 2.4101765155792236, |
|
"learning_rate": 2.429713251458323e-05, |
|
"loss": 0.4326, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.6797035817208728, |
|
"grad_norm": 2.1554176807403564, |
|
"learning_rate": 2.4137447445546837e-05, |
|
"loss": 0.4527, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.6851928091121176, |
|
"grad_norm": 2.0779566764831543, |
|
"learning_rate": 2.397779760224713e-05, |
|
"loss": 0.4331, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.6906820365033621, |
|
"grad_norm": 2.3145909309387207, |
|
"learning_rate": 2.3818189504615367e-05, |
|
"loss": 0.4159, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6961712638946067, |
|
"grad_norm": 2.1414687633514404, |
|
"learning_rate": 2.3658629670877938e-05, |
|
"loss": 0.4996, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.7016604912858515, |
|
"grad_norm": 1.6377606391906738, |
|
"learning_rate": 2.3499124617290187e-05, |
|
"loss": 0.4827, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.7071497186770963, |
|
"grad_norm": 2.27193546295166, |
|
"learning_rate": 2.3339680857870288e-05, |
|
"loss": 0.5358, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.7126389460683409, |
|
"grad_norm": 1.6943323612213135, |
|
"learning_rate": 2.318030490413323e-05, |
|
"loss": 0.4584, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.7181281734595855, |
|
"grad_norm": 2.1574575901031494, |
|
"learning_rate": 2.30210032648249e-05, |
|
"loss": 0.4366, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.7236174008508303, |
|
"grad_norm": 1.805962085723877, |
|
"learning_rate": 2.286178244565625e-05, |
|
"loss": 0.4633, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 2.4552011489868164, |
|
"learning_rate": 2.2702648949037618e-05, |
|
"loss": 0.4861, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.7345958556333196, |
|
"grad_norm": 1.9455459117889404, |
|
"learning_rate": 2.2543609273813195e-05, |
|
"loss": 0.4881, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.7400850830245642, |
|
"grad_norm": 1.8050341606140137, |
|
"learning_rate": 2.2384669914995592e-05, |
|
"loss": 0.418, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.745574310415809, |
|
"grad_norm": 2.0198020935058594, |
|
"learning_rate": 2.2225837363500636e-05, |
|
"loss": 0.472, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.7510635378070538, |
|
"grad_norm": 2.245699167251587, |
|
"learning_rate": 2.2067118105882195e-05, |
|
"loss": 0.4718, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.7565527651982984, |
|
"grad_norm": 2.3782804012298584, |
|
"learning_rate": 2.190851862406739e-05, |
|
"loss": 0.4318, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.762041992589543, |
|
"grad_norm": 1.766295075416565, |
|
"learning_rate": 2.17500453950918e-05, |
|
"loss": 0.4728, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.7675312199807878, |
|
"grad_norm": 1.883118987083435, |
|
"learning_rate": 2.159170489083498e-05, |
|
"loss": 0.4229, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.7730204473720323, |
|
"grad_norm": 2.2457189559936523, |
|
"learning_rate": 2.1433503577756137e-05, |
|
"loss": 0.3906, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.778509674763277, |
|
"grad_norm": 1.66023850440979, |
|
"learning_rate": 2.1275447916630055e-05, |
|
"loss": 0.379, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.7839989021545217, |
|
"grad_norm": 2.2401814460754395, |
|
"learning_rate": 2.1117544362283286e-05, |
|
"loss": 0.4173, |
|
"step": 3250 |
|
}, |
|
    {
      "epoch": 1.7894881295457665,
      "grad_norm": 2.2202141284942627,
      "learning_rate": 2.0959799363330425e-05,
      "loss": 0.426,
      "step": 3260
    },
    {
      "epoch": 1.794977356937011,
      "grad_norm": 2.292778253555298,
      "learning_rate": 2.0802219361910908e-05,
      "loss": 0.4165,
      "step": 3270
    },
    {
      "epoch": 1.8004665843282557,
      "grad_norm": 2.025392770767212,
      "learning_rate": 2.0644810793425807e-05,
      "loss": 0.4216,
      "step": 3280
    },
    {
      "epoch": 1.8059558117195005,
      "grad_norm": 1.669911503791809,
      "learning_rate": 2.048758008627506e-05,
      "loss": 0.4745,
      "step": 3290
    },
    {
      "epoch": 1.8114450391107453,
      "grad_norm": 2.2425777912139893,
      "learning_rate": 2.033053366159493e-05,
      "loss": 0.4314,
      "step": 3300
    },
    {
      "epoch": 1.8169342665019899,
      "grad_norm": 2.065985679626465,
      "learning_rate": 2.0173677932995787e-05,
      "loss": 0.4882,
      "step": 3310
    },
    {
      "epoch": 1.8224234938932344,
      "grad_norm": 1.8231384754180908,
      "learning_rate": 2.0017019306300182e-05,
      "loss": 0.4346,
      "step": 3320
    },
    {
      "epoch": 1.8279127212844792,
      "grad_norm": 2.203216075897217,
      "learning_rate": 1.9860564179281217e-05,
      "loss": 0.515,
      "step": 3330
    },
    {
      "epoch": 1.833401948675724,
      "grad_norm": 1.8703504800796509,
      "learning_rate": 1.970431894140128e-05,
      "loss": 0.4268,
      "step": 3340
    },
    {
      "epoch": 1.8388911760669686,
      "grad_norm": 2.13779616355896,
      "learning_rate": 1.954828997355112e-05,
      "loss": 0.4324,
      "step": 3350
    },
    {
      "epoch": 1.8443804034582132,
      "grad_norm": 2.4620044231414795,
      "learning_rate": 1.939248364778924e-05,
      "loss": 0.4542,
      "step": 3360
    },
    {
      "epoch": 1.849869630849458,
      "grad_norm": 2.0297598838806152,
      "learning_rate": 1.923690632708169e-05,
      "loss": 0.4695,
      "step": 3370
    },
    {
      "epoch": 1.8553588582407026,
      "grad_norm": 2.2314670085906982,
      "learning_rate": 1.908156436504215e-05,
      "loss": 0.4433,
      "step": 3380
    },
    {
      "epoch": 1.8608480856319471,
      "grad_norm": 1.7243778705596924,
      "learning_rate": 1.892646410567255e-05,
      "loss": 0.4257,
      "step": 3390
    },
    {
      "epoch": 1.866337313023192,
      "grad_norm": 1.6004356145858765,
      "learning_rate": 1.877161188310392e-05,
      "loss": 0.3585,
      "step": 3400
    },
    {
      "epoch": 1.8718265404144367,
      "grad_norm": 2.1382360458374023,
      "learning_rate": 1.8617014021337732e-05,
      "loss": 0.4234,
      "step": 3410
    },
    {
      "epoch": 1.8773157678056813,
      "grad_norm": 1.9200503826141357,
      "learning_rate": 1.846267683398761e-05,
      "loss": 0.4546,
      "step": 3420
    },
    {
      "epoch": 1.882804995196926,
      "grad_norm": 1.9421885013580322,
      "learning_rate": 1.830860662402153e-05,
      "loss": 0.4505,
      "step": 3430
    },
    {
      "epoch": 1.8882942225881707,
      "grad_norm": 2.25044584274292,
      "learning_rate": 1.8154809683504403e-05,
      "loss": 0.4684,
      "step": 3440
    },
    {
      "epoch": 1.8937834499794155,
      "grad_norm": 1.7311737537384033,
      "learning_rate": 1.8001292293341087e-05,
      "loss": 0.4478,
      "step": 3450
    },
    {
      "epoch": 1.89927267737066,
      "grad_norm": 1.8768479824066162,
      "learning_rate": 1.7848060723019894e-05,
      "loss": 0.4323,
      "step": 3460
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 1.617492914199829,
      "learning_rate": 1.7695121230356566e-05,
      "loss": 0.4429,
      "step": 3470
    },
    {
      "epoch": 1.9102511321531495,
      "grad_norm": 2.0293169021606445,
      "learning_rate": 1.7542480061238685e-05,
      "loss": 0.4399,
      "step": 3480
    },
    {
      "epoch": 1.9157403595443943,
      "grad_norm": 1.7568955421447754,
      "learning_rate": 1.7390143449370663e-05,
      "loss": 0.4029,
      "step": 3490
    },
    {
      "epoch": 1.9212295869356388,
      "grad_norm": 1.6997624635696411,
      "learning_rate": 1.723811761601904e-05,
      "loss": 0.4072,
      "step": 3500
    },
    {
      "epoch": 1.9267188143268834,
      "grad_norm": 2.185622453689575,
      "learning_rate": 1.708640876975855e-05,
      "loss": 0.4502,
      "step": 3510
    },
    {
      "epoch": 1.9322080417181282,
      "grad_norm": 2.803870439529419,
      "learning_rate": 1.693502310621848e-05,
      "loss": 0.4397,
      "step": 3520
    },
    {
      "epoch": 1.937697269109373,
      "grad_norm": 2.163422107696533,
      "learning_rate": 1.6783966807829692e-05,
      "loss": 0.4562,
      "step": 3530
    },
    {
      "epoch": 1.9431864965006176,
      "grad_norm": 1.7700269222259521,
      "learning_rate": 1.66332460435721e-05,
      "loss": 0.4332,
      "step": 3540
    },
    {
      "epoch": 1.9486757238918622,
      "grad_norm": 1.9938660860061646,
      "learning_rate": 1.648286696872277e-05,
      "loss": 0.4392,
      "step": 3550
    },
    {
      "epoch": 1.954164951283107,
      "grad_norm": 1.796387791633606,
      "learning_rate": 1.6332835724604556e-05,
      "loss": 0.4079,
      "step": 3560
    },
    {
      "epoch": 1.9596541786743515,
      "grad_norm": 1.6725765466690063,
      "learning_rate": 1.6183158438335223e-05,
      "loss": 0.4156,
      "step": 3570
    },
    {
      "epoch": 1.9651434060655961,
      "grad_norm": 2.1465470790863037,
      "learning_rate": 1.6033841222577312e-05,
      "loss": 0.4514,
      "step": 3580
    },
    {
      "epoch": 1.970632633456841,
      "grad_norm": 2.0636017322540283,
      "learning_rate": 1.588489017528844e-05,
      "loss": 0.4107,
      "step": 3590
    },
    {
      "epoch": 1.9761218608480857,
      "grad_norm": 2.281461000442505,
      "learning_rate": 1.573631137947232e-05,
      "loss": 0.4247,
      "step": 3600
    },
    {
      "epoch": 1.9816110882393303,
      "grad_norm": 1.8369041681289673,
      "learning_rate": 1.5588110902930252e-05,
      "loss": 0.3993,
      "step": 3610
    },
    {
      "epoch": 1.9871003156305749,
      "grad_norm": 1.538087010383606,
      "learning_rate": 1.5440294798013445e-05,
      "loss": 0.4032,
      "step": 3620
    },
    {
      "epoch": 1.9925895430218197,
      "grad_norm": 1.7788771390914917,
      "learning_rate": 1.5292869101375718e-05,
      "loss": 0.4191,
      "step": 3630
    },
    {
      "epoch": 1.9980787704130645,
      "grad_norm": 1.663266897201538,
      "learning_rate": 1.514583983372707e-05,
      "loss": 0.4065,
      "step": 3640
    },
    {
      "epoch": 2.003567997804309,
      "grad_norm": 1.6794120073318481,
      "learning_rate": 1.4999212999587723e-05,
      "loss": 0.3012,
      "step": 3650
    },
    {
      "epoch": 2.0090572251955536,
      "grad_norm": 2.0947213172912598,
      "learning_rate": 1.4852994587042957e-05,
      "loss": 0.2699,
      "step": 3660
    },
    {
      "epoch": 2.0145464525867984,
      "grad_norm": 1.647148609161377,
      "learning_rate": 1.4707190567498552e-05,
      "loss": 0.256,
      "step": 3670
    },
    {
      "epoch": 2.0200356799780432,
      "grad_norm": 1.6485075950622559,
      "learning_rate": 1.4561806895436907e-05,
      "loss": 0.2306,
      "step": 3680
    },
    {
      "epoch": 2.0255249073692876,
      "grad_norm": 1.5765630006790161,
      "learning_rate": 1.4416849508173864e-05,
      "loss": 0.2363,
      "step": 3690
    },
    {
      "epoch": 2.0310141347605324,
      "grad_norm": 2.0813677310943604,
      "learning_rate": 1.4272324325616251e-05,
      "loss": 0.2407,
      "step": 3700
    },
    {
      "epoch": 2.036503362151777,
      "grad_norm": 2.0815207958221436,
      "learning_rate": 1.4128237250020115e-05,
      "loss": 0.2525,
      "step": 3710
    },
    {
      "epoch": 2.041992589543022,
      "grad_norm": 2.1880548000335693,
      "learning_rate": 1.3984594165749676e-05,
      "loss": 0.2371,
      "step": 3720
    },
    {
      "epoch": 2.0474818169342663,
      "grad_norm": 1.7031059265136719,
      "learning_rate": 1.3841400939037013e-05,
      "loss": 0.241,
      "step": 3730
    },
    {
      "epoch": 2.052971044325511,
      "grad_norm": 1.9227020740509033,
      "learning_rate": 1.3698663417742496e-05,
      "loss": 0.2509,
      "step": 3740
    },
    {
      "epoch": 2.058460271716756,
      "grad_norm": 1.8670146465301514,
      "learning_rate": 1.3556387431115969e-05,
      "loss": 0.265,
      "step": 3750
    },
    {
      "epoch": 2.0639494991080007,
      "grad_norm": 2.06124210357666,
      "learning_rate": 1.3414578789558696e-05,
      "loss": 0.2182,
      "step": 3760
    },
    {
      "epoch": 2.069438726499245,
      "grad_norm": 2.2382616996765137,
      "learning_rate": 1.3273243284386023e-05,
      "loss": 0.2477,
      "step": 3770
    },
    {
      "epoch": 2.07492795389049,
      "grad_norm": 2.178103446960449,
      "learning_rate": 1.3132386687590958e-05,
      "loss": 0.2379,
      "step": 3780
    },
    {
      "epoch": 2.0804171812817347,
      "grad_norm": 1.947129249572754,
      "learning_rate": 1.2992014751608372e-05,
      "loss": 0.2319,
      "step": 3790
    },
    {
      "epoch": 2.0859064086729795,
      "grad_norm": 2.0307064056396484,
      "learning_rate": 1.2852133209080097e-05,
      "loss": 0.2416,
      "step": 3800
    },
    {
      "epoch": 2.091395636064224,
      "grad_norm": 1.988174319267273,
      "learning_rate": 1.2712747772620801e-05,
      "loss": 0.2629,
      "step": 3810
    },
    {
      "epoch": 2.0968848634554687,
      "grad_norm": 3.22770094871521,
      "learning_rate": 1.2573864134584718e-05,
      "loss": 0.2069,
      "step": 3820
    },
    {
      "epoch": 2.1023740908467135,
      "grad_norm": 2.0717577934265137,
      "learning_rate": 1.243548796683319e-05,
      "loss": 0.237,
      "step": 3830
    },
    {
      "epoch": 2.107863318237958,
      "grad_norm": 2.118257522583008,
      "learning_rate": 1.2297624920502953e-05,
      "loss": 0.2531,
      "step": 3840
    },
    {
      "epoch": 2.1133525456292026,
      "grad_norm": 2.584071636199951,
      "learning_rate": 1.2160280625775447e-05,
      "loss": 0.2464,
      "step": 3850
    },
    {
      "epoch": 2.1188417730204474,
      "grad_norm": 2.8391823768615723,
      "learning_rate": 1.2023460691646821e-05,
      "loss": 0.2344,
      "step": 3860
    },
    {
      "epoch": 2.124331000411692,
      "grad_norm": 1.8388172388076782,
      "learning_rate": 1.1887170705698905e-05,
      "loss": 0.2191,
      "step": 3870
    },
    {
      "epoch": 2.1298202278029366,
      "grad_norm": 2.278942346572876,
      "learning_rate": 1.1751416233870999e-05,
      "loss": 0.2303,
      "step": 3880
    },
    {
      "epoch": 2.1353094551941814,
      "grad_norm": 2.4428744316101074,
      "learning_rate": 1.1616202820232567e-05,
      "loss": 0.2493,
      "step": 3890
    },
    {
      "epoch": 2.140798682585426,
      "grad_norm": 2.272839069366455,
      "learning_rate": 1.1481535986756828e-05,
      "loss": 0.2527,
      "step": 3900
    },
    {
      "epoch": 2.146287909976671,
      "grad_norm": 2.1877167224884033,
      "learning_rate": 1.134742123309525e-05,
      "loss": 0.2599,
      "step": 3910
    },
    {
      "epoch": 2.1517771373679153,
      "grad_norm": 1.6496747732162476,
      "learning_rate": 1.1213864036352939e-05,
      "loss": 0.2457,
      "step": 3920
    },
    {
      "epoch": 2.15726636475916,
      "grad_norm": 2.1241507530212402,
      "learning_rate": 1.1080869850864964e-05,
      "loss": 0.2532,
      "step": 3930
    },
    {
      "epoch": 2.162755592150405,
      "grad_norm": 1.5271326303482056,
      "learning_rate": 1.094844410797361e-05,
      "loss": 0.2651,
      "step": 3940
    },
    {
      "epoch": 2.1682448195416497,
      "grad_norm": 1.9697147607803345,
      "learning_rate": 1.0816592215806562e-05,
      "loss": 0.2171,
      "step": 3950
    },
    {
      "epoch": 2.173734046932894,
      "grad_norm": 1.604737401008606,
      "learning_rate": 1.0685319559056051e-05,
      "loss": 0.2579,
      "step": 3960
    },
    {
      "epoch": 2.179223274324139,
      "grad_norm": 2.053114414215088,
      "learning_rate": 1.0554631498758943e-05,
      "loss": 0.2541,
      "step": 3970
    },
    {
      "epoch": 2.1847125017153837,
      "grad_norm": 2.424609422683716,
      "learning_rate": 1.0424533372077803e-05,
      "loss": 0.2053,
      "step": 3980
    },
    {
      "epoch": 2.1902017291066285,
      "grad_norm": 2.34372615814209,
      "learning_rate": 1.029503049208293e-05,
      "loss": 0.214,
      "step": 3990
    },
    {
      "epoch": 2.195690956497873,
      "grad_norm": 2.1274008750915527,
      "learning_rate": 1.0166128147535352e-05,
      "loss": 0.2133,
      "step": 4000
    },
    {
      "epoch": 2.2011801838891176,
      "grad_norm": 1.7201030254364014,
      "learning_rate": 1.003783160267091e-05,
      "loss": 0.2233,
      "step": 4010
    },
    {
      "epoch": 2.2066694112803624,
      "grad_norm": 2.844679832458496,
      "learning_rate": 9.91014609698519e-06,
      "loss": 0.2389,
      "step": 4020
    },
    {
      "epoch": 2.212158638671607,
      "grad_norm": 1.7438113689422607,
      "learning_rate": 9.783076845019598e-06,
      "loss": 0.2297,
      "step": 4030
    },
    {
      "epoch": 2.2176478660628516,
      "grad_norm": 2.076685905456543,
      "learning_rate": 9.656629036148365e-06,
      "loss": 0.2519,
      "step": 4040
    },
    {
      "epoch": 2.2231370934540964,
      "grad_norm": 2.197861671447754,
      "learning_rate": 9.530807834366658e-06,
      "loss": 0.2416,
      "step": 4050
    },
    {
      "epoch": 2.228626320845341,
      "grad_norm": 1.6589149236679077,
      "learning_rate": 9.405618378079686e-06,
      "loss": 0.238,
      "step": 4060
    },
    {
      "epoch": 2.2341155482365855,
      "grad_norm": 2.1636011600494385,
      "learning_rate": 9.281065779892826e-06,
      "loss": 0.2165,
      "step": 4070
    },
    {
      "epoch": 2.2396047756278303,
      "grad_norm": 2.11350679397583,
      "learning_rate": 9.15715512640282e-06,
      "loss": 0.2539,
      "step": 4080
    },
    {
      "epoch": 2.245094003019075,
      "grad_norm": 2.1883132457733154,
      "learning_rate": 9.033891477990091e-06,
      "loss": 0.2392,
      "step": 4090
    },
    {
      "epoch": 2.25058323041032,
      "grad_norm": 1.9660489559173584,
      "learning_rate": 8.923511544874787e-06,
      "loss": 0.2507,
      "step": 4100
    },
    {
      "epoch": 2.2560724578015643,
      "grad_norm": 1.596596360206604,
      "learning_rate": 8.801491052657259e-06,
      "loss": 0.2205,
      "step": 4110
    },
    {
      "epoch": 2.261561685192809,
      "grad_norm": 1.9153178930282593,
      "learning_rate": 8.680132090462712e-06,
      "loss": 0.2189,
      "step": 4120
    },
    {
      "epoch": 2.267050912584054,
      "grad_norm": 2.2162649631500244,
      "learning_rate": 8.559439614463177e-06,
      "loss": 0.2573,
      "step": 4130
    },
    {
      "epoch": 2.2725401399752982,
      "grad_norm": 2.1483819484710693,
      "learning_rate": 8.439418553612105e-06,
      "loss": 0.2464,
      "step": 4140
    },
    {
      "epoch": 2.278029367366543,
      "grad_norm": 2.043180465698242,
      "learning_rate": 8.320073809443024e-06,
      "loss": 0.236,
      "step": 4150
    },
    {
      "epoch": 2.283518594757788,
      "grad_norm": 1.6839346885681152,
      "learning_rate": 8.201410255869458e-06,
      "loss": 0.2596,
      "step": 4160
    },
    {
      "epoch": 2.2890078221490326,
      "grad_norm": 2.334520101547241,
      "learning_rate": 8.083432738985782e-06,
      "loss": 0.2233,
      "step": 4170
    },
    {
      "epoch": 2.2944970495402774,
      "grad_norm": 2.2806715965270996,
      "learning_rate": 7.966146076869386e-06,
      "loss": 0.223,
      "step": 4180
    },
    {
      "epoch": 2.299986276931522,
      "grad_norm": 2.726118803024292,
      "learning_rate": 7.849555059383839e-06,
      "loss": 0.2236,
      "step": 4190
    },
    {
      "epoch": 2.3054755043227666,
      "grad_norm": 1.6786760091781616,
      "learning_rate": 7.733664447983349e-06,
      "loss": 0.2509,
      "step": 4200
    },
    {
      "epoch": 2.3109647317140114,
      "grad_norm": 1.767065405845642,
      "learning_rate": 7.618478975518292e-06,
      "loss": 0.2373,
      "step": 4210
    },
    {
      "epoch": 2.3164539591052558,
      "grad_norm": 1.5344187021255493,
      "learning_rate": 7.504003346041871e-06,
      "loss": 0.2404,
      "step": 4220
    },
    {
      "epoch": 2.3219431864965006,
      "grad_norm": 2.3337080478668213,
      "learning_rate": 7.390242234618075e-06,
      "loss": 0.1858,
      "step": 4230
    },
    {
      "epoch": 2.3274324138877454,
      "grad_norm": 2.3712496757507324,
      "learning_rate": 7.277200287130728e-06,
      "loss": 0.2595,
      "step": 4240
    },
    {
      "epoch": 2.33292164127899,
      "grad_norm": 2.5574772357940674,
      "learning_rate": 7.164882120093757e-06,
      "loss": 0.2419,
      "step": 4250
    },
    {
      "epoch": 2.3384108686702345,
      "grad_norm": 2.367032051086426,
      "learning_rate": 7.053292320462654e-06,
      "loss": 0.256,
      "step": 4260
    },
    {
      "epoch": 2.3439000960614793,
      "grad_norm": 2.3279731273651123,
      "learning_rate": 6.942435445447159e-06,
      "loss": 0.2319,
      "step": 4270
    },
    {
      "epoch": 2.349389323452724,
      "grad_norm": 2.5082924365997314,
      "learning_rate": 6.832316022325138e-06,
      "loss": 0.212,
      "step": 4280
    },
    {
      "epoch": 2.354878550843969,
      "grad_norm": 3.303452730178833,
      "learning_rate": 6.7229385482577065e-06,
      "loss": 0.2434,
      "step": 4290
    },
    {
      "epoch": 2.3603677782352133,
      "grad_norm": 2.3842883110046387,
      "learning_rate": 6.614307490105557e-06,
      "loss": 0.2644,
      "step": 4300
    },
    {
      "epoch": 2.365857005626458,
      "grad_norm": 2.0827999114990234,
      "learning_rate": 6.506427284246547e-06,
      "loss": 0.2327,
      "step": 4310
    },
    {
      "epoch": 2.371346233017703,
      "grad_norm": 1.9834315776824951,
      "learning_rate": 6.3993023363945165e-06,
      "loss": 0.2206,
      "step": 4320
    },
    {
      "epoch": 2.376835460408947,
      "grad_norm": 1.9981067180633545,
      "learning_rate": 6.2929370214193735e-06,
      "loss": 0.2736,
      "step": 4330
    },
    {
      "epoch": 2.382324687800192,
      "grad_norm": 2.4763762950897217,
      "learning_rate": 6.1873356831683884e-06,
      "loss": 0.2344,
      "step": 4340
    },
    {
      "epoch": 2.387813915191437,
      "grad_norm": 1.4734795093536377,
      "learning_rate": 6.082502634288873e-06,
      "loss": 0.2019,
      "step": 4350
    },
    {
      "epoch": 2.3933031425826816,
      "grad_norm": 2.4421563148498535,
      "learning_rate": 5.978442156051986e-06,
      "loss": 0.2289,
      "step": 4360
    },
    {
      "epoch": 2.3987923699739264,
      "grad_norm": 2.192746162414551,
      "learning_rate": 5.875158498177921e-06,
      "loss": 0.2396,
      "step": 4370
    },
    {
      "epoch": 2.4042815973651708,
      "grad_norm": 1.9783297777175903,
      "learning_rate": 5.772655878662339e-06,
      "loss": 0.2531,
      "step": 4380
    },
    {
      "epoch": 2.4097708247564156,
      "grad_norm": 2.5523509979248047,
      "learning_rate": 5.6709384836041184e-06,
      "loss": 0.2405,
      "step": 4390
    },
    {
      "epoch": 2.4152600521476604,
      "grad_norm": 1.6953893899917603,
      "learning_rate": 5.570010467034425e-06,
      "loss": 0.2279,
      "step": 4400
    },
    {
      "epoch": 2.4207492795389047,
      "grad_norm": 1.7373918294906616,
      "learning_rate": 5.469875950747016e-06,
      "loss": 0.2081,
      "step": 4410
    },
    {
      "epoch": 2.4262385069301495,
      "grad_norm": 2.789266347885132,
      "learning_rate": 5.370539024129928e-06,
      "loss": 0.2455,
      "step": 4420
    },
    {
      "epoch": 2.4317277343213943,
      "grad_norm": 1.8112378120422363,
      "learning_rate": 5.272003743998489e-06,
      "loss": 0.256,
      "step": 4430
    },
    {
      "epoch": 2.437216961712639,
      "grad_norm": 1.8407344818115234,
      "learning_rate": 5.1742741344296246e-06,
      "loss": 0.2481,
      "step": 4440
    },
    {
      "epoch": 2.4427061891038835,
      "grad_norm": 2.0232059955596924,
      "learning_rate": 5.077354186597541e-06,
      "loss": 0.2213,
      "step": 4450
    },
    {
      "epoch": 2.4481954164951283,
      "grad_norm": 2.0662572383880615,
      "learning_rate": 4.981247858610688e-06,
      "loss": 0.2064,
      "step": 4460
    },
    {
      "epoch": 2.453684643886373,
      "grad_norm": 1.9686827659606934,
      "learning_rate": 4.885959075350149e-06,
      "loss": 0.2344,
      "step": 4470
    },
    {
      "epoch": 2.459173871277618,
      "grad_norm": 2.095710515975952,
      "learning_rate": 4.791491728309347e-06,
      "loss": 0.2727,
      "step": 4480
    },
    {
      "epoch": 2.4646630986688622,
      "grad_norm": 2.7409679889678955,
      "learning_rate": 4.697849675435112e-06,
      "loss": 0.2449,
      "step": 4490
    },
    {
      "epoch": 2.470152326060107,
      "grad_norm": 1.6528655290603638,
      "learning_rate": 4.605036740970134e-06,
      "loss": 0.2228,
      "step": 4500
    },
    {
      "epoch": 2.475641553451352,
      "grad_norm": 2.210045337677002,
      "learning_rate": 4.513056715296773e-06,
      "loss": 0.2399,
      "step": 4510
    },
    {
      "epoch": 2.481130780842596,
      "grad_norm": 1.4573155641555786,
      "learning_rate": 4.4219133547822865e-06,
      "loss": 0.2133,
      "step": 4520
    },
    {
      "epoch": 2.486620008233841,
      "grad_norm": 2.013803482055664,
      "learning_rate": 4.331610381625395e-06,
      "loss": 0.2318,
      "step": 4530
    },
    {
      "epoch": 2.492109235625086,
      "grad_norm": 2.090888261795044,
      "learning_rate": 4.242151483704293e-06,
      "loss": 0.2393,
      "step": 4540
    },
    {
      "epoch": 2.4975984630163306,
      "grad_norm": 2.1223104000091553,
      "learning_rate": 4.153540314426033e-06,
      "loss": 0.2343,
      "step": 4550
    },
    {
      "epoch": 2.5030876904075754,
      "grad_norm": 2.2379000186920166,
      "learning_rate": 4.065780492577326e-06,
      "loss": 0.2608,
      "step": 4560
    },
    {
      "epoch": 2.5085769177988197,
      "grad_norm": 1.780354380607605,
      "learning_rate": 3.978875602176726e-06,
      "loss": 0.2401,
      "step": 4570
    },
    {
      "epoch": 2.5140661451900645,
      "grad_norm": 2.5559253692626953,
      "learning_rate": 3.892829192328337e-06,
      "loss": 0.2381,
      "step": 4580
    },
    {
      "epoch": 2.5195553725813093,
      "grad_norm": 1.7930986881256104,
      "learning_rate": 3.8076447770767796e-06,
      "loss": 0.2712,
      "step": 4590
    },
    {
      "epoch": 2.5250445999725537,
      "grad_norm": 2.0700008869171143,
      "learning_rate": 3.7233258352637553e-06,
      "loss": 0.2162,
      "step": 4600
    },
    {
      "epoch": 2.5305338273637985,
      "grad_norm": 2.3782896995544434,
      "learning_rate": 3.6398758103859067e-06,
      "loss": 0.208,
      "step": 4610
    },
    {
      "epoch": 2.5360230547550433,
      "grad_norm": 2.345578670501709,
      "learning_rate": 3.557298110454252e-06,
      "loss": 0.2231,
      "step": 4620
    },
    {
      "epoch": 2.5415122821462877,
      "grad_norm": 1.9046144485473633,
      "learning_rate": 3.475596107854981e-06,
      "loss": 0.2359,
      "step": 4630
    },
    {
      "epoch": 2.5470015095375325,
      "grad_norm": 2.2235357761383057,
      "learning_rate": 3.3947731392117237e-06,
      "loss": 0.2268,
      "step": 4640
    },
    {
      "epoch": 2.5524907369287773,
      "grad_norm": 2.3540749549865723,
      "learning_rate": 3.3148325052492713e-06,
      "loss": 0.2382,
      "step": 4650
    },
    {
      "epoch": 2.557979964320022,
      "grad_norm": 2.550370931625366,
      "learning_rate": 3.2357774706588157e-06,
      "loss": 0.2364,
      "step": 4660
    },
    {
      "epoch": 2.563469191711267,
      "grad_norm": 2.06571626663208,
      "learning_rate": 3.1576112639646023e-06,
      "loss": 0.2379,
      "step": 4670
    },
    {
      "epoch": 2.568958419102511,
      "grad_norm": 1.3834680318832397,
      "learning_rate": 3.08033707739209e-06,
      "loss": 0.2012,
      "step": 4680
    },
    {
      "epoch": 2.574447646493756,
      "grad_norm": 1.7131993770599365,
      "learning_rate": 3.0039580667375557e-06,
      "loss": 0.2019,
      "step": 4690
    },
    {
      "epoch": 2.579936873885001,
      "grad_norm": 2.2222018241882324,
      "learning_rate": 2.9284773512392475e-06,
      "loss": 0.2565,
      "step": 4700
    },
    {
      "epoch": 2.585426101276245,
      "grad_norm": 1.8102763891220093,
      "learning_rate": 2.8538980134499958e-06,
      "loss": 0.2254,
      "step": 4710
    },
    {
      "epoch": 2.59091532866749,
      "grad_norm": 2.4614617824554443,
      "learning_rate": 2.780223099111298e-06,
      "loss": 0.2505,
      "step": 4720
    },
    {
      "epoch": 2.5964045560587348,
      "grad_norm": 2.120408535003662,
      "learning_rate": 2.7074556170289674e-06,
      "loss": 0.1887,
      "step": 4730
    },
    {
      "epoch": 2.6018937834499796,
      "grad_norm": 2.1147496700286865,
      "learning_rate": 2.6355985389502293e-06,
      "loss": 0.1995,
      "step": 4740
    },
    {
      "epoch": 2.6073830108412244,
      "grad_norm": 1.818030595779419,
      "learning_rate": 2.5646547994423784e-06,
      "loss": 0.2527,
      "step": 4750
    },
    {
      "epoch": 2.6128722382324687,
      "grad_norm": 1.935293197631836,
      "learning_rate": 2.4946272957729165e-06,
      "loss": 0.2007,
      "step": 4760
    },
    {
      "epoch": 2.6183614656237135,
      "grad_norm": 1.837039589881897,
      "learning_rate": 2.4255188877912477e-06,
      "loss": 0.2044,
      "step": 4770
    },
    {
      "epoch": 2.6238506930149583,
      "grad_norm": 2.487926721572876,
      "learning_rate": 2.3573323978118705e-06,
      "loss": 0.2825,
      "step": 4780
    },
    {
      "epoch": 2.6293399204062027,
      "grad_norm": 1.880677342414856,
      "learning_rate": 2.29007061049914e-06,
      "loss": 0.209,
      "step": 4790
    },
    {
      "epoch": 2.6348291477974475,
      "grad_norm": 2.060526132583618,
      "learning_rate": 2.2237362727535043e-06,
      "loss": 0.2069,
      "step": 4800
    },
    {
      "epoch": 2.6403183751886923,
      "grad_norm": 1.8091875314712524,
      "learning_rate": 2.1583320935993605e-06,
      "loss": 0.2606,
      "step": 4810
    },
    {
      "epoch": 2.6458076025799366,
      "grad_norm": 2.821810483932495,
      "learning_rate": 2.0938607440744274e-06,
      "loss": 0.2235,
      "step": 4820
    },
    {
      "epoch": 2.6512968299711814,
      "grad_norm": 2.605480194091797,
      "learning_rate": 2.0303248571206244e-06,
      "loss": 0.2454,
      "step": 4830
    },
    {
      "epoch": 2.6567860573624262,
      "grad_norm": 2.1553428173065186,
      "learning_rate": 1.967727027476568e-06,
      "loss": 0.1998,
      "step": 4840
    },
    {
      "epoch": 2.662275284753671,
      "grad_norm": 2.485527753829956,
      "learning_rate": 1.9060698115716063e-06,
      "loss": 0.2377,
      "step": 4850
    },
    {
      "epoch": 2.667764512144916,
      "grad_norm": 1.8272162675857544,
      "learning_rate": 1.8453557274214162e-06,
      "loss": 0.2288,
      "step": 4860
    },
    {
      "epoch": 2.67325373953616,
      "grad_norm": 2.5798721313476562,
      "learning_rate": 1.7855872545251757e-06,
      "loss": 0.2419,
      "step": 4870
    },
    {
      "epoch": 2.678742966927405,
      "grad_norm": 2.7085471153259277,
      "learning_rate": 1.7267668337642761e-06,
      "loss": 0.222,
      "step": 4880
    },
    {
      "epoch": 2.68423219431865,
      "grad_norm": 2.2424449920654297,
      "learning_rate": 1.6688968673026773e-06,
      "loss": 0.1913,
      "step": 4890
    },
    {
      "epoch": 2.689721421709894,
      "grad_norm": 2.0495047569274902,
      "learning_rate": 1.6119797184887792e-06,
      "loss": 0.1905,
      "step": 4900
    },
    {
      "epoch": 2.695210649101139,
      "grad_norm": 2.048985242843628,
      "learning_rate": 1.5560177117589197e-06,
      "loss": 0.1978,
      "step": 4910
    },
    {
      "epoch": 2.7006998764923837,
      "grad_norm": 2.4100003242492676,
      "learning_rate": 1.5010131325424337e-06,
      "loss": 0.2575,
      "step": 4920
    },
    {
      "epoch": 2.7061891038836285,
      "grad_norm": 2.420525074005127,
      "learning_rate": 1.4469682271683327e-06,
      "loss": 0.215,
      "step": 4930
    },
    {
      "epoch": 2.711678331274873,
      "grad_norm": 1.7320780754089355,
      "learning_rate": 1.3938852027735594e-06,
      "loss": 0.2259,
      "step": 4940
    },
    {
      "epoch": 2.7171675586661177,
      "grad_norm": 2.3320376873016357,
      "learning_rate": 1.3417662272128484e-06,
      "loss": 0.2514,
      "step": 4950
    },
    {
      "epoch": 2.7226567860573625,
      "grad_norm": 2.5615234375,
      "learning_rate": 1.2906134289701998e-06,
      "loss": 0.2342,
      "step": 4960
    },
    {
      "epoch": 2.7281460134486073,
      "grad_norm": 2.3866004943847656,
      "learning_rate": 1.240428897071949e-06,
      "loss": 0.2273,
      "step": 4970
    },
    {
      "epoch": 2.7336352408398517,
      "grad_norm": 2.7888448238372803,
      "learning_rate": 1.191214681001454e-06,
      "loss": 0.216,
      "step": 4980
    },
    {
      "epoch": 2.7391244682310965,
      "grad_norm": 2.318481922149658,
      "learning_rate": 1.142972790615407e-06,
      "loss": 0.2053,
      "step": 4990
    },
    {
      "epoch": 2.7446136956223413,
      "grad_norm": 1.82982337474823,
      "learning_rate": 1.095705196061722e-06,
      "loss": 0.2265,
      "step": 5000
    },
    {
      "epoch": 2.7501029230135856,
      "grad_norm": 1.4905016422271729,
      "learning_rate": 1.0494138276991278e-06,
      "loss": 0.1784,
      "step": 5010
    },
    {
      "epoch": 2.7555921504048304,
      "grad_norm": 1.6236484050750732,
      "learning_rate": 1.0041005760182853e-06,
      "loss": 0.1953,
      "step": 5020
    },
    {
      "epoch": 2.761081377796075,
      "grad_norm": 1.9028195142745972,
      "learning_rate": 9.597672915646116e-07,
      "loss": 0.2076,
      "step": 5030
    },
    {
      "epoch": 2.76657060518732,
      "grad_norm": 2.1804606914520264,
      "learning_rate": 9.164157848626842e-07,
      "loss": 0.2155,
      "step": 5040
    },
    {
      "epoch": 2.772059832578565,
      "grad_norm": 1.961748480796814,
      "learning_rate": 8.740478263423197e-07,
      "loss": 0.2072,
      "step": 5050
    },
    {
      "epoch": 2.777549059969809,
      "grad_norm": 2.2191579341888428,
      "learning_rate": 8.32665146266276e-07,
      "loss": 0.2314,
      "step": 5060
    },
    {
      "epoch": 2.783038287361054,
      "grad_norm": 2.2354133129119873,
      "learning_rate": 7.922694346595511e-07,
      "loss": 0.2297,
      "step": 5070
    },
    {
      "epoch": 2.7885275147522988,
      "grad_norm": 1.8483319282531738,
      "learning_rate": 7.528623412404179e-07,
      "loss": 0.214,
      "step": 5080
    },
    {
      "epoch": 2.794016742143543,
      "grad_norm": 1.9461801052093506,
      "learning_rate": 7.144454753530067e-07,
      "loss": 0.2173,
      "step": 5090
    },
    {
      "epoch": 2.799505969534788,
      "grad_norm": 1.8403065204620361,
      "learning_rate": 6.770204059016127e-07,
      "loss": 0.2012,
      "step": 5100
    },
    {
      "epoch": 2.8049951969260327,
      "grad_norm": 2.654057264328003,
      "learning_rate": 6.405886612866036e-07,
      "loss": 0.224,
      "step": 5110
    },
    {
      "epoch": 2.810484424317277,
      "grad_norm": 2.0720152854919434,
      "learning_rate": 6.051517293420101e-07,
      "loss": 0.1893,
      "step": 5120
    },
    {
      "epoch": 2.815973651708522,
      "grad_norm": 1.8411768674850464,
      "learning_rate": 5.707110572747587e-07,
      "loss": 0.2351,
      "step": 5130
    },
    {
      "epoch": 2.8214628790997667,
      "grad_norm": 1.893263578414917,
      "learning_rate": 5.3726805160558e-07,
      "loss": 0.2541,
      "step": 5140
    },
    {
      "epoch": 2.8269521064910115,
      "grad_norm": 2.347729206085205,
      "learning_rate": 5.048240781115571e-07,
      "loss": 0.2351,
      "step": 5150
    },
    {
      "epoch": 2.8324413338822563,
      "grad_norm": 2.2554593086242676,
      "learning_rate": 4.7338046177035354e-07,
      "loss": 0.245,
      "step": 5160
    },
    {
      "epoch": 2.8379305612735006,
      "grad_norm": 2.649017095565796,
      "learning_rate": 4.429384867061015e-07,
      "loss": 0.2444,
      "step": 5170
    },
    {
      "epoch": 2.8434197886647454,
      "grad_norm": 2.740800142288208,
      "learning_rate": 4.1349939613695434e-07,
      "loss": 0.2354,
      "step": 5180
    },
    {
      "epoch": 2.8489090160559902,
      "grad_norm": 2.5310161113739014,
      "learning_rate": 3.85064392324333e-07,
      "loss": 0.2086,
      "step": 5190
    },
    {
      "epoch": 2.8543982434472346,
      "grad_norm": 2.4969282150268555,
      "learning_rate": 3.5763463652380146e-07,
      "loss": 0.2329,
      "step": 5200
    },
    {
      "epoch": 2.8598874708384794,
      "grad_norm": 2.0695693492889404,
      "learning_rate": 3.3121124893766287e-07,
      "loss": 0.1665,
      "step": 5210
    },
    {
      "epoch": 2.865376698229724,
      "grad_norm": 2.0718321800231934,
      "learning_rate": 3.057953086692017e-07,
      "loss": 0.2444,
      "step": 5220
    },
    {
      "epoch": 2.870865925620969,
      "grad_norm": 2.0257515907287598,
      "learning_rate": 2.8138785367860796e-07,
      "loss": 0.2303,
      "step": 5230
    },
    {
      "epoch": 2.876355153012214,
      "grad_norm": 2.0960233211517334,
      "learning_rate": 2.5798988074061394e-07,
      "loss": 0.2274,
      "step": 5240
    },
    {
      "epoch": 2.881844380403458,
      "grad_norm": 2.4282174110412598,
      "learning_rate": 2.3560234540375424e-07,
      "loss": 0.1995,
      "step": 5250
    },
    {
      "epoch": 2.887333607794703,
      "grad_norm": 1.898910641670227,
      "learning_rate": 2.1422616195136692e-07,
      "loss": 0.2002,
      "step": 5260
    },
    {
      "epoch": 2.8928228351859477,
      "grad_norm": 1.6507282257080078,
      "learning_rate": 1.9386220336423678e-07,
      "loss": 0.1811,
      "step": 5270
    },
    {
      "epoch": 2.898312062577192,
      "grad_norm": 2.447411060333252,
      "learning_rate": 1.7451130128495753e-07,
      "loss": 0.2376,
      "step": 5280
    },
    {
      "epoch": 2.903801289968437,
      "grad_norm": 1.9209644794464111,
      "learning_rate": 1.5617424598396712e-07,
      "loss": 0.236,
      "step": 5290
    },
    {
      "epoch": 2.9092905173596817,
      "grad_norm": 2.0094797611236572,
      "learning_rate": 1.3885178632726536e-07,
      "loss": 0.208,
      "step": 5300
    },
    {
      "epoch": 2.914779744750926,
      "grad_norm": 2.3810887336730957,
      "learning_rate": 1.225446297458327e-07,
      "loss": 0.2124,
      "step": 5310
    },
    {
      "epoch": 2.920268972142171,
      "grad_norm": 2.039491891860962,
      "learning_rate": 1.0725344220675337e-07,
      "loss": 0.1983,
      "step": 5320
    },
    {
      "epoch": 2.9257581995334156,
      "grad_norm": 2.2363178730010986,
      "learning_rate": 9.297884818599556e-08,
      "loss": 0.2173,
      "step": 5330
    },
    {
      "epoch": 2.9312474269246604,
      "grad_norm": 2.1318199634552,
      "learning_rate": 7.972143064292892e-08,
      "loss": 0.182,
      "step": 5340
    },
    {
      "epoch": 2.9367366543159052,
      "grad_norm": 1.9546360969543457,
      "learning_rate": 6.748173099650202e-08,
      "loss": 0.1871,
      "step": 5350
    },
    {
      "epoch": 2.9422258817071496,
      "grad_norm": 1.8476676940917969,
      "learning_rate": 5.626024910314609e-08,
      "loss": 0.1989,
      "step": 5360
    },
    {
      "epoch": 2.9477151090983944,
      "grad_norm": 2.1973392963409424,
      "learning_rate": 4.605744323634142e-08,
      "loss": 0.2186,
      "step": 5370
    },
    {
      "epoch": 2.953204336489639,
      "grad_norm": 1.935354232788086,
      "learning_rate": 3.687373006792394e-08,
      "loss": 0.2124,
      "step": 5380
    },
    {
      "epoch": 2.9586935638808836,
      "grad_norm": 2.1322507858276367,
      "learning_rate": 2.870948465105161e-08,
      "loss": 0.2145,
      "step": 5390
    },
    {
      "epoch": 2.9641827912721284,
      "grad_norm": 2.501298189163208,
      "learning_rate": 2.1565040404902813e-08,
      "loss": 0.22,
      "step": 5400
    },
    {
      "epoch": 2.969672018663373,
      "grad_norm": 1.6250516176223755,
      "learning_rate": 1.544068910104002e-08,
      "loss": 0.218,
      "step": 5410
    },
    {
      "epoch": 2.975161246054618,
      "grad_norm": 2.794093370437622,
      "learning_rate": 1.0336680851516512e-08,
      "loss": 0.257,
      "step": 5420
    },
    {
      "epoch": 2.9806504734458628,
      "grad_norm": 2.0987253189086914,
      "learning_rate": 6.2532240986457044e-09,
      "loss": 0.2193,
      "step": 5430
    },
    {
      "epoch": 2.986139700837107,
      "grad_norm": 2.217175006866455,
      "learning_rate": 3.1904856064940424e-09,
      "loss": 0.2392,
      "step": 5440
    },
    {
      "epoch": 2.991628928228352,
      "grad_norm": 2.0461556911468506,
      "learning_rate": 1.1485904540697867e-09,
      "loss": 0.2137,
      "step": 5450
    },
    {
      "epoch": 2.9971181556195967,
      "grad_norm": 1.9249966144561768,
      "learning_rate": 1.276220302215414e-10,
      "loss": 0.2132,
      "step": 5460
    },
    {
      "epoch": 2.99876492383697,
      "step": 5463,
      "total_flos": 3.233486247100416e+17,
      "train_loss": 0.5040911292388242,
      "train_runtime": 9149.3841,
      "train_samples_per_second": 4.779,
      "train_steps_per_second": 0.597
    }
  ],
  "logging_steps": 10,
  "max_steps": 5463,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.233486247100416e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}