{
  "best_metric": 1.000998854637146,
  "best_model_checkpoint": "./lora-alpaca3/checkpoint-1000",
  "epoch": 2.9942122186495177,
  "eval_steps": 200,
  "global_step": 1164,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02572347266881029,
      "grad_norm": 0.5646638870239258,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 1.9009,
      "step": 10
    },
    {
      "epoch": 0.05144694533762058,
      "grad_norm": 0.5377612709999084,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 1.8355,
      "step": 20
    },
    {
      "epoch": 0.07717041800643087,
      "grad_norm": 0.5958595871925354,
      "learning_rate": 8.999999999999999e-05,
      "loss": 1.6639,
      "step": 30
    },
    {
      "epoch": 0.10289389067524116,
      "grad_norm": 0.5721263885498047,
      "learning_rate": 0.00011999999999999999,
      "loss": 1.3994,
      "step": 40
    },
    {
      "epoch": 0.12861736334405144,
      "grad_norm": 0.2665337026119232,
      "learning_rate": 0.00015,
      "loss": 1.1767,
      "step": 50
    },
    {
      "epoch": 0.15434083601286175,
      "grad_norm": 0.12332847714424133,
      "learning_rate": 0.00017999999999999998,
      "loss": 1.1413,
      "step": 60
    },
    {
      "epoch": 0.18006430868167203,
      "grad_norm": 0.12157242000102997,
      "learning_rate": 0.00020999999999999998,
      "loss": 1.1103,
      "step": 70
    },
    {
      "epoch": 0.2057877813504823,
      "grad_norm": 0.12799611687660217,
      "learning_rate": 0.00023999999999999998,
      "loss": 1.1003,
      "step": 80
    },
    {
      "epoch": 0.2315112540192926,
      "grad_norm": 0.12139850109815598,
      "learning_rate": 0.00027,
      "loss": 1.084,
      "step": 90
    },
    {
      "epoch": 0.2572347266881029,
      "grad_norm": 0.144248366355896,
      "learning_rate": 0.0003,
      "loss": 1.0722,
      "step": 100
    },
    {
      "epoch": 0.2829581993569132,
      "grad_norm": 0.15326175093650818,
      "learning_rate": 0.00029718045112781953,
      "loss": 1.0731,
      "step": 110
    },
    {
      "epoch": 0.3086816720257235,
      "grad_norm": 0.12561754882335663,
      "learning_rate": 0.00029436090225563904,
      "loss": 1.0537,
      "step": 120
    },
    {
      "epoch": 0.33440514469453375,
      "grad_norm": 0.15570670366287231,
      "learning_rate": 0.0002915413533834586,
      "loss": 1.049,
      "step": 130
    },
    {
      "epoch": 0.36012861736334406,
      "grad_norm": 0.14343878626823425,
      "learning_rate": 0.00028872180451127816,
      "loss": 1.0273,
      "step": 140
    },
    {
      "epoch": 0.3858520900321543,
      "grad_norm": 0.1315772831439972,
      "learning_rate": 0.0002859022556390977,
      "loss": 1.0529,
      "step": 150
    },
    {
      "epoch": 0.4115755627009646,
      "grad_norm": 0.13874055445194244,
      "learning_rate": 0.0002830827067669173,
      "loss": 1.044,
      "step": 160
    },
    {
      "epoch": 0.43729903536977494,
      "grad_norm": 0.1271531730890274,
      "learning_rate": 0.00028026315789473683,
      "loss": 1.0388,
      "step": 170
    },
    {
      "epoch": 0.4630225080385852,
      "grad_norm": 0.12684497237205505,
      "learning_rate": 0.0002774436090225564,
      "loss": 1.0385,
      "step": 180
    },
    {
      "epoch": 0.4887459807073955,
      "grad_norm": 0.11675913631916046,
      "learning_rate": 0.0002746240601503759,
      "loss": 1.0349,
      "step": 190
    },
    {
      "epoch": 0.5144694533762058,
      "grad_norm": 0.15151233971118927,
      "learning_rate": 0.00027180451127819546,
      "loss": 1.0282,
      "step": 200
    },
    {
      "epoch": 0.5144694533762058,
      "eval_loss": 1.0400722026824951,
      "eval_runtime": 55.5366,
      "eval_samples_per_second": 36.012,
      "eval_steps_per_second": 4.502,
      "step": 200
    },
    {
      "epoch": 0.5401929260450161,
      "grad_norm": 0.1420961171388626,
      "learning_rate": 0.000268984962406015,
      "loss": 1.0297,
      "step": 210
    },
    {
      "epoch": 0.5659163987138264,
      "grad_norm": 0.13161741197109222,
      "learning_rate": 0.0002661654135338346,
      "loss": 1.0276,
      "step": 220
    },
    {
      "epoch": 0.5916398713826366,
      "grad_norm": 0.13450871407985687,
      "learning_rate": 0.00026334586466165413,
      "loss": 1.0285,
      "step": 230
    },
    {
      "epoch": 0.617363344051447,
      "grad_norm": 0.13193918764591217,
      "learning_rate": 0.0002605263157894737,
      "loss": 1.0309,
      "step": 240
    },
    {
      "epoch": 0.6430868167202572,
      "grad_norm": 0.1393500566482544,
      "learning_rate": 0.0002577067669172932,
      "loss": 1.0315,
      "step": 250
    },
    {
      "epoch": 0.6688102893890675,
      "grad_norm": 0.11902929842472076,
      "learning_rate": 0.00025488721804511276,
      "loss": 1.0347,
      "step": 260
    },
    {
      "epoch": 0.6945337620578779,
      "grad_norm": 0.1258976310491562,
      "learning_rate": 0.0002520676691729323,
      "loss": 1.0273,
      "step": 270
    },
    {
      "epoch": 0.7202572347266881,
      "grad_norm": 0.12598267197608948,
      "learning_rate": 0.0002492481203007519,
      "loss": 1.0128,
      "step": 280
    },
    {
      "epoch": 0.7459807073954984,
      "grad_norm": 0.122463159263134,
      "learning_rate": 0.0002464285714285714,
      "loss": 1.0251,
      "step": 290
    },
    {
      "epoch": 0.7717041800643086,
      "grad_norm": 0.1293506771326065,
      "learning_rate": 0.00024360902255639094,
      "loss": 1.0317,
      "step": 300
    },
    {
      "epoch": 0.797427652733119,
      "grad_norm": 0.13788191974163055,
      "learning_rate": 0.00024078947368421052,
      "loss": 1.029,
      "step": 310
    },
    {
      "epoch": 0.8231511254019293,
      "grad_norm": 0.1445332169532776,
      "learning_rate": 0.00023796992481203005,
      "loss": 1.0173,
      "step": 320
    },
    {
      "epoch": 0.8488745980707395,
      "grad_norm": 0.11408714205026627,
      "learning_rate": 0.00023515037593984961,
      "loss": 1.0109,
      "step": 330
    },
    {
      "epoch": 0.8745980707395499,
      "grad_norm": 0.11767855286598206,
      "learning_rate": 0.00023233082706766915,
      "loss": 1.0116,
      "step": 340
    },
    {
      "epoch": 0.9003215434083601,
      "grad_norm": 0.12555481493473053,
      "learning_rate": 0.0002295112781954887,
      "loss": 1.0144,
      "step": 350
    },
    {
      "epoch": 0.9260450160771704,
      "grad_norm": 0.12403016537427902,
      "learning_rate": 0.00022669172932330824,
      "loss": 1.0299,
      "step": 360
    },
    {
      "epoch": 0.9517684887459807,
      "grad_norm": 0.12968535721302032,
      "learning_rate": 0.0002238721804511278,
      "loss": 1.0081,
      "step": 370
    },
    {
      "epoch": 0.977491961414791,
      "grad_norm": 0.12369370460510254,
      "learning_rate": 0.00022105263157894733,
      "loss": 1.0065,
      "step": 380
    },
    {
      "epoch": 1.0032154340836013,
      "grad_norm": 0.12212081998586655,
      "learning_rate": 0.0002182330827067669,
      "loss": 1.0448,
      "step": 390
    },
    {
      "epoch": 1.0289389067524115,
      "grad_norm": 0.1301109939813614,
      "learning_rate": 0.00021541353383458647,
      "loss": 1.0052,
      "step": 400
    },
    {
      "epoch": 1.0289389067524115,
      "eval_loss": 1.0189071893692017,
      "eval_runtime": 55.4593,
      "eval_samples_per_second": 36.062,
      "eval_steps_per_second": 4.508,
      "step": 400
    },
    {
      "epoch": 1.0546623794212218,
      "grad_norm": 0.13505908846855164,
      "learning_rate": 0.000212593984962406,
      "loss": 1.0009,
      "step": 410
    },
    {
      "epoch": 1.0803858520900322,
      "grad_norm": 0.12307268381118774,
      "learning_rate": 0.00020977443609022556,
      "loss": 1.0039,
      "step": 420
    },
    {
      "epoch": 1.1061093247588425,
      "grad_norm": 0.11501840502023697,
      "learning_rate": 0.0002069548872180451,
      "loss": 0.9988,
      "step": 430
    },
    {
      "epoch": 1.1318327974276527,
      "grad_norm": 0.12922251224517822,
      "learning_rate": 0.00020413533834586463,
      "loss": 0.9938,
      "step": 440
    },
    {
      "epoch": 1.157556270096463,
      "grad_norm": 0.12304320186376572,
      "learning_rate": 0.0002013157894736842,
      "loss": 1.0223,
      "step": 450
    },
    {
      "epoch": 1.1832797427652733,
      "grad_norm": 0.11323860287666321,
      "learning_rate": 0.00019849624060150372,
      "loss": 0.9957,
      "step": 460
    },
    {
      "epoch": 1.2090032154340835,
      "grad_norm": 0.1335834562778473,
      "learning_rate": 0.00019567669172932328,
      "loss": 1.0042,
      "step": 470
    },
    {
      "epoch": 1.234726688102894,
      "grad_norm": 0.12371603399515152,
      "learning_rate": 0.00019285714285714286,
      "loss": 1.0056,
      "step": 480
    },
    {
      "epoch": 1.2604501607717042,
      "grad_norm": 0.12318002432584763,
      "learning_rate": 0.0001900375939849624,
      "loss": 1.0122,
      "step": 490
    },
    {
      "epoch": 1.2861736334405145,
      "grad_norm": 0.13187570869922638,
      "learning_rate": 0.00018721804511278195,
      "loss": 0.9971,
      "step": 500
    },
    {
      "epoch": 1.3118971061093248,
      "grad_norm": 0.11861589550971985,
      "learning_rate": 0.00018439849624060149,
      "loss": 0.9926,
      "step": 510
    },
    {
      "epoch": 1.337620578778135,
      "grad_norm": 0.11287492513656616,
      "learning_rate": 0.00018157894736842105,
      "loss": 1.016,
      "step": 520
    },
    {
      "epoch": 1.3633440514469453,
      "grad_norm": 0.1384376585483551,
      "learning_rate": 0.00017875939849624058,
      "loss": 1.0059,
      "step": 530
    },
    {
      "epoch": 1.3890675241157555,
      "grad_norm": 0.12147587537765503,
      "learning_rate": 0.00017593984962406014,
      "loss": 1.0213,
      "step": 540
    },
    {
      "epoch": 1.414790996784566,
      "grad_norm": 0.13031207025051117,
      "learning_rate": 0.00017312030075187967,
      "loss": 1.005,
      "step": 550
    },
    {
      "epoch": 1.4405144694533762,
      "grad_norm": 0.13779744505882263,
      "learning_rate": 0.00017030075187969925,
      "loss": 1.0077,
      "step": 560
    },
    {
      "epoch": 1.4662379421221865,
      "grad_norm": 0.13196057081222534,
      "learning_rate": 0.00016748120300751879,
      "loss": 0.9944,
      "step": 570
    },
    {
      "epoch": 1.4919614147909968,
      "grad_norm": 0.117030069231987,
      "learning_rate": 0.00016466165413533835,
      "loss": 1.0082,
      "step": 580
    },
    {
      "epoch": 1.517684887459807,
      "grad_norm": 0.1274162381887436,
      "learning_rate": 0.00016184210526315788,
      "loss": 1.0102,
      "step": 590
    },
    {
      "epoch": 1.5434083601286175,
      "grad_norm": 0.13315673172473907,
      "learning_rate": 0.00015902255639097744,
      "loss": 0.985,
      "step": 600
    },
    {
      "epoch": 1.5434083601286175,
      "eval_loss": 1.0096462965011597,
      "eval_runtime": 55.507,
      "eval_samples_per_second": 36.031,
      "eval_steps_per_second": 4.504,
      "step": 600
    },
    {
      "epoch": 1.5691318327974275,
      "grad_norm": 0.12220187485218048,
      "learning_rate": 0.00015620300751879697,
      "loss": 0.9912,
      "step": 610
    },
    {
      "epoch": 1.594855305466238,
      "grad_norm": 0.12432006746530533,
      "learning_rate": 0.00015338345864661653,
      "loss": 0.9963,
      "step": 620
    },
    {
      "epoch": 1.6205787781350482,
      "grad_norm": 0.12138667702674866,
      "learning_rate": 0.00015056390977443606,
      "loss": 1.0014,
      "step": 630
    },
    {
      "epoch": 1.6463022508038585,
      "grad_norm": 0.13801203668117523,
      "learning_rate": 0.00014774436090225562,
      "loss": 0.9922,
      "step": 640
    },
    {
      "epoch": 1.6720257234726688,
      "grad_norm": 0.13330678641796112,
      "learning_rate": 0.00014492481203007518,
      "loss": 0.9897,
      "step": 650
    },
    {
      "epoch": 1.697749196141479,
      "grad_norm": 0.12127404659986496,
      "learning_rate": 0.0001421052631578947,
      "loss": 0.9898,
      "step": 660
    },
    {
      "epoch": 1.7234726688102895,
      "grad_norm": 0.13159097731113434,
      "learning_rate": 0.00013928571428571427,
      "loss": 0.9943,
      "step": 670
    },
    {
      "epoch": 1.7491961414790995,
      "grad_norm": 0.13246209919452667,
      "learning_rate": 0.00013646616541353383,
      "loss": 1.0048,
      "step": 680
    },
    {
      "epoch": 1.77491961414791,
      "grad_norm": 0.11693169921636581,
      "learning_rate": 0.00013364661654135336,
      "loss": 0.9996,
      "step": 690
    },
    {
      "epoch": 1.8006430868167203,
      "grad_norm": 0.13176512718200684,
      "learning_rate": 0.00013082706766917292,
      "loss": 0.9923,
      "step": 700
    },
    {
      "epoch": 1.8263665594855305,
      "grad_norm": 0.1224486380815506,
      "learning_rate": 0.00012800751879699248,
      "loss": 0.9799,
      "step": 710
    },
    {
      "epoch": 1.852090032154341,
      "grad_norm": 0.12146233022212982,
      "learning_rate": 0.000125187969924812,
      "loss": 0.9874,
      "step": 720
    },
    {
      "epoch": 1.877813504823151,
      "grad_norm": 0.12821047008037567,
      "learning_rate": 0.00012236842105263157,
      "loss": 0.9982,
      "step": 730
    },
    {
      "epoch": 1.9035369774919615,
      "grad_norm": 0.12273906171321869,
      "learning_rate": 0.00011954887218045111,
      "loss": 0.9946,
      "step": 740
    },
    {
      "epoch": 1.9292604501607717,
      "grad_norm": 0.1282244175672531,
      "learning_rate": 0.00011672932330827067,
      "loss": 0.9976,
      "step": 750
    },
    {
      "epoch": 1.954983922829582,
      "grad_norm": 0.13315148651599884,
      "learning_rate": 0.00011390977443609022,
      "loss": 1.0012,
      "step": 760
    },
    {
      "epoch": 1.9807073954983923,
      "grad_norm": 0.12510079145431519,
      "learning_rate": 0.00011109022556390976,
      "loss": 0.9865,
      "step": 770
    },
    {
      "epoch": 2.0064308681672025,
      "grad_norm": 0.11581259220838547,
      "learning_rate": 0.00010827067669172931,
      "loss": 1.0306,
      "step": 780
    },
    {
      "epoch": 2.032154340836013,
      "grad_norm": 0.1278076171875,
      "learning_rate": 0.00010545112781954887,
      "loss": 0.9984,
      "step": 790
    },
    {
      "epoch": 2.057877813504823,
      "grad_norm": 0.12879391014575958,
      "learning_rate": 0.00010263157894736841,
      "loss": 0.9729,
      "step": 800
    },
    {
      "epoch": 2.057877813504823,
      "eval_loss": 1.0042115449905396,
      "eval_runtime": 55.4463,
      "eval_samples_per_second": 36.071,
      "eval_steps_per_second": 4.509,
      "step": 800
    },
    {
      "epoch": 2.0836012861736335,
      "grad_norm": 0.1267639547586441,
      "learning_rate": 9.981203007518796e-05,
      "loss": 0.9899,
      "step": 810
    },
    {
      "epoch": 2.1093247588424435,
      "grad_norm": 0.13013407588005066,
      "learning_rate": 9.69924812030075e-05,
      "loss": 0.9913,
      "step": 820
    },
    {
      "epoch": 2.135048231511254,
      "grad_norm": 0.12404422461986542,
      "learning_rate": 9.417293233082706e-05,
      "loss": 0.9923,
      "step": 830
    },
    {
      "epoch": 2.1607717041800645,
      "grad_norm": 0.12091611325740814,
      "learning_rate": 9.135338345864661e-05,
      "loss": 0.9905,
      "step": 840
    },
    {
      "epoch": 2.1864951768488745,
      "grad_norm": 0.13067036867141724,
      "learning_rate": 8.853383458646615e-05,
      "loss": 0.9944,
      "step": 850
    },
    {
      "epoch": 2.212218649517685,
      "grad_norm": 0.13169625401496887,
      "learning_rate": 8.57142857142857e-05,
      "loss": 0.9848,
      "step": 860
    },
    {
      "epoch": 2.237942122186495,
      "grad_norm": 0.13107703626155853,
      "learning_rate": 8.289473684210526e-05,
      "loss": 0.9712,
      "step": 870
    },
    {
      "epoch": 2.2636655948553055,
      "grad_norm": 0.1307050734758377,
      "learning_rate": 8.00751879699248e-05,
      "loss": 0.9855,
      "step": 880
    },
    {
      "epoch": 2.289389067524116,
      "grad_norm": 0.13297782838344574,
      "learning_rate": 7.725563909774435e-05,
      "loss": 0.9918,
      "step": 890
    },
    {
      "epoch": 2.315112540192926,
      "grad_norm": 0.1355738788843155,
      "learning_rate": 7.44360902255639e-05,
      "loss": 0.9845,
      "step": 900
    },
    {
      "epoch": 2.3408360128617365,
      "grad_norm": 0.12372130155563354,
      "learning_rate": 7.161654135338345e-05,
      "loss": 0.9887,
      "step": 910
    },
    {
      "epoch": 2.3665594855305465,
      "grad_norm": 0.1307709813117981,
      "learning_rate": 6.8796992481203e-05,
      "loss": 0.9886,
      "step": 920
    },
    {
      "epoch": 2.392282958199357,
      "grad_norm": 0.12701715528964996,
      "learning_rate": 6.597744360902256e-05,
      "loss": 0.9885,
      "step": 930
    },
    {
      "epoch": 2.418006430868167,
      "grad_norm": 0.13114945590496063,
      "learning_rate": 6.315789473684209e-05,
      "loss": 0.9788,
      "step": 940
    },
    {
      "epoch": 2.4437299035369775,
      "grad_norm": 0.12744566798210144,
      "learning_rate": 6.033834586466165e-05,
      "loss": 0.986,
      "step": 950
    },
    {
      "epoch": 2.469453376205788,
      "grad_norm": 0.13428843021392822,
      "learning_rate": 5.7518796992481194e-05,
      "loss": 0.974,
      "step": 960
    },
    {
      "epoch": 2.495176848874598,
      "grad_norm": 0.13112470507621765,
      "learning_rate": 5.4699248120300746e-05,
      "loss": 0.9818,
      "step": 970
    },
    {
      "epoch": 2.5209003215434085,
      "grad_norm": 0.1363256722688675,
      "learning_rate": 5.187969924812029e-05,
      "loss": 0.9909,
      "step": 980
    },
    {
      "epoch": 2.5466237942122185,
      "grad_norm": 0.13036535680294037,
      "learning_rate": 4.9060150375939844e-05,
      "loss": 0.9921,
      "step": 990
    },
    {
      "epoch": 2.572347266881029,
      "grad_norm": 0.13345099985599518,
      "learning_rate": 4.624060150375939e-05,
      "loss": 0.9722,
      "step": 1000
    },
    {
      "epoch": 2.572347266881029,
      "eval_loss": 1.000998854637146,
      "eval_runtime": 55.4742,
      "eval_samples_per_second": 36.053,
      "eval_steps_per_second": 4.507,
      "step": 1000
    },
    {
      "epoch": 2.598070739549839,
      "grad_norm": 0.12475313246250153,
      "learning_rate": 4.342105263157895e-05,
      "loss": 0.973,
      "step": 1010
    },
    {
      "epoch": 2.6237942122186495,
      "grad_norm": 0.13274677097797394,
      "learning_rate": 4.060150375939849e-05,
      "loss": 0.9904,
      "step": 1020
    },
    {
      "epoch": 2.64951768488746,
      "grad_norm": 0.1265428215265274,
      "learning_rate": 3.7781954887218046e-05,
      "loss": 0.9817,
      "step": 1030
    },
    {
      "epoch": 2.67524115755627,
      "grad_norm": 0.13538296520709991,
      "learning_rate": 3.496240601503759e-05,
      "loss": 0.9721,
      "step": 1040
    },
    {
      "epoch": 2.7009646302250805,
      "grad_norm": 0.1293276697397232,
      "learning_rate": 3.214285714285714e-05,
      "loss": 0.9799,
      "step": 1050
    },
    {
      "epoch": 2.7266881028938905,
      "grad_norm": 0.12871681153774261,
      "learning_rate": 2.9323308270676686e-05,
      "loss": 0.9944,
      "step": 1060
    },
    {
      "epoch": 2.752411575562701,
      "grad_norm": 0.13096405565738678,
      "learning_rate": 2.6503759398496238e-05,
      "loss": 0.9788,
      "step": 1070
    },
    {
      "epoch": 2.778135048231511,
      "grad_norm": 0.1427333801984787,
      "learning_rate": 2.3684210526315787e-05,
      "loss": 0.9701,
      "step": 1080
    },
    {
      "epoch": 2.8038585209003215,
      "grad_norm": 0.1345263570547104,
      "learning_rate": 2.0864661654135336e-05,
      "loss": 0.9786,
      "step": 1090
    },
    {
      "epoch": 2.829581993569132,
      "grad_norm": 0.12688259780406952,
      "learning_rate": 1.8045112781954885e-05,
      "loss": 0.9705,
      "step": 1100
    },
    {
      "epoch": 2.855305466237942,
      "grad_norm": 0.12778617441654205,
      "learning_rate": 1.5225563909774434e-05,
      "loss": 0.9824,
      "step": 1110
    },
    {
      "epoch": 2.8810289389067525,
      "grad_norm": 0.1293039619922638,
      "learning_rate": 1.2406015037593982e-05,
      "loss": 0.9892,
      "step": 1120
    },
    {
      "epoch": 2.906752411575563,
      "grad_norm": 0.1282673329114914,
      "learning_rate": 9.586466165413533e-06,
      "loss": 0.9799,
      "step": 1130
    },
    {
      "epoch": 2.932475884244373,
      "grad_norm": 0.12761889398097992,
      "learning_rate": 6.766917293233082e-06,
      "loss": 0.9863,
      "step": 1140
    },
    {
      "epoch": 2.958199356913183,
      "grad_norm": 0.13192933797836304,
      "learning_rate": 3.947368421052631e-06,
      "loss": 0.9882,
      "step": 1150
    },
    {
      "epoch": 2.9839228295819935,
      "grad_norm": 0.12644025683403015,
      "learning_rate": 1.1278195488721803e-06,
      "loss": 0.9667,
      "step": 1160
    }
  ],
  "logging_steps": 10,
  "max_steps": 1164,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.2857331693584384e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}