{ "best_metric": 1.000998854637146, "best_model_checkpoint": "./lora-alpaca3/checkpoint-1000", "epoch": 2.572347266881029, "eval_steps": 200, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02572347266881029, "grad_norm": 0.5646638870239258, "learning_rate": 2.9999999999999997e-05, "loss": 1.9009, "step": 10 }, { "epoch": 0.05144694533762058, "grad_norm": 0.5377612709999084, "learning_rate": 5.9999999999999995e-05, "loss": 1.8355, "step": 20 }, { "epoch": 0.07717041800643087, "grad_norm": 0.5958595871925354, "learning_rate": 8.999999999999999e-05, "loss": 1.6639, "step": 30 }, { "epoch": 0.10289389067524116, "grad_norm": 0.5721263885498047, "learning_rate": 0.00011999999999999999, "loss": 1.3994, "step": 40 }, { "epoch": 0.12861736334405144, "grad_norm": 0.2665337026119232, "learning_rate": 0.00015, "loss": 1.1767, "step": 50 }, { "epoch": 0.15434083601286175, "grad_norm": 0.12332847714424133, "learning_rate": 0.00017999999999999998, "loss": 1.1413, "step": 60 }, { "epoch": 0.18006430868167203, "grad_norm": 0.12157242000102997, "learning_rate": 0.00020999999999999998, "loss": 1.1103, "step": 70 }, { "epoch": 0.2057877813504823, "grad_norm": 0.12799611687660217, "learning_rate": 0.00023999999999999998, "loss": 1.1003, "step": 80 }, { "epoch": 0.2315112540192926, "grad_norm": 0.12139850109815598, "learning_rate": 0.00027, "loss": 1.084, "step": 90 }, { "epoch": 0.2572347266881029, "grad_norm": 0.144248366355896, "learning_rate": 0.0003, "loss": 1.0722, "step": 100 }, { "epoch": 0.2829581993569132, "grad_norm": 0.15326175093650818, "learning_rate": 0.00029718045112781953, "loss": 1.0731, "step": 110 }, { "epoch": 0.3086816720257235, "grad_norm": 0.12561754882335663, "learning_rate": 0.00029436090225563904, "loss": 1.0537, "step": 120 }, { "epoch": 0.33440514469453375, "grad_norm": 0.15570670366287231, "learning_rate": 0.0002915413533834586, "loss": 1.049, "step": 130 }, { "epoch": 0.36012861736334406, "grad_norm": 0.14343878626823425, "learning_rate": 0.00028872180451127816, "loss": 1.0273, "step": 140 }, { "epoch": 0.3858520900321543, "grad_norm": 0.1315772831439972, "learning_rate": 0.0002859022556390977, "loss": 1.0529, "step": 150 }, { "epoch": 0.4115755627009646, "grad_norm": 0.13874055445194244, "learning_rate": 0.0002830827067669173, "loss": 1.044, "step": 160 }, { "epoch": 0.43729903536977494, "grad_norm": 0.1271531730890274, "learning_rate": 0.00028026315789473683, "loss": 1.0388, "step": 170 }, { "epoch": 0.4630225080385852, "grad_norm": 0.12684497237205505, "learning_rate": 0.0002774436090225564, "loss": 1.0385, "step": 180 }, { "epoch": 0.4887459807073955, "grad_norm": 0.11675913631916046, "learning_rate": 0.0002746240601503759, "loss": 1.0349, "step": 190 }, { "epoch": 0.5144694533762058, "grad_norm": 0.15151233971118927, "learning_rate": 0.00027180451127819546, "loss": 1.0282, "step": 200 }, { "epoch": 0.5144694533762058, "eval_loss": 1.0400722026824951, "eval_runtime": 55.5366, "eval_samples_per_second": 36.012, "eval_steps_per_second": 4.502, "step": 200 }, { "epoch": 0.5401929260450161, "grad_norm": 0.1420961171388626, "learning_rate": 0.000268984962406015, "loss": 1.0297, "step": 210 }, { "epoch": 0.5659163987138264, "grad_norm": 0.13161741197109222, "learning_rate": 0.0002661654135338346, "loss": 1.0276, "step": 220 }, { "epoch": 0.5916398713826366, "grad_norm": 0.13450871407985687, "learning_rate": 0.00026334586466165413, "loss": 1.0285, "step": 230 }, { "epoch": 0.617363344051447, "grad_norm": 0.13193918764591217, "learning_rate": 0.0002605263157894737, "loss": 1.0309, "step": 240 }, { "epoch": 0.6430868167202572, "grad_norm": 0.1393500566482544, "learning_rate": 0.0002577067669172932, "loss": 1.0315, "step": 250 }, { "epoch": 0.6688102893890675, "grad_norm": 0.11902929842472076, "learning_rate": 0.00025488721804511276, "loss": 1.0347, "step": 260 }, { "epoch": 0.6945337620578779, "grad_norm": 0.1258976310491562, "learning_rate": 0.0002520676691729323, "loss": 1.0273, "step": 270 }, { "epoch": 0.7202572347266881, "grad_norm": 0.12598267197608948, "learning_rate": 0.0002492481203007519, "loss": 1.0128, "step": 280 }, { "epoch": 0.7459807073954984, "grad_norm": 0.122463159263134, "learning_rate": 0.0002464285714285714, "loss": 1.0251, "step": 290 }, { "epoch": 0.7717041800643086, "grad_norm": 0.1293506771326065, "learning_rate": 0.00024360902255639094, "loss": 1.0317, "step": 300 }, { "epoch": 0.797427652733119, "grad_norm": 0.13788191974163055, "learning_rate": 0.00024078947368421052, "loss": 1.029, "step": 310 }, { "epoch": 0.8231511254019293, "grad_norm": 0.1445332169532776, "learning_rate": 0.00023796992481203005, "loss": 1.0173, "step": 320 }, { "epoch": 0.8488745980707395, "grad_norm": 0.11408714205026627, "learning_rate": 0.00023515037593984961, "loss": 1.0109, "step": 330 }, { "epoch": 0.8745980707395499, "grad_norm": 0.11767855286598206, "learning_rate": 0.00023233082706766915, "loss": 1.0116, "step": 340 }, { "epoch": 0.9003215434083601, "grad_norm": 0.12555481493473053, "learning_rate": 0.0002295112781954887, "loss": 1.0144, "step": 350 }, { "epoch": 0.9260450160771704, "grad_norm": 0.12403016537427902, "learning_rate": 0.00022669172932330824, "loss": 1.0299, "step": 360 }, { "epoch": 0.9517684887459807, "grad_norm": 0.12968535721302032, "learning_rate": 0.0002238721804511278, "loss": 1.0081, "step": 370 }, { "epoch": 0.977491961414791, "grad_norm": 0.12369370460510254, "learning_rate": 0.00022105263157894733, "loss": 1.0065, "step": 380 }, { "epoch": 1.0032154340836013, "grad_norm": 0.12212081998586655, "learning_rate": 0.0002182330827067669, "loss": 1.0448, "step": 390 }, { "epoch": 1.0289389067524115, "grad_norm": 0.1301109939813614, "learning_rate": 0.00021541353383458647, "loss": 1.0052, "step": 400 }, { "epoch": 1.0289389067524115, "eval_loss": 1.0189071893692017, "eval_runtime": 55.4593, "eval_samples_per_second": 36.062, "eval_steps_per_second": 4.508, "step": 400 }, { "epoch": 1.0546623794212218, "grad_norm": 0.13505908846855164, "learning_rate": 0.000212593984962406, "loss": 1.0009, "step": 410 }, { "epoch": 1.0803858520900322, "grad_norm": 0.12307268381118774, "learning_rate": 0.00020977443609022556, "loss": 1.0039, "step": 420 }, { "epoch": 1.1061093247588425, "grad_norm": 0.11501840502023697, "learning_rate": 0.0002069548872180451, "loss": 0.9988, "step": 430 }, { "epoch": 1.1318327974276527, "grad_norm": 0.12922251224517822, "learning_rate": 0.00020413533834586463, "loss": 0.9938, "step": 440 }, { "epoch": 1.157556270096463, "grad_norm": 0.12304320186376572, "learning_rate": 0.0002013157894736842, "loss": 1.0223, "step": 450 }, { "epoch": 1.1832797427652733, "grad_norm": 0.11323860287666321, "learning_rate": 0.00019849624060150372, "loss": 0.9957, "step": 460 }, { "epoch": 1.2090032154340835, "grad_norm": 0.1335834562778473, "learning_rate": 0.00019567669172932328, "loss": 1.0042, "step": 470 }, { "epoch": 1.234726688102894, "grad_norm": 0.12371603399515152, "learning_rate": 0.00019285714285714286, "loss": 1.0056, "step": 480 }, { "epoch": 1.2604501607717042, "grad_norm": 0.12318002432584763, "learning_rate": 0.0001900375939849624, "loss": 1.0122, "step": 490 }, { "epoch": 1.2861736334405145, "grad_norm": 0.13187570869922638, "learning_rate": 0.00018721804511278195, "loss": 0.9971, "step": 500 }, { "epoch": 1.3118971061093248, "grad_norm": 0.11861589550971985, "learning_rate": 0.00018439849624060149, "loss": 0.9926, "step": 510 }, { "epoch": 1.337620578778135, "grad_norm": 0.11287492513656616, "learning_rate": 0.00018157894736842105, "loss": 1.016, "step": 520 }, { "epoch": 1.3633440514469453, "grad_norm": 0.1384376585483551, "learning_rate": 0.00017875939849624058, "loss": 1.0059, "step": 530 }, { "epoch": 1.3890675241157555, "grad_norm": 0.12147587537765503, "learning_rate": 0.00017593984962406014, "loss": 1.0213, "step": 540 }, { "epoch": 1.414790996784566, "grad_norm": 0.13031207025051117, "learning_rate": 0.00017312030075187967, "loss": 1.005, "step": 550 }, { "epoch": 1.4405144694533762, "grad_norm": 0.13779744505882263, "learning_rate": 0.00017030075187969925, "loss": 1.0077, "step": 560 }, { "epoch": 1.4662379421221865, "grad_norm": 0.13196057081222534, "learning_rate": 0.00016748120300751879, "loss": 0.9944, "step": 570 }, { "epoch": 1.4919614147909968, "grad_norm": 0.117030069231987, "learning_rate": 0.00016466165413533835, "loss": 1.0082, "step": 580 }, { "epoch": 1.517684887459807, "grad_norm": 0.1274162381887436, "learning_rate": 0.00016184210526315788, "loss": 1.0102, "step": 590 }, { "epoch": 1.5434083601286175, "grad_norm": 0.13315673172473907, "learning_rate": 0.00015902255639097744, "loss": 0.985, "step": 600 }, { "epoch": 1.5434083601286175, "eval_loss": 1.0096462965011597, "eval_runtime": 55.507, "eval_samples_per_second": 36.031, "eval_steps_per_second": 4.504, "step": 600 }, { "epoch": 1.5691318327974275, "grad_norm": 0.12220187485218048, "learning_rate": 0.00015620300751879697, "loss": 0.9912, "step": 610 }, { "epoch": 1.594855305466238, "grad_norm": 0.12432006746530533, "learning_rate": 0.00015338345864661653, "loss": 0.9963, "step": 620 }, { "epoch": 1.6205787781350482, "grad_norm": 0.12138667702674866, "learning_rate": 0.00015056390977443606, "loss": 1.0014, "step": 630 }, { "epoch": 1.6463022508038585, "grad_norm": 0.13801203668117523, "learning_rate": 0.00014774436090225562, "loss": 0.9922, "step": 640 }, { "epoch": 1.6720257234726688, "grad_norm": 0.13330678641796112, "learning_rate": 0.00014492481203007518, "loss": 0.9897, "step": 650 }, { "epoch": 1.697749196141479, "grad_norm": 0.12127404659986496, "learning_rate": 0.0001421052631578947, "loss": 0.9898, "step": 660 }, { "epoch": 1.7234726688102895, "grad_norm": 0.13159097731113434, "learning_rate": 0.00013928571428571427, "loss": 0.9943, "step": 670 }, { "epoch": 1.7491961414790995, "grad_norm": 0.13246209919452667, "learning_rate": 0.00013646616541353383, "loss": 1.0048, "step": 680 }, { "epoch": 1.77491961414791, "grad_norm": 0.11693169921636581, "learning_rate": 0.00013364661654135336, "loss": 0.9996, "step": 690 }, { "epoch": 1.8006430868167203, "grad_norm": 0.13176512718200684, "learning_rate": 0.00013082706766917292, "loss": 0.9923, "step": 700 }, { "epoch": 1.8263665594855305, "grad_norm": 0.1224486380815506, "learning_rate": 0.00012800751879699248, "loss": 0.9799, "step": 710 }, { "epoch": 1.852090032154341, "grad_norm": 0.12146233022212982, "learning_rate": 0.000125187969924812, "loss": 0.9874, "step": 720 }, { "epoch": 1.877813504823151, "grad_norm": 0.12821047008037567, "learning_rate": 0.00012236842105263157, "loss": 0.9982, "step": 730 }, { "epoch": 1.9035369774919615, "grad_norm": 0.12273906171321869, "learning_rate": 0.00011954887218045111, "loss": 0.9946, "step": 740 }, { "epoch": 1.9292604501607717, "grad_norm": 0.1282244175672531, "learning_rate": 0.00011672932330827067, "loss": 0.9976, "step": 750 }, { "epoch": 1.954983922829582, "grad_norm": 0.13315148651599884, "learning_rate": 0.00011390977443609022, "loss": 1.0012, "step": 760 }, { "epoch": 1.9807073954983923, "grad_norm": 0.12510079145431519, "learning_rate": 0.00011109022556390976, "loss": 0.9865, "step": 770 }, { "epoch": 2.0064308681672025, "grad_norm": 0.11581259220838547, "learning_rate": 0.00010827067669172931, "loss": 1.0306, "step": 780 }, { "epoch": 2.032154340836013, "grad_norm": 0.1278076171875, "learning_rate": 0.00010545112781954887, "loss": 0.9984, "step": 790 }, { "epoch": 2.057877813504823, "grad_norm": 0.12879391014575958, "learning_rate": 0.00010263157894736841, "loss": 0.9729, "step": 800 }, { "epoch": 2.057877813504823, "eval_loss": 1.0042115449905396, "eval_runtime": 55.4463, "eval_samples_per_second": 36.071, "eval_steps_per_second": 4.509, "step": 800 }, { "epoch": 2.0836012861736335, "grad_norm": 0.1267639547586441, "learning_rate": 9.981203007518796e-05, "loss": 0.9899, "step": 810 }, { "epoch": 2.1093247588424435, "grad_norm": 0.13013407588005066, "learning_rate": 9.69924812030075e-05, "loss": 0.9913, "step": 820 }, { "epoch": 2.135048231511254, "grad_norm": 0.12404422461986542, "learning_rate": 9.417293233082706e-05, "loss": 0.9923, "step": 830 }, { "epoch": 2.1607717041800645, "grad_norm": 0.12091611325740814, "learning_rate": 9.135338345864661e-05, "loss": 0.9905, "step": 840 }, { "epoch": 2.1864951768488745, "grad_norm": 0.13067036867141724, "learning_rate": 8.853383458646615e-05, "loss": 0.9944, "step": 850 }, { "epoch": 2.212218649517685, "grad_norm": 0.13169625401496887, "learning_rate": 8.57142857142857e-05, "loss": 0.9848, "step": 860 }, { "epoch": 2.237942122186495, "grad_norm": 0.13107703626155853, "learning_rate": 8.289473684210526e-05, "loss": 0.9712, "step": 870 }, { "epoch": 2.2636655948553055, "grad_norm": 0.1307050734758377, "learning_rate": 8.00751879699248e-05, "loss": 0.9855, "step": 880 }, { "epoch": 2.289389067524116, "grad_norm": 0.13297782838344574, "learning_rate": 7.725563909774435e-05, "loss": 0.9918, "step": 890 }, { "epoch": 2.315112540192926, "grad_norm": 0.1355738788843155, "learning_rate": 7.44360902255639e-05, "loss": 0.9845, "step": 900 }, { "epoch": 2.3408360128617365, "grad_norm": 0.12372130155563354, "learning_rate": 7.161654135338345e-05, "loss": 0.9887, "step": 910 }, { "epoch": 2.3665594855305465, "grad_norm": 0.1307709813117981, "learning_rate": 6.8796992481203e-05, "loss": 0.9886, "step": 920 }, { "epoch": 2.392282958199357, "grad_norm": 0.12701715528964996, "learning_rate": 6.597744360902256e-05, "loss": 0.9885, "step": 930 }, { "epoch": 2.418006430868167, "grad_norm": 0.13114945590496063, "learning_rate": 6.315789473684209e-05, "loss": 0.9788, "step": 940 }, { "epoch": 2.4437299035369775, "grad_norm": 0.12744566798210144, "learning_rate": 6.033834586466165e-05, "loss": 0.986, "step": 950 }, { "epoch": 2.469453376205788, "grad_norm": 0.13428843021392822, "learning_rate": 5.7518796992481194e-05, "loss": 0.974, "step": 960 }, { "epoch": 2.495176848874598, "grad_norm": 0.13112470507621765, "learning_rate": 5.4699248120300746e-05, "loss": 0.9818, "step": 970 }, { "epoch": 2.5209003215434085, "grad_norm": 0.1363256722688675, "learning_rate": 5.187969924812029e-05, "loss": 0.9909, "step": 980 }, { "epoch": 2.5466237942122185, "grad_norm": 0.13036535680294037, "learning_rate": 4.9060150375939844e-05, "loss": 0.9921, "step": 990 }, { "epoch": 2.572347266881029, "grad_norm": 0.13345099985599518, "learning_rate": 4.624060150375939e-05, "loss": 0.9722, "step": 1000 }, { "epoch": 2.572347266881029, "eval_loss": 1.000998854637146, "eval_runtime": 55.4742, "eval_samples_per_second": 36.053, "eval_steps_per_second": 4.507, "step": 1000 } ], "logging_steps": 10, "max_steps": 1164, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9627225935210086e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }