|
{ |
|
"best_metric": 1.9997398853302002, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.03611412062116288, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00018057060310581438, |
|
"grad_norm": 0.6624407768249512, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5655, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00018057060310581438, |
|
"eval_loss": 2.7124359607696533, |
|
"eval_runtime": 685.9613, |
|
"eval_samples_per_second": 13.597, |
|
"eval_steps_per_second": 3.4, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00036114120621162876, |
|
"grad_norm": 0.7581428289413452, |
|
"learning_rate": 2e-05, |
|
"loss": 2.5189, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0005417118093174431, |
|
"grad_norm": 0.724696695804596, |
|
"learning_rate": 3e-05, |
|
"loss": 2.485, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0007222824124232575, |
|
"grad_norm": 0.8178666234016418, |
|
"learning_rate": 4e-05, |
|
"loss": 2.4495, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0009028530155290719, |
|
"grad_norm": 0.7225940823554993, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5998, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0010834236186348862, |
|
"grad_norm": 0.6008687615394592, |
|
"learning_rate": 6e-05, |
|
"loss": 2.5265, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0012639942217407005, |
|
"grad_norm": 0.6511266827583313, |
|
"learning_rate": 7e-05, |
|
"loss": 2.4456, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.001444564824846515, |
|
"grad_norm": 0.5716150999069214, |
|
"learning_rate": 8e-05, |
|
"loss": 2.2869, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0016251354279523294, |
|
"grad_norm": 0.6196761131286621, |
|
"learning_rate": 9e-05, |
|
"loss": 2.4605, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0018057060310581437, |
|
"grad_norm": 0.8213319182395935, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3148, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0019862766341639583, |
|
"grad_norm": 0.6406309604644775, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 2.2636, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0021668472372697724, |
|
"grad_norm": 0.5477151870727539, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 2.1784, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002347417840375587, |
|
"grad_norm": 0.5454879403114319, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 2.2564, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002527988443481401, |
|
"grad_norm": 0.44204509258270264, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 2.1552, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0027085590465872156, |
|
"grad_norm": 0.43791064620018005, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 2.105, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.00288912964969303, |
|
"grad_norm": 0.43272435665130615, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 2.1747, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0030697002527988442, |
|
"grad_norm": 0.41262441873550415, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 2.2292, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0032502708559046588, |
|
"grad_norm": 0.4293859302997589, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 2.1232, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0034308414590104733, |
|
"grad_norm": 0.460185706615448, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 2.0791, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0036114120621162874, |
|
"grad_norm": 0.4025264382362366, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 2.0914, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.003791982665222102, |
|
"grad_norm": 0.43764618039131165, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 2.168, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0039725532683279165, |
|
"grad_norm": 0.474251389503479, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 2.1354, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.004153123871433731, |
|
"grad_norm": 0.4564894437789917, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 2.1433, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.004333694474539545, |
|
"grad_norm": 0.4421786665916443, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 2.2349, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.00451426507764536, |
|
"grad_norm": 0.4256881773471832, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 2.1171, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004694835680751174, |
|
"grad_norm": 0.41441676020622253, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 2.0831, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.004875406283856988, |
|
"grad_norm": 0.4659394919872284, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 2.0631, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.005055976886962802, |
|
"grad_norm": 0.46133676171302795, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 2.1254, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.005236547490068617, |
|
"grad_norm": 0.4646921753883362, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 2.1308, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.005417118093174431, |
|
"grad_norm": 0.4502306878566742, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 2.0584, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005597688696280245, |
|
"grad_norm": 0.5119293928146362, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 2.0493, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.00577825929938606, |
|
"grad_norm": 0.5046262741088867, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 2.1177, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.005958829902491874, |
|
"grad_norm": 0.4912949204444885, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 2.1225, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0061394005055976884, |
|
"grad_norm": 0.5434021353721619, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 2.1468, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.006319971108703503, |
|
"grad_norm": 0.480820894241333, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 1.999, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0065005417118093175, |
|
"grad_norm": 0.49585434794425964, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 1.978, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.006681112314915132, |
|
"grad_norm": 0.5492971539497375, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 2.1228, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.006861682918020947, |
|
"grad_norm": 0.511803925037384, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 2.1231, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.007042253521126761, |
|
"grad_norm": 0.5920349359512329, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 2.101, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.007222824124232575, |
|
"grad_norm": 0.5832512974739075, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 2.0776, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.007403394727338389, |
|
"grad_norm": 0.6450849175453186, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 2.1303, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.007583965330444204, |
|
"grad_norm": 0.6384161114692688, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 2.1216, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.007764535933550018, |
|
"grad_norm": 0.5975738763809204, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 2.0778, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.007945106536655833, |
|
"grad_norm": 0.6410922408103943, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 2.2383, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.008125677139761646, |
|
"grad_norm": 0.6352540850639343, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 2.161, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.008306247742867461, |
|
"grad_norm": 0.644791841506958, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 2.2545, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.008486818345973276, |
|
"grad_norm": 0.7133358120918274, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 2.2299, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.00866738894907909, |
|
"grad_norm": 0.7115877270698547, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 2.4298, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.008847959552184904, |
|
"grad_norm": 0.841839075088501, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 2.3537, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.00902853015529072, |
|
"grad_norm": 0.7993963956832886, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 2.5511, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00902853015529072, |
|
"eval_loss": 2.129507541656494, |
|
"eval_runtime": 689.754, |
|
"eval_samples_per_second": 13.522, |
|
"eval_steps_per_second": 3.381, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.009209100758396533, |
|
"grad_norm": 0.42209938168525696, |
|
"learning_rate": 8.894386393810563e-05, |
|
"loss": 2.0888, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.009389671361502348, |
|
"grad_norm": 0.46093735098838806, |
|
"learning_rate": 8.842005554284296e-05, |
|
"loss": 2.054, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.009570241964608163, |
|
"grad_norm": 0.43566620349884033, |
|
"learning_rate": 8.788574348801675e-05, |
|
"loss": 2.0435, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.009750812567713976, |
|
"grad_norm": 0.3839430809020996, |
|
"learning_rate": 8.73410738492077e-05, |
|
"loss": 2.1214, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.00993138317081979, |
|
"grad_norm": 0.38588741421699524, |
|
"learning_rate": 8.678619553365659e-05, |
|
"loss": 1.9208, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.010111953773925604, |
|
"grad_norm": 0.307731956243515, |
|
"learning_rate": 8.622126023955446e-05, |
|
"loss": 1.9611, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.010292524377031419, |
|
"grad_norm": 0.31476905941963196, |
|
"learning_rate": 8.564642241456986e-05, |
|
"loss": 2.1076, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.010473094980137234, |
|
"grad_norm": 0.36871978640556335, |
|
"learning_rate": 8.506183921362443e-05, |
|
"loss": 2.0672, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.010653665583243047, |
|
"grad_norm": 0.40399834513664246, |
|
"learning_rate": 8.44676704559283e-05, |
|
"loss": 2.0905, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.010834236186348862, |
|
"grad_norm": 0.6111615896224976, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 1.896, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.011014806789454677, |
|
"grad_norm": 0.37530985474586487, |
|
"learning_rate": 8.32512286056924e-05, |
|
"loss": 2.1393, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.01119537739256049, |
|
"grad_norm": 0.41041845083236694, |
|
"learning_rate": 8.262928807620843e-05, |
|
"loss": 2.1757, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.011375947995666305, |
|
"grad_norm": 0.3588217496871948, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 1.9678, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.01155651859877212, |
|
"grad_norm": 0.3710726499557495, |
|
"learning_rate": 8.135881792367686e-05, |
|
"loss": 2.0829, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.011737089201877934, |
|
"grad_norm": 0.3658828139305115, |
|
"learning_rate": 8.07106356344834e-05, |
|
"loss": 1.9924, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.011917659804983749, |
|
"grad_norm": 0.3595665395259857, |
|
"learning_rate": 8.005405736415126e-05, |
|
"loss": 2.0748, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.012098230408089564, |
|
"grad_norm": 0.3625120222568512, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 2.0614, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.012278801011195377, |
|
"grad_norm": 0.3999013900756836, |
|
"learning_rate": 7.871643313414718e-05, |
|
"loss": 1.9798, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.012459371614301192, |
|
"grad_norm": 0.4314456582069397, |
|
"learning_rate": 7.803575286758364e-05, |
|
"loss": 2.1344, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.012639942217407007, |
|
"grad_norm": 0.4366953372955322, |
|
"learning_rate": 7.734740790612136e-05, |
|
"loss": 1.9307, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01282051282051282, |
|
"grad_norm": 0.3855569362640381, |
|
"learning_rate": 7.66515864363997e-05, |
|
"loss": 2.0309, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.013001083423618635, |
|
"grad_norm": 0.409615159034729, |
|
"learning_rate": 7.594847868906076e-05, |
|
"loss": 2.0181, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.01318165402672445, |
|
"grad_norm": 0.41800588369369507, |
|
"learning_rate": 7.52382768867422e-05, |
|
"loss": 1.9527, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.013362224629830263, |
|
"grad_norm": 0.4262048602104187, |
|
"learning_rate": 7.452117519152542e-05, |
|
"loss": 2.1873, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.013542795232936078, |
|
"grad_norm": 0.4645998179912567, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 2.2557, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.013723365836041893, |
|
"grad_norm": 0.3942699432373047, |
|
"learning_rate": 7.30670581489344e-05, |
|
"loss": 1.9468, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.013903936439147706, |
|
"grad_norm": 0.4233456254005432, |
|
"learning_rate": 7.233044034264034e-05, |
|
"loss": 2.2922, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.014084507042253521, |
|
"grad_norm": 0.4349347949028015, |
|
"learning_rate": 7.158771761692464e-05, |
|
"loss": 2.0787, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.014265077645359335, |
|
"grad_norm": 0.40108171105384827, |
|
"learning_rate": 7.083909302476453e-05, |
|
"loss": 1.7189, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.01444564824846515, |
|
"grad_norm": 0.42598074674606323, |
|
"learning_rate": 7.008477123264848e-05, |
|
"loss": 1.9847, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.014626218851570965, |
|
"grad_norm": 0.4288007915019989, |
|
"learning_rate": 6.932495846462261e-05, |
|
"loss": 1.9231, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.014806789454676778, |
|
"grad_norm": 0.4508149325847626, |
|
"learning_rate": 6.855986244591104e-05, |
|
"loss": 1.9797, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.014987360057782593, |
|
"grad_norm": 0.442592978477478, |
|
"learning_rate": 6.778969234612584e-05, |
|
"loss": 2.0614, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.015167930660888408, |
|
"grad_norm": 0.4825979173183441, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 1.9582, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.015348501263994221, |
|
"grad_norm": 0.5308371782302856, |
|
"learning_rate": 6.623497346023418e-05, |
|
"loss": 2.0677, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.015529071867100036, |
|
"grad_norm": 0.46342289447784424, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 1.9101, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.01570964247020585, |
|
"grad_norm": 0.48983103036880493, |
|
"learning_rate": 6.466250186922325e-05, |
|
"loss": 1.8399, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.015890213073311666, |
|
"grad_norm": 0.5859423279762268, |
|
"learning_rate": 6.387014543809223e-05, |
|
"loss": 2.1832, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.01607078367641748, |
|
"grad_norm": 0.5067974925041199, |
|
"learning_rate": 6.307399704769099e-05, |
|
"loss": 1.9312, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.016251354279523293, |
|
"grad_norm": 0.5896835327148438, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 2.1292, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01643192488262911, |
|
"grad_norm": 0.5307599902153015, |
|
"learning_rate": 6.147119600233758e-05, |
|
"loss": 1.8862, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.016612495485734922, |
|
"grad_norm": 0.5295412540435791, |
|
"learning_rate": 6.066498153718735e-05, |
|
"loss": 2.0057, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.016793066088840736, |
|
"grad_norm": 0.5694815516471863, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 1.9578, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.016973636691946552, |
|
"grad_norm": 0.6204591393470764, |
|
"learning_rate": 5.90440267166055e-05, |
|
"loss": 2.0074, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.017154207295052366, |
|
"grad_norm": 0.6003352999687195, |
|
"learning_rate": 5.8229729514036705e-05, |
|
"loss": 2.072, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01733477789815818, |
|
"grad_norm": 0.6216194033622742, |
|
"learning_rate": 5.74131823855921e-05, |
|
"loss": 2.052, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.017515348501263996, |
|
"grad_norm": 0.6048287749290466, |
|
"learning_rate": 5.6594608567103456e-05, |
|
"loss": 2.1365, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.01769591910436981, |
|
"grad_norm": 0.666451632976532, |
|
"learning_rate": 5.577423184847932e-05, |
|
"loss": 2.3475, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.017876489707475622, |
|
"grad_norm": 0.7967808842658997, |
|
"learning_rate": 5.495227651252315e-05, |
|
"loss": 2.282, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.01805706031058144, |
|
"grad_norm": 1.0120813846588135, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 2.5553, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01805706031058144, |
|
"eval_loss": 2.059274911880493, |
|
"eval_runtime": 689.9216, |
|
"eval_samples_per_second": 13.519, |
|
"eval_steps_per_second": 3.38, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.018237630913687252, |
|
"grad_norm": 0.3895765542984009, |
|
"learning_rate": 5.330452921628497e-05, |
|
"loss": 2.2459, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.018418201516793065, |
|
"grad_norm": 0.4267996549606323, |
|
"learning_rate": 5.247918773366112e-05, |
|
"loss": 1.9942, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.018598772119898882, |
|
"grad_norm": 0.39692366123199463, |
|
"learning_rate": 5.165316846586541e-05, |
|
"loss": 2.1023, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.018779342723004695, |
|
"grad_norm": 0.373115599155426, |
|
"learning_rate": 5.0826697238317935e-05, |
|
"loss": 1.9709, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.01895991332611051, |
|
"grad_norm": 0.38031432032585144, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0388, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.019140483929216325, |
|
"grad_norm": 0.4079199433326721, |
|
"learning_rate": 4.917330276168208e-05, |
|
"loss": 2.2443, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01932105453232214, |
|
"grad_norm": 0.34905269742012024, |
|
"learning_rate": 4.834683153413459e-05, |
|
"loss": 2.0283, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.01950162513542795, |
|
"grad_norm": 0.4155385196208954, |
|
"learning_rate": 4.7520812266338885e-05, |
|
"loss": 2.0262, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.01968219573853377, |
|
"grad_norm": 0.43362778425216675, |
|
"learning_rate": 4.669547078371504e-05, |
|
"loss": 1.9427, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.01986276634163958, |
|
"grad_norm": 0.37069815397262573, |
|
"learning_rate": 4.5871032726383386e-05, |
|
"loss": 1.9019, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.020043336944745395, |
|
"grad_norm": 0.3514178693294525, |
|
"learning_rate": 4.504772348747687e-05, |
|
"loss": 1.8921, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.020223907547851208, |
|
"grad_norm": 0.3448438048362732, |
|
"learning_rate": 4.4225768151520694e-05, |
|
"loss": 1.9658, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.020404478150957025, |
|
"grad_norm": 0.373558908700943, |
|
"learning_rate": 4.3405391432896555e-05, |
|
"loss": 2.0503, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.020585048754062838, |
|
"grad_norm": 0.3926731050014496, |
|
"learning_rate": 4.2586817614407895e-05, |
|
"loss": 1.9073, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.02076561935716865, |
|
"grad_norm": 0.40110158920288086, |
|
"learning_rate": 4.17702704859633e-05, |
|
"loss": 2.0409, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.020946189960274468, |
|
"grad_norm": 0.37772661447525024, |
|
"learning_rate": 4.095597328339452e-05, |
|
"loss": 1.9971, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.02112676056338028, |
|
"grad_norm": 0.40943604707717896, |
|
"learning_rate": 4.0144148627425993e-05, |
|
"loss": 1.878, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.021307331166486095, |
|
"grad_norm": 0.4381054937839508, |
|
"learning_rate": 3.933501846281267e-05, |
|
"loss": 2.0748, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.02148790176959191, |
|
"grad_norm": 0.4295724034309387, |
|
"learning_rate": 3.852880399766243e-05, |
|
"loss": 2.1721, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.021668472372697724, |
|
"grad_norm": 0.4464626908302307, |
|
"learning_rate": 3.772572564296005e-05, |
|
"loss": 2.0626, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.021849042975803538, |
|
"grad_norm": 0.43986180424690247, |
|
"learning_rate": 3.6926002952309016e-05, |
|
"loss": 1.9922, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.022029613578909354, |
|
"grad_norm": 0.4166202247142792, |
|
"learning_rate": 3.612985456190778e-05, |
|
"loss": 1.9705, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.022210184182015168, |
|
"grad_norm": 0.4246368110179901, |
|
"learning_rate": 3.533749813077677e-05, |
|
"loss": 1.9834, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.02239075478512098, |
|
"grad_norm": 0.4012325406074524, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 1.9898, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.022571325388226798, |
|
"grad_norm": 0.44190362095832825, |
|
"learning_rate": 3.3765026539765834e-05, |
|
"loss": 2.0085, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.02275189599133261, |
|
"grad_norm": 0.4317069947719574, |
|
"learning_rate": 3.298534127791785e-05, |
|
"loss": 1.9294, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.022932466594438424, |
|
"grad_norm": 0.4211461544036865, |
|
"learning_rate": 3.221030765387417e-05, |
|
"loss": 1.8911, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.02311303719754424, |
|
"grad_norm": 0.4544403851032257, |
|
"learning_rate": 3.144013755408895e-05, |
|
"loss": 1.9197, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.023293607800650054, |
|
"grad_norm": 0.43215247988700867, |
|
"learning_rate": 3.0675041535377405e-05, |
|
"loss": 1.9928, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.023474178403755867, |
|
"grad_norm": 0.43440431356430054, |
|
"learning_rate": 2.991522876735154e-05, |
|
"loss": 1.9714, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.023654749006861684, |
|
"grad_norm": 0.4638649523258209, |
|
"learning_rate": 2.916090697523549e-05, |
|
"loss": 1.9549, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.023835319609967497, |
|
"grad_norm": 0.4485500156879425, |
|
"learning_rate": 2.8412282383075363e-05, |
|
"loss": 1.7946, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02401589021307331, |
|
"grad_norm": 0.471426784992218, |
|
"learning_rate": 2.766955965735968e-05, |
|
"loss": 2.0043, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.024196460816179127, |
|
"grad_norm": 0.4864135980606079, |
|
"learning_rate": 2.693294185106562e-05, |
|
"loss": 1.9737, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.02437703141928494, |
|
"grad_norm": 0.49553418159484863, |
|
"learning_rate": 2.6202630348146324e-05, |
|
"loss": 1.9599, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.024557602022390754, |
|
"grad_norm": 0.48198094964027405, |
|
"learning_rate": 2.547882480847461e-05, |
|
"loss": 1.7155, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.02473817262549657, |
|
"grad_norm": 0.5210572481155396, |
|
"learning_rate": 2.476172311325783e-05, |
|
"loss": 2.0872, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.024918743228602384, |
|
"grad_norm": 0.517304539680481, |
|
"learning_rate": 2.405152131093926e-05, |
|
"loss": 1.8473, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.025099313831708197, |
|
"grad_norm": 0.5049439668655396, |
|
"learning_rate": 2.3348413563600325e-05, |
|
"loss": 1.9836, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.025279884434814014, |
|
"grad_norm": 0.5623946189880371, |
|
"learning_rate": 2.2652592093878666e-05, |
|
"loss": 1.9298, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.025460455037919827, |
|
"grad_norm": 0.5387139320373535, |
|
"learning_rate": 2.196424713241637e-05, |
|
"loss": 2.0437, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02564102564102564, |
|
"grad_norm": 0.5679649114608765, |
|
"learning_rate": 2.128356686585282e-05, |
|
"loss": 2.0832, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.025821596244131457, |
|
"grad_norm": 0.5492368340492249, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 1.9627, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.02600216684723727, |
|
"grad_norm": 0.5792611241340637, |
|
"learning_rate": 1.9945942635848748e-05, |
|
"loss": 1.9441, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.026182737450343083, |
|
"grad_norm": 0.566913902759552, |
|
"learning_rate": 1.928936436551661e-05, |
|
"loss": 1.9718, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0263633080534489, |
|
"grad_norm": 0.6455346941947937, |
|
"learning_rate": 1.8641182076323148e-05, |
|
"loss": 2.1376, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.026543878656554713, |
|
"grad_norm": 0.6583056449890137, |
|
"learning_rate": 1.800157297483417e-05, |
|
"loss": 2.143, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.026724449259660527, |
|
"grad_norm": 0.6932365298271179, |
|
"learning_rate": 1.7370711923791567e-05, |
|
"loss": 2.1724, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.026905019862766343, |
|
"grad_norm": 0.735855221748352, |
|
"learning_rate": 1.6748771394307585e-05, |
|
"loss": 2.5011, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.027085590465872156, |
|
"grad_norm": 0.8176823258399963, |
|
"learning_rate": 1.6135921418712956e-05, |
|
"loss": 2.5626, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.027085590465872156, |
|
"eval_loss": 2.012115478515625, |
|
"eval_runtime": 689.2055, |
|
"eval_samples_per_second": 13.533, |
|
"eval_steps_per_second": 3.384, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02726616106897797, |
|
"grad_norm": 0.3280618488788605, |
|
"learning_rate": 1.553232954407171e-05, |
|
"loss": 2.1242, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.027446731672083786, |
|
"grad_norm": 0.376322478055954, |
|
"learning_rate": 1.4938160786375572e-05, |
|
"loss": 2.0758, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.0276273022751896, |
|
"grad_norm": 0.39415812492370605, |
|
"learning_rate": 1.435357758543015e-05, |
|
"loss": 2.1176, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.027807872878295413, |
|
"grad_norm": 0.38797420263290405, |
|
"learning_rate": 1.3778739760445552e-05, |
|
"loss": 2.2395, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.027988443481401226, |
|
"grad_norm": 0.4005449414253235, |
|
"learning_rate": 1.3213804466343421e-05, |
|
"loss": 2.1176, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.028169014084507043, |
|
"grad_norm": 0.3990994989871979, |
|
"learning_rate": 1.2658926150792322e-05, |
|
"loss": 2.0359, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.028349584687612856, |
|
"grad_norm": 0.4104321300983429, |
|
"learning_rate": 1.2114256511983274e-05, |
|
"loss": 2.0776, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.02853015529071867, |
|
"grad_norm": 0.3822646737098694, |
|
"learning_rate": 1.157994445715706e-05, |
|
"loss": 2.1051, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.028710725893824486, |
|
"grad_norm": 0.3958430886268616, |
|
"learning_rate": 1.1056136061894384e-05, |
|
"loss": 1.8198, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.0288912964969303, |
|
"grad_norm": 0.47039517760276794, |
|
"learning_rate": 1.0542974530180327e-05, |
|
"loss": 1.6944, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.029071867100036113, |
|
"grad_norm": 0.37155014276504517, |
|
"learning_rate": 1.0040600155253765e-05, |
|
"loss": 1.8661, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.02925243770314193, |
|
"grad_norm": 0.38018548488616943, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 1.9036, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.029433008306247743, |
|
"grad_norm": 0.4522913098335266, |
|
"learning_rate": 9.068759265665384e-06, |
|
"loss": 2.1851, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.029613578909353556, |
|
"grad_norm": 0.42370957136154175, |
|
"learning_rate": 8.599558442598998e-06, |
|
"loss": 2.0044, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.029794149512459372, |
|
"grad_norm": 0.40131139755249023, |
|
"learning_rate": 8.141676086873572e-06, |
|
"loss": 1.927, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.029974720115565186, |
|
"grad_norm": 0.3930165469646454, |
|
"learning_rate": 7.695237378953223e-06, |
|
"loss": 2.0277, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.030155290718671, |
|
"grad_norm": 0.405367910861969, |
|
"learning_rate": 7.260364370723044e-06, |
|
"loss": 2.0332, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.030335861321776816, |
|
"grad_norm": 0.3978680372238159, |
|
"learning_rate": 6.837175952121306e-06, |
|
"loss": 1.9382, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.03051643192488263, |
|
"grad_norm": 0.41161930561065674, |
|
"learning_rate": 6.425787818636131e-06, |
|
"loss": 1.8757, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.030697002527988442, |
|
"grad_norm": 0.3971489667892456, |
|
"learning_rate": 6.026312439675552e-06, |
|
"loss": 1.8836, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03087757313109426, |
|
"grad_norm": 0.42250388860702515, |
|
"learning_rate": 5.6388590278194096e-06, |
|
"loss": 2.0028, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.031058143734200072, |
|
"grad_norm": 0.43362781405448914, |
|
"learning_rate": 5.263533508961827e-06, |
|
"loss": 1.8517, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.031238714337305885, |
|
"grad_norm": 0.4665611684322357, |
|
"learning_rate": 4.900438493352055e-06, |
|
"loss": 1.9923, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.0314192849404117, |
|
"grad_norm": 0.4335770606994629, |
|
"learning_rate": 4.549673247541875e-06, |
|
"loss": 1.9364, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.03159985554351752, |
|
"grad_norm": 0.4319969415664673, |
|
"learning_rate": 4.2113336672471245e-06, |
|
"loss": 2.0736, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.03178042614662333, |
|
"grad_norm": 0.48032429814338684, |
|
"learning_rate": 3.885512251130763e-06, |
|
"loss": 1.9101, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.031960996749729145, |
|
"grad_norm": 0.47980910539627075, |
|
"learning_rate": 3.5722980755146517e-06, |
|
"loss": 1.9445, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.03214156735283496, |
|
"grad_norm": 0.4643956422805786, |
|
"learning_rate": 3.271776770026963e-06, |
|
"loss": 1.9368, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.03232213795594077, |
|
"grad_norm": 0.46265125274658203, |
|
"learning_rate": 2.9840304941919415e-06, |
|
"loss": 2.0272, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.032502708559046585, |
|
"grad_norm": 0.4877752363681793, |
|
"learning_rate": 2.7091379149682685e-06, |
|
"loss": 1.8422, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0326832791621524, |
|
"grad_norm": 0.48772457242012024, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 2.0428, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.03286384976525822, |
|
"grad_norm": 0.45916715264320374, |
|
"learning_rate": 2.1982109232821178e-06, |
|
"loss": 1.8086, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.03304442036836403, |
|
"grad_norm": 0.4550033211708069, |
|
"learning_rate": 1.962316193157593e-06, |
|
"loss": 2.0127, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.033224990971469845, |
|
"grad_norm": 0.5124539732933044, |
|
"learning_rate": 1.7395544861325718e-06, |
|
"loss": 1.8886, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.03340556157457566, |
|
"grad_norm": 0.4621534049510956, |
|
"learning_rate": 1.5299867030334814e-06, |
|
"loss": 1.9027, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.03358613217768147, |
|
"grad_norm": 0.4838806092739105, |
|
"learning_rate": 1.333670137599713e-06, |
|
"loss": 1.9198, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.033766702780787285, |
|
"grad_norm": 0.4880743622779846, |
|
"learning_rate": 1.1506584608200367e-06, |
|
"loss": 1.9477, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.033947273383893105, |
|
"grad_norm": 0.6479881405830383, |
|
"learning_rate": 9.810017062595322e-07, |
|
"loss": 1.9241, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.03412784398699892, |
|
"grad_norm": 0.5301950573921204, |
|
"learning_rate": 8.247462563808817e-07, |
|
"loss": 2.104, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.03430841459010473, |
|
"grad_norm": 0.53728848695755, |
|
"learning_rate": 6.819348298638839e-07, |
|
"loss": 1.9955, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.034488985193210545, |
|
"grad_norm": 0.5534414052963257, |
|
"learning_rate": 5.526064699265753e-07, |
|
"loss": 2.0029, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.03466955579631636, |
|
"grad_norm": 0.563531219959259, |
|
"learning_rate": 4.367965336512403e-07, |
|
"loss": 1.9819, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03485012639942217, |
|
"grad_norm": 0.5725007057189941, |
|
"learning_rate": 3.3453668231809286e-07, |
|
"loss": 1.947, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.03503069700252799, |
|
"grad_norm": 0.610448956489563, |
|
"learning_rate": 2.458548727494292e-07, |
|
"loss": 1.97, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.035211267605633804, |
|
"grad_norm": 0.6770745515823364, |
|
"learning_rate": 1.7077534966650766e-07, |
|
"loss": 2.1646, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.03539183820873962, |
|
"grad_norm": 0.5982887744903564, |
|
"learning_rate": 1.0931863906127327e-07, |
|
"loss": 1.8725, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03557240881184543, |
|
"grad_norm": 0.5943398475646973, |
|
"learning_rate": 6.150154258476315e-08, |
|
"loss": 2.0284, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.035752979414951244, |
|
"grad_norm": 0.7149994373321533, |
|
"learning_rate": 2.7337132953697554e-08, |
|
"loss": 2.1589, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03593355001805706, |
|
"grad_norm": 0.8116176128387451, |
|
"learning_rate": 6.834750376549792e-09, |
|
"loss": 2.3804, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.03611412062116288, |
|
"grad_norm": 0.8255523443222046, |
|
"learning_rate": 0.0, |
|
"loss": 2.4566, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03611412062116288, |
|
"eval_loss": 1.9997398853302002, |
|
"eval_runtime": 689.5883, |
|
"eval_samples_per_second": 13.525, |
|
"eval_steps_per_second": 3.382, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.70499288449024e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|