|
{ |
|
"best_metric": 2.660818836375256e-06, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.4417753848277766, |
|
"eval_steps": 25, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022088769241388833, |
|
"grad_norm": 17.61588478088379, |
|
"learning_rate": 1.25e-05, |
|
"loss": 3.9116, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0022088769241388833, |
|
"eval_loss": 5.86042594909668, |
|
"eval_runtime": 2.0601, |
|
"eval_samples_per_second": 24.27, |
|
"eval_steps_per_second": 6.31, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0044177538482777666, |
|
"grad_norm": 18.221654891967773, |
|
"learning_rate": 2.5e-05, |
|
"loss": 4.2731, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006626630772416649, |
|
"grad_norm": 18.73583984375, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 4.2538, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008835507696555533, |
|
"grad_norm": 16.894182205200195, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5849, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.011044384620694415, |
|
"grad_norm": 13.671380996704102, |
|
"learning_rate": 6.25e-05, |
|
"loss": 2.4494, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.013253261544833299, |
|
"grad_norm": 14.39706039428711, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.5397, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.015462138468972183, |
|
"grad_norm": 36.43305587768555, |
|
"learning_rate": 8.75e-05, |
|
"loss": 4.0588, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.017671015393111066, |
|
"grad_norm": 29.52732276916504, |
|
"learning_rate": 0.0001, |
|
"loss": 2.5498, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.019879892317249948, |
|
"grad_norm": 15.871655464172363, |
|
"learning_rate": 9.999397620593029e-05, |
|
"loss": 0.7469, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02208876924138883, |
|
"grad_norm": 37.426918029785156, |
|
"learning_rate": 9.997590643643647e-05, |
|
"loss": 0.2828, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024297646165527716, |
|
"grad_norm": 38.68975830078125, |
|
"learning_rate": 9.994579552923277e-05, |
|
"loss": 0.4165, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.026506523089666598, |
|
"grad_norm": 3.2423059940338135, |
|
"learning_rate": 9.990365154573717e-05, |
|
"loss": 0.0656, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.02871540001380548, |
|
"grad_norm": 21.096725463867188, |
|
"learning_rate": 9.984948576891312e-05, |
|
"loss": 1.0799, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.030924276937944365, |
|
"grad_norm": 10.848109245300293, |
|
"learning_rate": 9.978331270024886e-05, |
|
"loss": 0.4541, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.033133153862083244, |
|
"grad_norm": 3.0925045013427734, |
|
"learning_rate": 9.9705150055875e-05, |
|
"loss": 0.0921, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03534203078622213, |
|
"grad_norm": 0.7270008325576782, |
|
"learning_rate": 9.961501876182148e-05, |
|
"loss": 0.012, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.037550907710361014, |
|
"grad_norm": 2.127373695373535, |
|
"learning_rate": 9.951294294841516e-05, |
|
"loss": 0.006, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.039759784634499896, |
|
"grad_norm": 13.903672218322754, |
|
"learning_rate": 9.939894994381957e-05, |
|
"loss": 0.0372, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04196866155863878, |
|
"grad_norm": 0.047538336366415024, |
|
"learning_rate": 9.927307026671848e-05, |
|
"loss": 0.0005, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04417753848277766, |
|
"grad_norm": 0.12976495921611786, |
|
"learning_rate": 9.913533761814537e-05, |
|
"loss": 0.0015, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04638641540691654, |
|
"grad_norm": 0.053914595395326614, |
|
"learning_rate": 9.898578887246075e-05, |
|
"loss": 0.0006, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.04859529233105543, |
|
"grad_norm": 0.016516996547579765, |
|
"learning_rate": 9.882446406748002e-05, |
|
"loss": 0.0002, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05080416925519431, |
|
"grad_norm": 0.004742575343698263, |
|
"learning_rate": 9.865140639375449e-05, |
|
"loss": 0.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.053013046179333195, |
|
"grad_norm": 0.001722239889204502, |
|
"learning_rate": 9.846666218300807e-05, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05522192310347208, |
|
"grad_norm": 0.0011573476949706674, |
|
"learning_rate": 9.827028089573329e-05, |
|
"loss": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05522192310347208, |
|
"eval_loss": 0.22512628138065338, |
|
"eval_runtime": 2.0717, |
|
"eval_samples_per_second": 24.135, |
|
"eval_steps_per_second": 6.275, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05743080002761096, |
|
"grad_norm": 0.7982810735702515, |
|
"learning_rate": 9.80623151079494e-05, |
|
"loss": 0.002, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05963967695174984, |
|
"grad_norm": 0.04265284910798073, |
|
"learning_rate": 9.78428204971266e-05, |
|
"loss": 0.0005, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06184855387588873, |
|
"grad_norm": 0.02398202195763588, |
|
"learning_rate": 9.761185582727977e-05, |
|
"loss": 0.0003, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.06405743080002761, |
|
"grad_norm": 0.020716844126582146, |
|
"learning_rate": 9.736948293323593e-05, |
|
"loss": 0.0003, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06626630772416649, |
|
"grad_norm": 0.012970969080924988, |
|
"learning_rate": 9.711576670407965e-05, |
|
"loss": 0.0002, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06847518464830538, |
|
"grad_norm": 0.0036041405983269215, |
|
"learning_rate": 9.685077506578055e-05, |
|
"loss": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07068406157244427, |
|
"grad_norm": 0.0019516213797032833, |
|
"learning_rate": 9.657457896300791e-05, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.07289293849658314, |
|
"grad_norm": 0.0024989373050630093, |
|
"learning_rate": 9.62872523401371e-05, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.07510181542072203, |
|
"grad_norm": 0.002753888489678502, |
|
"learning_rate": 9.598887212145291e-05, |
|
"loss": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0773106923448609, |
|
"grad_norm": 0.0028842987958341837, |
|
"learning_rate": 9.567951819055496e-05, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07951956926899979, |
|
"grad_norm": 0.0026685006450861692, |
|
"learning_rate": 9.535927336897098e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08172844619313868, |
|
"grad_norm": 0.002319728024303913, |
|
"learning_rate": 9.502822339398353e-05, |
|
"loss": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.08393732311727756, |
|
"grad_norm": 0.0013683955185115337, |
|
"learning_rate": 9.468645689567598e-05, |
|
"loss": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.08614620004141645, |
|
"grad_norm": 0.007890756241977215, |
|
"learning_rate": 9.433406537320415e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.08835507696555532, |
|
"grad_norm": 0.005061345640569925, |
|
"learning_rate": 9.397114317029975e-05, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09056395388969421, |
|
"grad_norm": 0.0023479792289435863, |
|
"learning_rate": 9.359778745001225e-05, |
|
"loss": 0.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.09277283081383308, |
|
"grad_norm": 0.0018866774626076221, |
|
"learning_rate": 9.321409816869605e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.09498170773797197, |
|
"grad_norm": 0.0013167414581403136, |
|
"learning_rate": 9.282017804924972e-05, |
|
"loss": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.09719058466211086, |
|
"grad_norm": 0.07475938647985458, |
|
"learning_rate": 9.241613255361455e-05, |
|
"loss": 0.0001, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.09939946158624974, |
|
"grad_norm": 0.0007279085111804307, |
|
"learning_rate": 9.200206985453987e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10160833851038863, |
|
"grad_norm": 0.0006586903473362327, |
|
"learning_rate": 9.157810080662269e-05, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1038172154345275, |
|
"grad_norm": 0.0005882774712517858, |
|
"learning_rate": 9.114433891662902e-05, |
|
"loss": 0.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.10602609235866639, |
|
"grad_norm": 0.0005312666180543602, |
|
"learning_rate": 9.070090031310558e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.10823496928280528, |
|
"grad_norm": 0.0004986180574633181, |
|
"learning_rate": 9.024790371528927e-05, |
|
"loss": 0.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11044384620694415, |
|
"grad_norm": 0.0004686967877205461, |
|
"learning_rate": 8.978547040132317e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11044384620694415, |
|
"eval_loss": 6.856966138002463e-06, |
|
"eval_runtime": 2.084, |
|
"eval_samples_per_second": 23.992, |
|
"eval_steps_per_second": 6.238, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11265272313108304, |
|
"grad_norm": 0.00035957881482318044, |
|
"learning_rate": 8.931372417578747e-05, |
|
"loss": 0.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.11486160005522192, |
|
"grad_norm": 0.0005644855555146933, |
|
"learning_rate": 8.883279133655399e-05, |
|
"loss": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.11707047697936081, |
|
"grad_norm": 0.000498141860589385, |
|
"learning_rate": 8.834280064097317e-05, |
|
"loss": 0.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.11927935390349968, |
|
"grad_norm": 0.0004631740739569068, |
|
"learning_rate": 8.78438832714026e-05, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12148823082763857, |
|
"grad_norm": 0.0005917078233323991, |
|
"learning_rate": 8.733617280008642e-05, |
|
"loss": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.12369710775177746, |
|
"grad_norm": 0.0012398248072713614, |
|
"learning_rate": 8.681980515339464e-05, |
|
"loss": 0.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.12590598467591635, |
|
"grad_norm": 0.00034377642441540956, |
|
"learning_rate": 8.629491857543253e-05, |
|
"loss": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.12811486160005522, |
|
"grad_norm": 0.0002728489344008267, |
|
"learning_rate": 8.57616535910292e-05, |
|
"loss": 0.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.1303237385241941, |
|
"grad_norm": 0.0002754317538347095, |
|
"learning_rate": 8.522015296811584e-05, |
|
"loss": 0.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.13253261544833297, |
|
"grad_norm": 0.0002577640407253057, |
|
"learning_rate": 8.467056167950311e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13474149237247188, |
|
"grad_norm": 0.00025405758060514927, |
|
"learning_rate": 8.411302686406859e-05, |
|
"loss": 0.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.13695036929661075, |
|
"grad_norm": 0.0002456019283272326, |
|
"learning_rate": 8.354769778736406e-05, |
|
"loss": 0.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.13915924622074963, |
|
"grad_norm": 0.0002701326156966388, |
|
"learning_rate": 8.29747258016535e-05, |
|
"loss": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.14136812314488853, |
|
"grad_norm": 0.0004698407428804785, |
|
"learning_rate": 8.239426430539243e-05, |
|
"loss": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.1435770000690274, |
|
"grad_norm": 0.00043744893628172576, |
|
"learning_rate": 8.180646870215952e-05, |
|
"loss": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14578587699316628, |
|
"grad_norm": 0.0004982824320904911, |
|
"learning_rate": 8.12114963590511e-05, |
|
"loss": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.14799475391730518, |
|
"grad_norm": 0.0004062750958837569, |
|
"learning_rate": 8.060950656455043e-05, |
|
"loss": 0.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.15020363084144406, |
|
"grad_norm": 0.00123576819896698, |
|
"learning_rate": 8.000066048588211e-05, |
|
"loss": 0.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.15241250776558293, |
|
"grad_norm": 0.0007974767941050231, |
|
"learning_rate": 7.938512112586383e-05, |
|
"loss": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1546213846897218, |
|
"grad_norm": 0.0001897720358101651, |
|
"learning_rate": 7.876305327926657e-05, |
|
"loss": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1568302616138607, |
|
"grad_norm": 0.0001893111621029675, |
|
"learning_rate": 7.813462348869497e-05, |
|
"loss": 0.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.15903913853799959, |
|
"grad_norm": 0.00018786548753269017, |
|
"learning_rate": 7.75e-05, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.16124801546213846, |
|
"grad_norm": 0.00018544185149949044, |
|
"learning_rate": 7.68593527172353e-05, |
|
"loss": 0.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.16345689238627736, |
|
"grad_norm": 0.00018172743148170412, |
|
"learning_rate": 7.62128531571699e-05, |
|
"loss": 0.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.16566576931041624, |
|
"grad_norm": 0.0001720397121971473, |
|
"learning_rate": 7.556067440336894e-05, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.16566576931041624, |
|
"eval_loss": 4.231893854012014e-06, |
|
"eval_runtime": 2.1042, |
|
"eval_samples_per_second": 23.762, |
|
"eval_steps_per_second": 6.178, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1678746462345551, |
|
"grad_norm": 0.0003285811108071357, |
|
"learning_rate": 7.490299105985507e-05, |
|
"loss": 0.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.170083523158694, |
|
"grad_norm": 0.00037658450310118496, |
|
"learning_rate": 7.42399792043627e-05, |
|
"loss": 0.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.1722924000828329, |
|
"grad_norm": 0.00041075507760979235, |
|
"learning_rate": 7.357181634119777e-05, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.17450127700697177, |
|
"grad_norm": 0.0002939265687018633, |
|
"learning_rate": 7.28986813537155e-05, |
|
"loss": 0.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.17671015393111064, |
|
"grad_norm": 0.00039604472112841904, |
|
"learning_rate": 7.222075445642904e-05, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17891903085524954, |
|
"grad_norm": 0.0007199989049695432, |
|
"learning_rate": 7.153821714676166e-05, |
|
"loss": 0.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.18112790777938842, |
|
"grad_norm": 0.0003389615740161389, |
|
"learning_rate": 7.085125215645552e-05, |
|
"loss": 0.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.1833367847035273, |
|
"grad_norm": 0.00015800297842361033, |
|
"learning_rate": 7.01600434026499e-05, |
|
"loss": 0.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.18554566162766617, |
|
"grad_norm": 0.00016026933735702187, |
|
"learning_rate": 6.946477593864228e-05, |
|
"loss": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.18775453855180507, |
|
"grad_norm": 0.00015380079275928438, |
|
"learning_rate": 6.876563590434491e-05, |
|
"loss": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.18996341547594395, |
|
"grad_norm": 0.00014766880485694855, |
|
"learning_rate": 6.80628104764508e-05, |
|
"loss": 0.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.19217229240008282, |
|
"grad_norm": 0.00014809212007094175, |
|
"learning_rate": 6.735648781832196e-05, |
|
"loss": 0.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.19438116932422173, |
|
"grad_norm": 0.00012480569421313703, |
|
"learning_rate": 6.664685702961344e-05, |
|
"loss": 0.0, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1965900462483606, |
|
"grad_norm": 0.000344978179782629, |
|
"learning_rate": 6.593410809564689e-05, |
|
"loss": 0.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.19879892317249948, |
|
"grad_norm": 0.00035067732096649706, |
|
"learning_rate": 6.52184318365468e-05, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20100780009663838, |
|
"grad_norm": 0.00028731199563480914, |
|
"learning_rate": 6.450001985615342e-05, |
|
"loss": 0.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.20321667702077725, |
|
"grad_norm": 0.0003451658121775836, |
|
"learning_rate": 6.377906449072578e-05, |
|
"loss": 0.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.20542555394491613, |
|
"grad_norm": 0.0005577579722739756, |
|
"learning_rate": 6.305575875744848e-05, |
|
"loss": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.207634430869055, |
|
"grad_norm": 0.0003858502604998648, |
|
"learning_rate": 6.23302963027565e-05, |
|
"loss": 0.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.2098433077931939, |
|
"grad_norm": 0.00013492356811184436, |
|
"learning_rate": 6.160287135049127e-05, |
|
"loss": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21205218471733278, |
|
"grad_norm": 0.00013036909513175488, |
|
"learning_rate": 6.087367864990233e-05, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.21426106164147166, |
|
"grad_norm": 0.00013071863213554025, |
|
"learning_rate": 6.01429134235081e-05, |
|
"loss": 0.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.21646993856561056, |
|
"grad_norm": 0.00013250780466478318, |
|
"learning_rate": 5.941077131483025e-05, |
|
"loss": 0.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.21867881548974943, |
|
"grad_norm": 0.00012893717212136835, |
|
"learning_rate": 5.867744833601507e-05, |
|
"loss": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.2208876924138883, |
|
"grad_norm": 0.00012246635742485523, |
|
"learning_rate": 5.794314081535644e-05, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2208876924138883, |
|
"eval_loss": 3.5253201531304512e-06, |
|
"eval_runtime": 2.1031, |
|
"eval_samples_per_second": 23.775, |
|
"eval_steps_per_second": 6.181, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22309656933802718, |
|
"grad_norm": 0.000191736122360453, |
|
"learning_rate": 5.720804534473382e-05, |
|
"loss": 0.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2253054462621661, |
|
"grad_norm": 0.0002980089630000293, |
|
"learning_rate": 5.6472358726979935e-05, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.22751432318630496, |
|
"grad_norm": 0.0003073852858506143, |
|
"learning_rate": 5.5736277923191916e-05, |
|
"loss": 0.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.22972320011044384, |
|
"grad_norm": 0.00027709436835721135, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.23193207703458274, |
|
"grad_norm": 0.00034520094050094485, |
|
"learning_rate": 5.42637220768081e-05, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23414095395872161, |
|
"grad_norm": 0.0005847822758369148, |
|
"learning_rate": 5.352764127302008e-05, |
|
"loss": 0.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2363498308828605, |
|
"grad_norm": 0.00017146494064945728, |
|
"learning_rate": 5.27919546552662e-05, |
|
"loss": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.23855870780699936, |
|
"grad_norm": 0.00011689545499393716, |
|
"learning_rate": 5.205685918464356e-05, |
|
"loss": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.24076758473113827, |
|
"grad_norm": 0.00011620977602433413, |
|
"learning_rate": 5.1322551663984955e-05, |
|
"loss": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.24297646165527714, |
|
"grad_norm": 0.00011741516209440306, |
|
"learning_rate": 5.058922868516978e-05, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24518533857941602, |
|
"grad_norm": 0.00011821599036920816, |
|
"learning_rate": 4.985708657649191e-05, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.24739421550355492, |
|
"grad_norm": 0.00011795986938523129, |
|
"learning_rate": 4.912632135009769e-05, |
|
"loss": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2496030924276938, |
|
"grad_norm": 0.00014575726527255028, |
|
"learning_rate": 4.839712864950873e-05, |
|
"loss": 0.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.2518119693518327, |
|
"grad_norm": 0.00030223472276702523, |
|
"learning_rate": 4.7669703697243516e-05, |
|
"loss": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.25402084627597155, |
|
"grad_norm": 0.0002797936904244125, |
|
"learning_rate": 4.6944241242551546e-05, |
|
"loss": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.25622972320011045, |
|
"grad_norm": 0.0002620798768475652, |
|
"learning_rate": 4.6220935509274235e-05, |
|
"loss": 0.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.25843860012424935, |
|
"grad_norm": 0.00028032332193106413, |
|
"learning_rate": 4.5499980143846564e-05, |
|
"loss": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2606474770483882, |
|
"grad_norm": 0.0003947988443542272, |
|
"learning_rate": 4.478156816345321e-05, |
|
"loss": 0.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2628563539725271, |
|
"grad_norm": 0.00045703398063778877, |
|
"learning_rate": 4.406589190435313e-05, |
|
"loss": 0.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.26506523089666595, |
|
"grad_norm": 0.0001099837027140893, |
|
"learning_rate": 4.3353142970386564e-05, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26727410782080485, |
|
"grad_norm": 0.00010697261313907802, |
|
"learning_rate": 4.2643512181678056e-05, |
|
"loss": 0.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.26948298474494375, |
|
"grad_norm": 0.0001075834225048311, |
|
"learning_rate": 4.19371895235492e-05, |
|
"loss": 0.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.2716918616690826, |
|
"grad_norm": 0.00010644174471963197, |
|
"learning_rate": 4.12343640956551e-05, |
|
"loss": 0.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.2739007385932215, |
|
"grad_norm": 0.00010687617032090202, |
|
"learning_rate": 4.053522406135775e-05, |
|
"loss": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2761096155173604, |
|
"grad_norm": 0.00010714936070144176, |
|
"learning_rate": 3.98399565973501e-05, |
|
"loss": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2761096155173604, |
|
"eval_loss": 3.0905191579222446e-06, |
|
"eval_runtime": 2.0999, |
|
"eval_samples_per_second": 23.81, |
|
"eval_steps_per_second": 6.191, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27831849244149925, |
|
"grad_norm": 0.0002063347928924486, |
|
"learning_rate": 3.9148747843544495e-05, |
|
"loss": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.28052736936563816, |
|
"grad_norm": 0.00025974729214794934, |
|
"learning_rate": 3.846178285323835e-05, |
|
"loss": 0.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.28273624628977706, |
|
"grad_norm": 0.00024177682644221932, |
|
"learning_rate": 3.777924554357096e-05, |
|
"loss": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.2849451232139159, |
|
"grad_norm": 0.0002588094212114811, |
|
"learning_rate": 3.710131864628451e-05, |
|
"loss": 0.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.2871540001380548, |
|
"grad_norm": 0.0002662424521986395, |
|
"learning_rate": 3.642818365880224e-05, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2893628770621937, |
|
"grad_norm": 0.0004572535108309239, |
|
"learning_rate": 3.576002079563732e-05, |
|
"loss": 0.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.29157175398633256, |
|
"grad_norm": 0.00027820674586109817, |
|
"learning_rate": 3.509700894014496e-05, |
|
"loss": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.29378063091047146, |
|
"grad_norm": 0.00010199982352787629, |
|
"learning_rate": 3.443932559663107e-05, |
|
"loss": 0.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.29598950783461037, |
|
"grad_norm": 0.00010178149386774749, |
|
"learning_rate": 3.378714684283011e-05, |
|
"loss": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.2981983847587492, |
|
"grad_norm": 0.00010338863648939878, |
|
"learning_rate": 3.31406472827647e-05, |
|
"loss": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.3004072616828881, |
|
"grad_norm": 0.0001027277103275992, |
|
"learning_rate": 3.250000000000001e-05, |
|
"loss": 0.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.30261613860702696, |
|
"grad_norm": 0.00010317438864149153, |
|
"learning_rate": 3.186537651130503e-05, |
|
"loss": 0.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.30482501553116587, |
|
"grad_norm": 0.00016677897656336427, |
|
"learning_rate": 3.123694672073344e-05, |
|
"loss": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.30703389245530477, |
|
"grad_norm": 0.00022237653320189565, |
|
"learning_rate": 3.061487887413619e-05, |
|
"loss": 0.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3092427693794436, |
|
"grad_norm": 0.00023152329958975315, |
|
"learning_rate": 2.9999339514117912e-05, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3114516463035825, |
|
"grad_norm": 0.00022528883710037917, |
|
"learning_rate": 2.9390493435449572e-05, |
|
"loss": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3136605232277214, |
|
"grad_norm": 0.00024947745259851217, |
|
"learning_rate": 2.8788503640948912e-05, |
|
"loss": 0.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.31586940015186027, |
|
"grad_norm": 0.00043007542262785137, |
|
"learning_rate": 2.8193531297840503e-05, |
|
"loss": 0.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.31807827707599917, |
|
"grad_norm": 0.00035726267378777266, |
|
"learning_rate": 2.760573569460757e-05, |
|
"loss": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3202871540001381, |
|
"grad_norm": 9.570374095346779e-05, |
|
"learning_rate": 2.702527419834653e-05, |
|
"loss": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3224960309242769, |
|
"grad_norm": 9.726483403937891e-05, |
|
"learning_rate": 2.645230221263596e-05, |
|
"loss": 0.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3247049078484158, |
|
"grad_norm": 9.980611503124237e-05, |
|
"learning_rate": 2.5886973135931425e-05, |
|
"loss": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3269137847725547, |
|
"grad_norm": 9.672047599451616e-05, |
|
"learning_rate": 2.53294383204969e-05, |
|
"loss": 0.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3291226616966936, |
|
"grad_norm": 9.55902723944746e-05, |
|
"learning_rate": 2.4779847031884175e-05, |
|
"loss": 0.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3313315386208325, |
|
"grad_norm": 9.5816605607979e-05, |
|
"learning_rate": 2.423834640897079e-05, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3313315386208325, |
|
"eval_loss": 2.842964704541373e-06, |
|
"eval_runtime": 2.1034, |
|
"eval_samples_per_second": 23.771, |
|
"eval_steps_per_second": 6.18, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3335404155449714, |
|
"grad_norm": 0.000154680063133128, |
|
"learning_rate": 2.370508142456748e-05, |
|
"loss": 0.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3357492924691102, |
|
"grad_norm": 0.00023134062939789146, |
|
"learning_rate": 2.3180194846605367e-05, |
|
"loss": 0.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.33795816939324913, |
|
"grad_norm": 0.00024014056543819606, |
|
"learning_rate": 2.2663827199913588e-05, |
|
"loss": 0.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.340167046317388, |
|
"grad_norm": 0.00021546079369727522, |
|
"learning_rate": 2.215611672859741e-05, |
|
"loss": 0.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.3423759232415269, |
|
"grad_norm": 0.00024820741964504123, |
|
"learning_rate": 2.165719935902685e-05, |
|
"loss": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3445848001656658, |
|
"grad_norm": 0.00044995194184593856, |
|
"learning_rate": 2.1167208663446025e-05, |
|
"loss": 0.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.34679367708980463, |
|
"grad_norm": 0.00015662639634683728, |
|
"learning_rate": 2.068627582421254e-05, |
|
"loss": 0.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.34900255401394353, |
|
"grad_norm": 9.385013981955126e-05, |
|
"learning_rate": 2.0214529598676836e-05, |
|
"loss": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.35121143093808244, |
|
"grad_norm": 9.166620293399319e-05, |
|
"learning_rate": 1.9752096284710738e-05, |
|
"loss": 0.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3534203078622213, |
|
"grad_norm": 9.074221452465281e-05, |
|
"learning_rate": 1.9299099686894423e-05, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3556291847863602, |
|
"grad_norm": 9.394592780154198e-05, |
|
"learning_rate": 1.8855661083370986e-05, |
|
"loss": 0.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.3578380617104991, |
|
"grad_norm": 9.24318956094794e-05, |
|
"learning_rate": 1.842189919337732e-05, |
|
"loss": 0.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.36004693863463794, |
|
"grad_norm": 0.00011218286090297624, |
|
"learning_rate": 1.7997930145460136e-05, |
|
"loss": 0.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.36225581555877684, |
|
"grad_norm": 0.00019017797603737563, |
|
"learning_rate": 1.758386744638546e-05, |
|
"loss": 0.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.36446469248291574, |
|
"grad_norm": 0.00019140807853545994, |
|
"learning_rate": 1.7179821950750284e-05, |
|
"loss": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3666735694070546, |
|
"grad_norm": 0.00020481123647186905, |
|
"learning_rate": 1.6785901831303956e-05, |
|
"loss": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3688824463311935, |
|
"grad_norm": 0.00022451799304690212, |
|
"learning_rate": 1.6402212549987762e-05, |
|
"loss": 0.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.37109132325533234, |
|
"grad_norm": 0.00030974153196439147, |
|
"learning_rate": 1.602885682970026e-05, |
|
"loss": 0.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.37330020017947124, |
|
"grad_norm": 0.00048727114335633814, |
|
"learning_rate": 1.566593462679586e-05, |
|
"loss": 0.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.37550907710361014, |
|
"grad_norm": 9.068298095371574e-05, |
|
"learning_rate": 1.531354310432403e-05, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.377717954027749, |
|
"grad_norm": 9.067923383554444e-05, |
|
"learning_rate": 1.4971776606016482e-05, |
|
"loss": 0.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.3799268309518879, |
|
"grad_norm": 9.05448105186224e-05, |
|
"learning_rate": 1.464072663102903e-05, |
|
"loss": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3821357078760268, |
|
"grad_norm": 9.20113452593796e-05, |
|
"learning_rate": 1.4320481809445051e-05, |
|
"loss": 0.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.38434458480016565, |
|
"grad_norm": 9.098761074710637e-05, |
|
"learning_rate": 1.4011127878547087e-05, |
|
"loss": 0.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.38655346172430455, |
|
"grad_norm": 9.139224857790396e-05, |
|
"learning_rate": 1.3712747659862896e-05, |
|
"loss": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.38655346172430455, |
|
"eval_loss": 2.7154196686751675e-06, |
|
"eval_runtime": 2.0847, |
|
"eval_samples_per_second": 23.985, |
|
"eval_steps_per_second": 6.236, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.38876233864844345, |
|
"grad_norm": 0.0001819442113628611, |
|
"learning_rate": 1.3425421036992098e-05, |
|
"loss": 0.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.3909712155725823, |
|
"grad_norm": 0.00019746804900933057, |
|
"learning_rate": 1.314922493421946e-05, |
|
"loss": 0.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.3931800924967212, |
|
"grad_norm": 0.0002262179768877104, |
|
"learning_rate": 1.2884233295920353e-05, |
|
"loss": 0.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.3953889694208601, |
|
"grad_norm": 0.00020046616555191576, |
|
"learning_rate": 1.2630517066764069e-05, |
|
"loss": 0.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.39759784634499895, |
|
"grad_norm": 0.0002561356814112514, |
|
"learning_rate": 1.2388144172720251e-05, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39980672326913785, |
|
"grad_norm": 0.0003955549036618322, |
|
"learning_rate": 1.2157179502873409e-05, |
|
"loss": 0.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.40201560019327676, |
|
"grad_norm": 0.00018629009719006717, |
|
"learning_rate": 1.1937684892050604e-05, |
|
"loss": 0.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.4042244771174156, |
|
"grad_norm": 9.03993786778301e-05, |
|
"learning_rate": 1.172971910426671e-05, |
|
"loss": 0.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4064333540415545, |
|
"grad_norm": 8.99579026736319e-05, |
|
"learning_rate": 1.1533337816991932e-05, |
|
"loss": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.40864223096569335, |
|
"grad_norm": 8.806282130535692e-05, |
|
"learning_rate": 1.1348593606245522e-05, |
|
"loss": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.41085110788983226, |
|
"grad_norm": 8.73383687576279e-05, |
|
"learning_rate": 1.1175535932519987e-05, |
|
"loss": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.41305998481397116, |
|
"grad_norm": 8.832193998387083e-05, |
|
"learning_rate": 1.1014211127539271e-05, |
|
"loss": 0.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.41526886173811, |
|
"grad_norm": 0.00011703837662935257, |
|
"learning_rate": 1.0864662381854632e-05, |
|
"loss": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4174777386622489, |
|
"grad_norm": 0.0002513094514142722, |
|
"learning_rate": 1.0726929733281515e-05, |
|
"loss": 0.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4196866155863878, |
|
"grad_norm": 0.00021101209858898073, |
|
"learning_rate": 1.0601050056180447e-05, |
|
"loss": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42189549251052666, |
|
"grad_norm": 0.00020866327395197004, |
|
"learning_rate": 1.0487057051584856e-05, |
|
"loss": 0.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.42410436943466556, |
|
"grad_norm": 0.00022013194393366575, |
|
"learning_rate": 1.0384981238178534e-05, |
|
"loss": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.42631324635880447, |
|
"grad_norm": 0.00032330441172234714, |
|
"learning_rate": 1.0294849944125004e-05, |
|
"loss": 0.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.4285221232829433, |
|
"grad_norm": 0.00035217651748098433, |
|
"learning_rate": 1.0216687299751144e-05, |
|
"loss": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.4307310002070822, |
|
"grad_norm": 8.39560671010986e-05, |
|
"learning_rate": 1.0150514231086887e-05, |
|
"loss": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.4329398771312211, |
|
"grad_norm": 8.635565609438345e-05, |
|
"learning_rate": 1.0096348454262845e-05, |
|
"loss": 0.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.43514875405535997, |
|
"grad_norm": 8.677168807480484e-05, |
|
"learning_rate": 1.0054204470767243e-05, |
|
"loss": 0.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.43735763097949887, |
|
"grad_norm": 8.660169260110706e-05, |
|
"learning_rate": 1.0024093563563546e-05, |
|
"loss": 0.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.43956650790363777, |
|
"grad_norm": 8.837666246108711e-05, |
|
"learning_rate": 1.000602379406972e-05, |
|
"loss": 0.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.4417753848277766, |
|
"grad_norm": 8.881120447767898e-05, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4417753848277766, |
|
"eval_loss": 2.660818836375256e-06, |
|
"eval_runtime": 2.0833, |
|
"eval_samples_per_second": 24.001, |
|
"eval_steps_per_second": 6.24, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.49923578036224e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|