|
{ |
|
"best_metric": 0.6820038701354548, |
|
"best_model_checkpoint": "/content/drive/Shareddrives/Data Science Team folder-open/datasets/Lesam/BOL/combined dataset AD-656 + AD-580 /model/Bol1.0/checkpoint-8500", |
|
"epoch": 12.676056338028168, |
|
"eval_steps": 100, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"eval_accuracy": 0.9531156222418359, |
|
"eval_f1": 0.09613869188337273, |
|
"eval_loss": 0.42083606123924255, |
|
"eval_precision": 0.5083333333333333, |
|
"eval_recall": 0.05308964316797215, |
|
"eval_runtime": 7.3353, |
|
"eval_samples_per_second": 32.718, |
|
"eval_steps_per_second": 16.359, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"eval_accuracy": 0.9531391585760518, |
|
"eval_f1": 0.09598741148701809, |
|
"eval_loss": 0.31483981013298035, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.05308964316797215, |
|
"eval_runtime": 7.317, |
|
"eval_samples_per_second": 32.801, |
|
"eval_steps_per_second": 16.4, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"eval_accuracy": 0.9565754633715798, |
|
"eval_f1": 0.12402044293015331, |
|
"eval_loss": 0.28479117155075073, |
|
"eval_precision": 0.2857142857142857, |
|
"eval_recall": 0.07919930374238468, |
|
"eval_runtime": 7.3928, |
|
"eval_samples_per_second": 32.464, |
|
"eval_steps_per_second": 16.232, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"eval_accuracy": 0.9590820829655781, |
|
"eval_f1": 0.15186615186615188, |
|
"eval_loss": 0.2616308927536011, |
|
"eval_precision": 0.291358024691358, |
|
"eval_recall": 0.10269799825935597, |
|
"eval_runtime": 7.21, |
|
"eval_samples_per_second": 33.287, |
|
"eval_steps_per_second": 16.643, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 3.1437392234802246, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 0.4533, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"eval_accuracy": 0.9585525154457193, |
|
"eval_f1": 0.1576517150395778, |
|
"eval_loss": 0.24514709413051605, |
|
"eval_precision": 0.32561307901907355, |
|
"eval_recall": 0.10400348128807659, |
|
"eval_runtime": 7.3357, |
|
"eval_samples_per_second": 32.717, |
|
"eval_steps_per_second": 16.358, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"eval_accuracy": 0.9594586643130333, |
|
"eval_f1": 0.15076717811874582, |
|
"eval_loss": 0.23794828355312347, |
|
"eval_precision": 0.32285714285714284, |
|
"eval_recall": 0.09834638816362054, |
|
"eval_runtime": 7.345, |
|
"eval_samples_per_second": 32.675, |
|
"eval_steps_per_second": 16.338, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"eval_accuracy": 0.9603059723448073, |
|
"eval_f1": 0.17906683480453975, |
|
"eval_loss": 0.21762977540493011, |
|
"eval_precision": 0.32494279176201374, |
|
"eval_recall": 0.12358572671888599, |
|
"eval_runtime": 7.3679, |
|
"eval_samples_per_second": 32.574, |
|
"eval_steps_per_second": 16.287, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"eval_accuracy": 0.9655663430420712, |
|
"eval_f1": 0.2692101020956475, |
|
"eval_loss": 0.20588882267475128, |
|
"eval_precision": 0.3518258426966292, |
|
"eval_recall": 0.21801566579634465, |
|
"eval_runtime": 7.173, |
|
"eval_samples_per_second": 33.459, |
|
"eval_steps_per_second": 16.729, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"eval_accuracy": 0.9650956163577523, |
|
"eval_f1": 0.2747485442032822, |
|
"eval_loss": 0.20205169916152954, |
|
"eval_precision": 0.3506756756756757, |
|
"eval_recall": 0.2258485639686684, |
|
"eval_runtime": 7.327, |
|
"eval_samples_per_second": 32.755, |
|
"eval_steps_per_second": 16.378, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 0.12497588992118835, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.2053, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"eval_accuracy": 0.9687437481612239, |
|
"eval_f1": 0.29104679469840417, |
|
"eval_loss": 0.17978309094905853, |
|
"eval_precision": 0.38456040028591854, |
|
"eval_recall": 0.23411662315056572, |
|
"eval_runtime": 7.3218, |
|
"eval_samples_per_second": 32.779, |
|
"eval_steps_per_second": 16.389, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"eval_accuracy": 0.9712621359223301, |
|
"eval_f1": 0.34724021767297225, |
|
"eval_loss": 0.16675764322280884, |
|
"eval_precision": 0.42921204356181936, |
|
"eval_recall": 0.2915578764142733, |
|
"eval_runtime": 7.3153, |
|
"eval_samples_per_second": 32.808, |
|
"eval_steps_per_second": 16.404, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"eval_accuracy": 0.9708973227419829, |
|
"eval_f1": 0.33114668066124375, |
|
"eval_loss": 0.1615074723958969, |
|
"eval_precision": 0.4170522141440846, |
|
"eval_recall": 0.27458659704090516, |
|
"eval_runtime": 7.4019, |
|
"eval_samples_per_second": 32.424, |
|
"eval_steps_per_second": 16.212, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"eval_accuracy": 0.9715563401000294, |
|
"eval_f1": 0.37593984962406013, |
|
"eval_loss": 0.1544044315814972, |
|
"eval_precision": 0.4246575342465753, |
|
"eval_recall": 0.3372497824194952, |
|
"eval_runtime": 7.2284, |
|
"eval_samples_per_second": 33.202, |
|
"eval_steps_per_second": 16.601, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"eval_accuracy": 0.9729685201529862, |
|
"eval_f1": 0.354788877445932, |
|
"eval_loss": 0.14469194412231445, |
|
"eval_precision": 0.4344262295081967, |
|
"eval_recall": 0.2998259355961706, |
|
"eval_runtime": 7.2572, |
|
"eval_samples_per_second": 33.07, |
|
"eval_steps_per_second": 16.535, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 3.3000364303588867, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.1633, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"eval_accuracy": 0.9744866137099146, |
|
"eval_f1": 0.4306997742663657, |
|
"eval_loss": 0.14256681501865387, |
|
"eval_precision": 0.4474671669793621, |
|
"eval_recall": 0.4151436031331593, |
|
"eval_runtime": 7.3087, |
|
"eval_samples_per_second": 32.838, |
|
"eval_steps_per_second": 16.419, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"eval_accuracy": 0.9750632538982054, |
|
"eval_f1": 0.41274369947693773, |
|
"eval_loss": 0.13538698852062225, |
|
"eval_precision": 0.4549266247379455, |
|
"eval_recall": 0.3777197563098346, |
|
"eval_runtime": 7.3667, |
|
"eval_samples_per_second": 32.579, |
|
"eval_steps_per_second": 16.29, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"eval_accuracy": 0.9748631950573698, |
|
"eval_f1": 0.4427480916030535, |
|
"eval_loss": 0.1301625669002533, |
|
"eval_precision": 0.45732838589981445, |
|
"eval_recall": 0.4290687554395126, |
|
"eval_runtime": 7.3457, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 16.336, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"eval_accuracy": 0.9771226831421006, |
|
"eval_f1": 0.4529600734281781, |
|
"eval_loss": 0.12118236720561981, |
|
"eval_precision": 0.479126213592233, |
|
"eval_recall": 0.42950391644908614, |
|
"eval_runtime": 7.3572, |
|
"eval_samples_per_second": 32.621, |
|
"eval_steps_per_second": 16.311, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"eval_accuracy": 0.9772285966460724, |
|
"eval_f1": 0.4805283534502391, |
|
"eval_loss": 0.12013120204210281, |
|
"eval_precision": 0.5040611562350693, |
|
"eval_recall": 0.459094865100087, |
|
"eval_runtime": 7.2756, |
|
"eval_samples_per_second": 32.987, |
|
"eval_steps_per_second": 16.494, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 4.632040500640869, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.116, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"eval_accuracy": 0.9765695792880259, |
|
"eval_f1": 0.46877098723975824, |
|
"eval_loss": 0.12145345658063889, |
|
"eval_precision": 0.4827109266943292, |
|
"eval_recall": 0.4556135770234987, |
|
"eval_runtime": 7.3168, |
|
"eval_samples_per_second": 32.801, |
|
"eval_steps_per_second": 16.401, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"eval_accuracy": 0.9755104442483084, |
|
"eval_f1": 0.4675658317276814, |
|
"eval_loss": 0.12608888745307922, |
|
"eval_precision": 0.46017699115044247, |
|
"eval_recall": 0.4751958224543081, |
|
"eval_runtime": 7.3583, |
|
"eval_samples_per_second": 32.616, |
|
"eval_steps_per_second": 16.308, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"eval_accuracy": 0.9789938217122683, |
|
"eval_f1": 0.4885803851321093, |
|
"eval_loss": 0.11443355679512024, |
|
"eval_precision": 0.5032287822878229, |
|
"eval_recall": 0.47476066144473456, |
|
"eval_runtime": 7.3495, |
|
"eval_samples_per_second": 32.655, |
|
"eval_steps_per_second": 16.328, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"eval_accuracy": 0.9794880847308032, |
|
"eval_f1": 0.501056586053064, |
|
"eval_loss": 0.10821868479251862, |
|
"eval_precision": 0.5441101478837328, |
|
"eval_recall": 0.46431679721496955, |
|
"eval_runtime": 7.3804, |
|
"eval_samples_per_second": 32.519, |
|
"eval_steps_per_second": 16.259, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"eval_accuracy": 0.9779935275080907, |
|
"eval_f1": 0.49851360621998636, |
|
"eval_loss": 0.11080476641654968, |
|
"eval_precision": 0.5253012048192771, |
|
"eval_recall": 0.47432550043516103, |
|
"eval_runtime": 7.2629, |
|
"eval_samples_per_second": 33.045, |
|
"eval_steps_per_second": 16.522, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"grad_norm": 0.7869037985801697, |
|
"learning_rate": 7.222222222222223e-06, |
|
"loss": 0.0909, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"eval_accuracy": 0.9786525448661371, |
|
"eval_f1": 0.5188613746817866, |
|
"eval_loss": 0.11673837155103683, |
|
"eval_precision": 0.5541275333662877, |
|
"eval_recall": 0.48781549173194083, |
|
"eval_runtime": 7.2497, |
|
"eval_samples_per_second": 33.105, |
|
"eval_steps_per_second": 16.552, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"eval_accuracy": 0.979982347749338, |
|
"eval_f1": 0.5242311276794035, |
|
"eval_loss": 0.11096884310245514, |
|
"eval_precision": 0.5641925777331996, |
|
"eval_recall": 0.489556135770235, |
|
"eval_runtime": 7.3912, |
|
"eval_samples_per_second": 32.471, |
|
"eval_steps_per_second": 16.236, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"eval_accuracy": 0.9792644895557517, |
|
"eval_f1": 0.5294117647058825, |
|
"eval_loss": 0.10684900730848312, |
|
"eval_precision": 0.5382194244604317, |
|
"eval_recall": 0.52088772845953, |
|
"eval_runtime": 7.4198, |
|
"eval_samples_per_second": 32.346, |
|
"eval_steps_per_second": 16.173, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"eval_accuracy": 0.9792527213886437, |
|
"eval_f1": 0.5260178532549532, |
|
"eval_loss": 0.10498978942632675, |
|
"eval_precision": 0.5263616557734205, |
|
"eval_recall": 0.525674499564839, |
|
"eval_runtime": 7.3935, |
|
"eval_samples_per_second": 32.461, |
|
"eval_steps_per_second": 16.23, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"eval_accuracy": 0.9806178287731686, |
|
"eval_f1": 0.537687318273317, |
|
"eval_loss": 0.10101501643657684, |
|
"eval_precision": 0.55315232397607, |
|
"eval_recall": 0.5230635335073978, |
|
"eval_runtime": 7.3652, |
|
"eval_samples_per_second": 32.586, |
|
"eval_steps_per_second": 16.293, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"grad_norm": 3.056248426437378, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.08, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"eval_accuracy": 0.9793350985583995, |
|
"eval_f1": 0.534056399132321, |
|
"eval_loss": 0.1095050647854805, |
|
"eval_precision": 0.532439446366782, |
|
"eval_recall": 0.5356832027850305, |
|
"eval_runtime": 7.3022, |
|
"eval_samples_per_second": 32.867, |
|
"eval_steps_per_second": 16.433, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"eval_accuracy": 0.9807355104442483, |
|
"eval_f1": 0.5566556655665567, |
|
"eval_loss": 0.10490421205759048, |
|
"eval_precision": 0.5629728526924789, |
|
"eval_recall": 0.5504786771105309, |
|
"eval_runtime": 7.3941, |
|
"eval_samples_per_second": 32.458, |
|
"eval_steps_per_second": 16.229, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"eval_accuracy": 0.9806884377758164, |
|
"eval_f1": 0.5487348734873487, |
|
"eval_loss": 0.0985017642378807, |
|
"eval_precision": 0.5549621717846017, |
|
"eval_recall": 0.5426457789382071, |
|
"eval_runtime": 7.3897, |
|
"eval_samples_per_second": 32.478, |
|
"eval_steps_per_second": 16.239, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"eval_accuracy": 0.9803118564283613, |
|
"eval_f1": 0.5325077399380805, |
|
"eval_loss": 0.10188236087560654, |
|
"eval_precision": 0.5413669064748201, |
|
"eval_recall": 0.5239338555265448, |
|
"eval_runtime": 7.3669, |
|
"eval_samples_per_second": 32.578, |
|
"eval_steps_per_second": 16.289, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"eval_accuracy": 0.9811591644601353, |
|
"eval_f1": 0.5600684053014109, |
|
"eval_loss": 0.09488189220428467, |
|
"eval_precision": 0.5504201680672269, |
|
"eval_recall": 0.5700609225413403, |
|
"eval_runtime": 7.3775, |
|
"eval_samples_per_second": 32.531, |
|
"eval_steps_per_second": 16.266, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"grad_norm": 0.00509398290887475, |
|
"learning_rate": 6.111111111111112e-06, |
|
"loss": 0.0671, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"eval_accuracy": 0.9823477493380406, |
|
"eval_f1": 0.5749396532806671, |
|
"eval_loss": 0.09079114347696304, |
|
"eval_precision": 0.5799026117751217, |
|
"eval_recall": 0.5700609225413403, |
|
"eval_runtime": 7.2549, |
|
"eval_samples_per_second": 33.081, |
|
"eval_steps_per_second": 16.54, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.070422535211268, |
|
"eval_accuracy": 0.9842188879082083, |
|
"eval_f1": 0.5980850590069028, |
|
"eval_loss": 0.08618247509002686, |
|
"eval_precision": 0.6124031007751938, |
|
"eval_recall": 0.5844212358572672, |
|
"eval_runtime": 7.3833, |
|
"eval_samples_per_second": 32.506, |
|
"eval_steps_per_second": 16.253, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.211267605633803, |
|
"eval_accuracy": 0.98177110914975, |
|
"eval_f1": 0.5608871827681807, |
|
"eval_loss": 0.09391660988330841, |
|
"eval_precision": 0.5499790882475951, |
|
"eval_recall": 0.572236727589208, |
|
"eval_runtime": 7.376, |
|
"eval_samples_per_second": 32.538, |
|
"eval_steps_per_second": 16.269, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.352112676056338, |
|
"eval_accuracy": 0.9824889673433362, |
|
"eval_f1": 0.5940507436570428, |
|
"eval_loss": 0.09653446823358536, |
|
"eval_precision": 0.5971855760773966, |
|
"eval_recall": 0.5909486510008704, |
|
"eval_runtime": 7.386, |
|
"eval_samples_per_second": 32.494, |
|
"eval_steps_per_second": 16.247, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.492957746478873, |
|
"eval_accuracy": 0.9821006178287731, |
|
"eval_f1": 0.5902280130293159, |
|
"eval_loss": 0.09449278563261032, |
|
"eval_precision": 0.5890767230169051, |
|
"eval_recall": 0.5913838120104439, |
|
"eval_runtime": 7.3734, |
|
"eval_samples_per_second": 32.549, |
|
"eval_steps_per_second": 16.275, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"grad_norm": 0.8984728455543518, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.059, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"eval_accuracy": 0.982171226831421, |
|
"eval_f1": 0.5924487594390507, |
|
"eval_loss": 0.09081266820430756, |
|
"eval_precision": 0.5875053487376979, |
|
"eval_recall": 0.5974760661444735, |
|
"eval_runtime": 7.3677, |
|
"eval_samples_per_second": 32.575, |
|
"eval_steps_per_second": 16.287, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.774647887323944, |
|
"eval_accuracy": 0.983030303030303, |
|
"eval_f1": 0.5897771952817825, |
|
"eval_loss": 0.08924023061990738, |
|
"eval_precision": 0.5921052631578947, |
|
"eval_recall": 0.587467362924282, |
|
"eval_runtime": 7.278, |
|
"eval_samples_per_second": 32.976, |
|
"eval_steps_per_second": 16.488, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 5.915492957746479, |
|
"eval_accuracy": 0.9837010885554575, |
|
"eval_f1": 0.6034408602150538, |
|
"eval_loss": 0.0901557207107544, |
|
"eval_precision": 0.5965136054421769, |
|
"eval_recall": 0.6105308964316797, |
|
"eval_runtime": 7.4615, |
|
"eval_samples_per_second": 32.165, |
|
"eval_steps_per_second": 16.083, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.056338028169014, |
|
"eval_accuracy": 0.9843836422477199, |
|
"eval_f1": 0.6253776435045316, |
|
"eval_loss": 0.08598857372999191, |
|
"eval_precision": 0.620291095890411, |
|
"eval_recall": 0.6305483028720626, |
|
"eval_runtime": 7.367, |
|
"eval_samples_per_second": 32.578, |
|
"eval_steps_per_second": 16.289, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"eval_accuracy": 0.9844307149161519, |
|
"eval_f1": 0.6058235549760974, |
|
"eval_loss": 0.08827093243598938, |
|
"eval_precision": 0.6050347222222222, |
|
"eval_recall": 0.6066144473455178, |
|
"eval_runtime": 7.3941, |
|
"eval_samples_per_second": 32.458, |
|
"eval_steps_per_second": 16.229, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.338028169014084, |
|
"grad_norm": 2.909140110015869, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0516, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.338028169014084, |
|
"eval_accuracy": 0.9840894380700206, |
|
"eval_f1": 0.5963083604777416, |
|
"eval_loss": 0.09003057330846786, |
|
"eval_precision": 0.5951452102297355, |
|
"eval_recall": 0.5974760661444735, |
|
"eval_runtime": 7.3583, |
|
"eval_samples_per_second": 32.616, |
|
"eval_steps_per_second": 16.308, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.47887323943662, |
|
"eval_accuracy": 0.9842541924095323, |
|
"eval_f1": 0.5947252747252747, |
|
"eval_loss": 0.08872821182012558, |
|
"eval_precision": 0.6007992895204263, |
|
"eval_recall": 0.5887728459530026, |
|
"eval_runtime": 7.2679, |
|
"eval_samples_per_second": 33.022, |
|
"eval_steps_per_second": 16.511, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.619718309859155, |
|
"eval_accuracy": 0.9841835834068844, |
|
"eval_f1": 0.6222998036220816, |
|
"eval_loss": 0.08372443169355392, |
|
"eval_precision": 0.624070021881838, |
|
"eval_recall": 0.6205395996518712, |
|
"eval_runtime": 7.4046, |
|
"eval_samples_per_second": 32.412, |
|
"eval_steps_per_second": 16.206, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 6.76056338028169, |
|
"eval_accuracy": 0.9844777875845837, |
|
"eval_f1": 0.6397756686798964, |
|
"eval_loss": 0.08945748209953308, |
|
"eval_precision": 0.6343028229255774, |
|
"eval_recall": 0.6453437771975631, |
|
"eval_runtime": 7.3826, |
|
"eval_samples_per_second": 32.509, |
|
"eval_steps_per_second": 16.254, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.901408450704225, |
|
"eval_accuracy": 0.9840541335686966, |
|
"eval_f1": 0.6284970722186076, |
|
"eval_loss": 0.08736680448055267, |
|
"eval_precision": 0.6264591439688716, |
|
"eval_recall": 0.6305483028720626, |
|
"eval_runtime": 7.3836, |
|
"eval_samples_per_second": 32.505, |
|
"eval_steps_per_second": 16.252, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"grad_norm": 0.0029603431466966867, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0494, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"eval_accuracy": 0.9837481612238894, |
|
"eval_f1": 0.6307495741056218, |
|
"eval_loss": 0.08429873734712601, |
|
"eval_precision": 0.6175979983319433, |
|
"eval_recall": 0.644473455178416, |
|
"eval_runtime": 7.4187, |
|
"eval_samples_per_second": 32.351, |
|
"eval_steps_per_second": 16.175, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.183098591549296, |
|
"eval_accuracy": 0.9844542512503678, |
|
"eval_f1": 0.6298800436205015, |
|
"eval_loss": 0.0860215276479721, |
|
"eval_precision": 0.6313948404022737, |
|
"eval_recall": 0.6283724978241949, |
|
"eval_runtime": 7.3007, |
|
"eval_samples_per_second": 32.874, |
|
"eval_steps_per_second": 16.437, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 7.323943661971831, |
|
"eval_accuracy": 0.9841835834068844, |
|
"eval_f1": 0.6415256053139062, |
|
"eval_loss": 0.08662400394678116, |
|
"eval_precision": 0.6319121992401857, |
|
"eval_recall": 0.6514360313315927, |
|
"eval_runtime": 7.2401, |
|
"eval_samples_per_second": 33.149, |
|
"eval_steps_per_second": 16.574, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.464788732394366, |
|
"eval_accuracy": 0.9846072374227715, |
|
"eval_f1": 0.6500537056928034, |
|
"eval_loss": 0.08784898370504379, |
|
"eval_precision": 0.6419176919813322, |
|
"eval_recall": 0.6583986074847694, |
|
"eval_runtime": 7.4104, |
|
"eval_samples_per_second": 32.387, |
|
"eval_steps_per_second": 16.193, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 7.605633802816901, |
|
"eval_accuracy": 0.9842541924095323, |
|
"eval_f1": 0.6414596614640581, |
|
"eval_loss": 0.09084232151508331, |
|
"eval_precision": 0.6481563749444691, |
|
"eval_recall": 0.6348999129677981, |
|
"eval_runtime": 7.3975, |
|
"eval_samples_per_second": 32.443, |
|
"eval_steps_per_second": 16.222, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.746478873239437, |
|
"grad_norm": 0.0023182749282568693, |
|
"learning_rate": 3.88888888888889e-06, |
|
"loss": 0.0471, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.746478873239437, |
|
"eval_accuracy": 0.9840188290673727, |
|
"eval_f1": 0.6452567082188886, |
|
"eval_loss": 0.08999249339103699, |
|
"eval_precision": 0.6271047227926078, |
|
"eval_recall": 0.664490861618799, |
|
"eval_runtime": 7.3891, |
|
"eval_samples_per_second": 32.48, |
|
"eval_steps_per_second": 16.24, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.887323943661972, |
|
"eval_accuracy": 0.9845601647543395, |
|
"eval_f1": 0.6460348162475823, |
|
"eval_loss": 0.08533038198947906, |
|
"eval_precision": 0.6382165605095541, |
|
"eval_recall": 0.6540469973890339, |
|
"eval_runtime": 7.4541, |
|
"eval_samples_per_second": 32.197, |
|
"eval_steps_per_second": 16.099, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 8.028169014084508, |
|
"eval_accuracy": 0.9839835245660489, |
|
"eval_f1": 0.6473011055712118, |
|
"eval_loss": 0.08411037921905518, |
|
"eval_precision": 0.6449244060475162, |
|
"eval_recall": 0.6496953872932986, |
|
"eval_runtime": 7.2873, |
|
"eval_samples_per_second": 32.934, |
|
"eval_steps_per_second": 16.467, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 8.169014084507042, |
|
"eval_accuracy": 0.9845837010885554, |
|
"eval_f1": 0.6499032882011605, |
|
"eval_loss": 0.08550503104925156, |
|
"eval_precision": 0.6420382165605095, |
|
"eval_recall": 0.6579634464751958, |
|
"eval_runtime": 7.4063, |
|
"eval_samples_per_second": 32.405, |
|
"eval_steps_per_second": 16.203, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 8.309859154929578, |
|
"eval_accuracy": 0.9845719329214475, |
|
"eval_f1": 0.6492411467116358, |
|
"eval_loss": 0.08611707389354706, |
|
"eval_precision": 0.6295993458708095, |
|
"eval_recall": 0.6701479547432551, |
|
"eval_runtime": 7.4327, |
|
"eval_samples_per_second": 32.29, |
|
"eval_steps_per_second": 16.145, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 8.450704225352112, |
|
"grad_norm": 1.646479606628418, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0372, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.450704225352112, |
|
"eval_accuracy": 0.9843365695792881, |
|
"eval_f1": 0.6446173800259403, |
|
"eval_loss": 0.08737049251794815, |
|
"eval_precision": 0.6404639175257731, |
|
"eval_recall": 0.6488250652741514, |
|
"eval_runtime": 7.382, |
|
"eval_samples_per_second": 32.511, |
|
"eval_steps_per_second": 16.256, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 8.591549295774648, |
|
"eval_accuracy": 0.9844424830832598, |
|
"eval_f1": 0.6449305847707194, |
|
"eval_loss": 0.08426456153392792, |
|
"eval_precision": 0.624185667752443, |
|
"eval_recall": 0.6671018276762402, |
|
"eval_runtime": 7.3893, |
|
"eval_samples_per_second": 32.479, |
|
"eval_steps_per_second": 16.24, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 8.732394366197184, |
|
"eval_accuracy": 0.9846778464254192, |
|
"eval_f1": 0.6556962025316455, |
|
"eval_loss": 0.08480408787727356, |
|
"eval_precision": 0.6363636363636364, |
|
"eval_recall": 0.6762402088772846, |
|
"eval_runtime": 7.2354, |
|
"eval_samples_per_second": 33.17, |
|
"eval_steps_per_second": 16.585, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 8.873239436619718, |
|
"eval_accuracy": 0.9845601647543395, |
|
"eval_f1": 0.6470588235294118, |
|
"eval_loss": 0.08321469277143478, |
|
"eval_precision": 0.6385593220338983, |
|
"eval_recall": 0.6557876414273281, |
|
"eval_runtime": 7.4183, |
|
"eval_samples_per_second": 32.352, |
|
"eval_steps_per_second": 16.176, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 9.014084507042254, |
|
"eval_accuracy": 0.9850779641070904, |
|
"eval_f1": 0.6635009712928988, |
|
"eval_loss": 0.08342073112726212, |
|
"eval_precision": 0.6582441113490364, |
|
"eval_recall": 0.6688424717145344, |
|
"eval_runtime": 7.3864, |
|
"eval_samples_per_second": 32.492, |
|
"eval_steps_per_second": 16.246, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 9.154929577464788, |
|
"grad_norm": 2.369316339492798, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.0376, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.154929577464788, |
|
"eval_accuracy": 0.9844777875845837, |
|
"eval_f1": 0.6438267614738203, |
|
"eval_loss": 0.08599114418029785, |
|
"eval_precision": 0.6376440460947503, |
|
"eval_recall": 0.6501305483028721, |
|
"eval_runtime": 7.4336, |
|
"eval_samples_per_second": 32.286, |
|
"eval_steps_per_second": 16.143, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 9.295774647887324, |
|
"eval_accuracy": 0.9851838776110621, |
|
"eval_f1": 0.6749455337690632, |
|
"eval_loss": 0.0860476940870285, |
|
"eval_precision": 0.6758289703315882, |
|
"eval_recall": 0.6740644038294169, |
|
"eval_runtime": 7.3997, |
|
"eval_samples_per_second": 32.434, |
|
"eval_steps_per_second": 16.217, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 9.43661971830986, |
|
"eval_accuracy": 0.9851132686084142, |
|
"eval_f1": 0.6611642050390965, |
|
"eval_loss": 0.08599450439214706, |
|
"eval_precision": 0.6600173460537727, |
|
"eval_recall": 0.6623150565709313, |
|
"eval_runtime": 7.2459, |
|
"eval_samples_per_second": 33.122, |
|
"eval_steps_per_second": 16.561, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 9.577464788732394, |
|
"eval_accuracy": 0.9855486907914093, |
|
"eval_f1": 0.661723009814613, |
|
"eval_loss": 0.0859636440873146, |
|
"eval_precision": 0.6633143856580673, |
|
"eval_recall": 0.6601392515230635, |
|
"eval_runtime": 7.2743, |
|
"eval_samples_per_second": 32.993, |
|
"eval_steps_per_second": 16.496, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 9.71830985915493, |
|
"eval_accuracy": 0.985642836128273, |
|
"eval_f1": 0.6686567164179105, |
|
"eval_loss": 0.08458743244409561, |
|
"eval_precision": 0.6555183946488294, |
|
"eval_recall": 0.6823324630113142, |
|
"eval_runtime": 7.4204, |
|
"eval_samples_per_second": 32.343, |
|
"eval_steps_per_second": 16.172, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.859154929577464, |
|
"grad_norm": 0.0017794760642573237, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0352, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 9.859154929577464, |
|
"eval_accuracy": 0.9853250956163577, |
|
"eval_f1": 0.6703601108033241, |
|
"eval_loss": 0.08406654000282288, |
|
"eval_precision": 0.6567849686847599, |
|
"eval_recall": 0.6845082680591819, |
|
"eval_runtime": 7.4608, |
|
"eval_samples_per_second": 32.168, |
|
"eval_steps_per_second": 16.084, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9856310679611651, |
|
"eval_f1": 0.6696619597774925, |
|
"eval_loss": 0.08345002681016922, |
|
"eval_precision": 0.6586700336700336, |
|
"eval_recall": 0.6810269799825935, |
|
"eval_runtime": 7.4149, |
|
"eval_samples_per_second": 32.367, |
|
"eval_steps_per_second": 16.184, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 10.140845070422536, |
|
"eval_accuracy": 0.9852427184466019, |
|
"eval_f1": 0.6715548846236791, |
|
"eval_loss": 0.0879812017083168, |
|
"eval_precision": 0.6656690893544249, |
|
"eval_recall": 0.6775456919060052, |
|
"eval_runtime": 7.4442, |
|
"eval_samples_per_second": 32.24, |
|
"eval_steps_per_second": 16.12, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 10.28169014084507, |
|
"eval_accuracy": 0.9859723448072962, |
|
"eval_f1": 0.6685272649205668, |
|
"eval_loss": 0.08136157691478729, |
|
"eval_precision": 0.6597457627118644, |
|
"eval_recall": 0.6775456919060052, |
|
"eval_runtime": 7.376, |
|
"eval_samples_per_second": 32.538, |
|
"eval_steps_per_second": 16.269, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 10.422535211267606, |
|
"eval_accuracy": 0.9856781406295969, |
|
"eval_f1": 0.6621363346782757, |
|
"eval_loss": 0.08381623774766922, |
|
"eval_precision": 0.646619659892161, |
|
"eval_recall": 0.6784160139251523, |
|
"eval_runtime": 7.4024, |
|
"eval_samples_per_second": 32.422, |
|
"eval_steps_per_second": 16.211, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 10.56338028169014, |
|
"grad_norm": 7.505336761474609, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0316, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.56338028169014, |
|
"eval_accuracy": 0.9855722271256252, |
|
"eval_f1": 0.6696619597774925, |
|
"eval_loss": 0.08473847806453705, |
|
"eval_precision": 0.6586700336700336, |
|
"eval_recall": 0.6810269799825935, |
|
"eval_runtime": 7.4244, |
|
"eval_samples_per_second": 32.326, |
|
"eval_steps_per_second": 16.163, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 10.704225352112676, |
|
"eval_accuracy": 0.9857252132980289, |
|
"eval_f1": 0.6789620416041175, |
|
"eval_loss": 0.08409886062145233, |
|
"eval_precision": 0.6693446088794927, |
|
"eval_recall": 0.6888598781549173, |
|
"eval_runtime": 7.4216, |
|
"eval_samples_per_second": 32.338, |
|
"eval_steps_per_second": 16.169, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 10.845070422535212, |
|
"eval_accuracy": 0.9858546631362165, |
|
"eval_f1": 0.6777609682299546, |
|
"eval_loss": 0.08510851860046387, |
|
"eval_precision": 0.6732503220266208, |
|
"eval_recall": 0.6823324630113142, |
|
"eval_runtime": 7.4269, |
|
"eval_samples_per_second": 32.315, |
|
"eval_steps_per_second": 16.157, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 10.985915492957746, |
|
"eval_accuracy": 0.985219182112386, |
|
"eval_f1": 0.663824344489448, |
|
"eval_loss": 0.08504322916269302, |
|
"eval_precision": 0.6506477225240285, |
|
"eval_recall": 0.6775456919060052, |
|
"eval_runtime": 7.297, |
|
"eval_samples_per_second": 32.89, |
|
"eval_steps_per_second": 16.445, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 11.126760563380282, |
|
"eval_accuracy": 0.9855133862900853, |
|
"eval_f1": 0.6787174521196471, |
|
"eval_loss": 0.08639871329069138, |
|
"eval_precision": 0.6713495104299702, |
|
"eval_recall": 0.6862489120974761, |
|
"eval_runtime": 7.3134, |
|
"eval_samples_per_second": 32.817, |
|
"eval_steps_per_second": 16.408, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 11.267605633802816, |
|
"grad_norm": 0.0015932625392451882, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.0323, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.267605633802816, |
|
"eval_accuracy": 0.9859605766401883, |
|
"eval_f1": 0.671434763762462, |
|
"eval_loss": 0.08427204191684723, |
|
"eval_precision": 0.668825561312608, |
|
"eval_recall": 0.6740644038294169, |
|
"eval_runtime": 7.4224, |
|
"eval_samples_per_second": 32.335, |
|
"eval_steps_per_second": 16.167, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 11.408450704225352, |
|
"eval_accuracy": 0.9853368637834657, |
|
"eval_f1": 0.6679487179487179, |
|
"eval_loss": 0.08620592951774597, |
|
"eval_precision": 0.6561712846347607, |
|
"eval_recall": 0.6801566579634465, |
|
"eval_runtime": 7.52, |
|
"eval_samples_per_second": 31.915, |
|
"eval_steps_per_second": 15.957, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 11.549295774647888, |
|
"eval_accuracy": 0.9859723448072962, |
|
"eval_f1": 0.6735661923242777, |
|
"eval_loss": 0.08543656021356583, |
|
"eval_precision": 0.6675213675213675, |
|
"eval_recall": 0.679721496953873, |
|
"eval_runtime": 7.4397, |
|
"eval_samples_per_second": 32.259, |
|
"eval_steps_per_second": 16.13, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 11.690140845070422, |
|
"eval_accuracy": 0.986090026478376, |
|
"eval_f1": 0.679860809047412, |
|
"eval_loss": 0.0849027931690216, |
|
"eval_precision": 0.6795652173913044, |
|
"eval_recall": 0.6801566579634465, |
|
"eval_runtime": 7.4695, |
|
"eval_samples_per_second": 32.131, |
|
"eval_steps_per_second": 16.065, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 11.830985915492958, |
|
"eval_accuracy": 0.9858899676375404, |
|
"eval_f1": 0.6754684838160135, |
|
"eval_loss": 0.08420813083648682, |
|
"eval_precision": 0.6613844870725605, |
|
"eval_recall": 0.6901653611836379, |
|
"eval_runtime": 7.2972, |
|
"eval_samples_per_second": 32.889, |
|
"eval_steps_per_second": 16.445, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 11.971830985915492, |
|
"grad_norm": 4.900869369506836, |
|
"learning_rate": 5.555555555555555e-07, |
|
"loss": 0.0307, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 11.971830985915492, |
|
"eval_accuracy": 0.9861488673139158, |
|
"eval_f1": 0.6820038701354548, |
|
"eval_loss": 0.08459390699863434, |
|
"eval_precision": 0.6740331491712708, |
|
"eval_recall": 0.6901653611836379, |
|
"eval_runtime": 7.4228, |
|
"eval_samples_per_second": 32.333, |
|
"eval_steps_per_second": 16.166, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 12.112676056338028, |
|
"eval_accuracy": 0.9863371579876434, |
|
"eval_f1": 0.6822409690677049, |
|
"eval_loss": 0.08515108376741409, |
|
"eval_precision": 0.6782795698924731, |
|
"eval_recall": 0.6862489120974761, |
|
"eval_runtime": 7.4273, |
|
"eval_samples_per_second": 32.313, |
|
"eval_steps_per_second": 16.157, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 12.253521126760564, |
|
"eval_accuracy": 0.9860076493086202, |
|
"eval_f1": 0.6782646048109965, |
|
"eval_loss": 0.08560565114021301, |
|
"eval_precision": 0.6696352841391009, |
|
"eval_recall": 0.6871192341166231, |
|
"eval_runtime": 7.5069, |
|
"eval_samples_per_second": 31.97, |
|
"eval_steps_per_second": 15.985, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 12.394366197183098, |
|
"eval_accuracy": 0.9859017358046485, |
|
"eval_f1": 0.6775580395528805, |
|
"eval_loss": 0.08563963323831558, |
|
"eval_precision": 0.6694987255734919, |
|
"eval_recall": 0.6858137510879025, |
|
"eval_runtime": 7.5078, |
|
"eval_samples_per_second": 31.967, |
|
"eval_steps_per_second": 15.983, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 12.535211267605634, |
|
"eval_accuracy": 0.9858546631362165, |
|
"eval_f1": 0.6785791173304628, |
|
"eval_loss": 0.08600553870201111, |
|
"eval_precision": 0.6714955262036643, |
|
"eval_recall": 0.6858137510879025, |
|
"eval_runtime": 7.3022, |
|
"eval_samples_per_second": 32.867, |
|
"eval_steps_per_second": 16.433, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 12.676056338028168, |
|
"grad_norm": 0.2440214604139328, |
|
"learning_rate": 0.0, |
|
"loss": 0.028, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 12.676056338028168, |
|
"eval_accuracy": 0.9859252721388644, |
|
"eval_f1": 0.6791559000861327, |
|
"eval_loss": 0.08589298278093338, |
|
"eval_precision": 0.6722080136402387, |
|
"eval_recall": 0.6862489120974761, |
|
"eval_runtime": 7.3893, |
|
"eval_samples_per_second": 32.479, |
|
"eval_steps_per_second": 16.24, |
|
"step": 9000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 13, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4780487614464000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|