|
{ |
|
"best_metric": 0.0018136479193344712, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.03170577045022194, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0001585288522511097, |
|
"grad_norm": 12.169403076171875, |
|
"learning_rate": 1.013e-05, |
|
"loss": 4.2706, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0001585288522511097, |
|
"eval_loss": 4.04376220703125, |
|
"eval_runtime": 284.7456, |
|
"eval_samples_per_second": 9.328, |
|
"eval_steps_per_second": 2.332, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0003170577045022194, |
|
"grad_norm": 16.00709342956543, |
|
"learning_rate": 2.026e-05, |
|
"loss": 3.9804, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0004755865567533291, |
|
"grad_norm": 15.737310409545898, |
|
"learning_rate": 3.039e-05, |
|
"loss": 3.9673, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0006341154090044388, |
|
"grad_norm": 14.818962097167969, |
|
"learning_rate": 4.052e-05, |
|
"loss": 3.2126, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0007926442612555486, |
|
"grad_norm": 10.428650856018066, |
|
"learning_rate": 5.065e-05, |
|
"loss": 2.0077, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0009511731135066582, |
|
"grad_norm": 12.499900817871094, |
|
"learning_rate": 6.078e-05, |
|
"loss": 1.2136, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0011097019657577679, |
|
"grad_norm": 10.663622856140137, |
|
"learning_rate": 7.091e-05, |
|
"loss": 0.3359, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0012682308180088776, |
|
"grad_norm": 46.17512512207031, |
|
"learning_rate": 8.104e-05, |
|
"loss": 0.412, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0014267596702599874, |
|
"grad_norm": 1.8651701211929321, |
|
"learning_rate": 9.117e-05, |
|
"loss": 0.0131, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0015852885225110971, |
|
"grad_norm": 0.16528020799160004, |
|
"learning_rate": 0.0001013, |
|
"loss": 0.0008, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0017438173747622067, |
|
"grad_norm": 0.012765788473188877, |
|
"learning_rate": 0.00010076684210526316, |
|
"loss": 0.0001, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0019023462270133164, |
|
"grad_norm": 0.013106022030115128, |
|
"learning_rate": 0.0001002336842105263, |
|
"loss": 0.0001, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002060875079264426, |
|
"grad_norm": 0.024374201893806458, |
|
"learning_rate": 9.970052631578946e-05, |
|
"loss": 0.0001, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0022194039315155357, |
|
"grad_norm": 0.021921832114458084, |
|
"learning_rate": 9.916736842105263e-05, |
|
"loss": 0.0001, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0023779327837666455, |
|
"grad_norm": 0.04089897498488426, |
|
"learning_rate": 9.863421052631579e-05, |
|
"loss": 0.0001, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0025364616360177552, |
|
"grad_norm": 0.021697349846363068, |
|
"learning_rate": 9.810105263157895e-05, |
|
"loss": 0.0001, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.002694990488268865, |
|
"grad_norm": 0.0019667658489197493, |
|
"learning_rate": 9.756789473684211e-05, |
|
"loss": 0.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0028535193405199747, |
|
"grad_norm": 0.0005555606330744922, |
|
"learning_rate": 9.703473684210525e-05, |
|
"loss": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0030120481927710845, |
|
"grad_norm": 0.0005540283164009452, |
|
"learning_rate": 9.650157894736842e-05, |
|
"loss": 0.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0031705770450221942, |
|
"grad_norm": 0.0006892455858178437, |
|
"learning_rate": 9.596842105263158e-05, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0033291058972733036, |
|
"grad_norm": 0.0022405856288969517, |
|
"learning_rate": 9.543526315789474e-05, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0034876347495244133, |
|
"grad_norm": 0.0009618853800930083, |
|
"learning_rate": 9.49021052631579e-05, |
|
"loss": 0.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.003646163601775523, |
|
"grad_norm": 0.0009560861508361995, |
|
"learning_rate": 9.436894736842105e-05, |
|
"loss": 0.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.003804692454026633, |
|
"grad_norm": 0.0029522525146603584, |
|
"learning_rate": 9.38357894736842e-05, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.003963221306277742, |
|
"grad_norm": 0.0008082777494564652, |
|
"learning_rate": 9.330263157894737e-05, |
|
"loss": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.004121750158528852, |
|
"grad_norm": 0.0015600634505972266, |
|
"learning_rate": 9.276947368421051e-05, |
|
"loss": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.004280279010779962, |
|
"grad_norm": 0.0023957917001098394, |
|
"learning_rate": 9.223631578947369e-05, |
|
"loss": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.004438807863031071, |
|
"grad_norm": 0.0004076052864547819, |
|
"learning_rate": 9.170315789473684e-05, |
|
"loss": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.004597336715282181, |
|
"grad_norm": 0.00036426776205189526, |
|
"learning_rate": 9.117e-05, |
|
"loss": 0.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.004755865567533291, |
|
"grad_norm": 0.0003226393018849194, |
|
"learning_rate": 9.063684210526316e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004914394419784401, |
|
"grad_norm": 0.0004357333527877927, |
|
"learning_rate": 9.010368421052632e-05, |
|
"loss": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0050729232720355105, |
|
"grad_norm": 0.00029580152477137744, |
|
"learning_rate": 8.957052631578946e-05, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.00523145212428662, |
|
"grad_norm": 0.00045763631351292133, |
|
"learning_rate": 8.903736842105263e-05, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.00538998097653773, |
|
"grad_norm": 0.0003399694978725165, |
|
"learning_rate": 8.850421052631579e-05, |
|
"loss": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.00554850982878884, |
|
"grad_norm": 0.0003934537817258388, |
|
"learning_rate": 8.797105263157895e-05, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0057070386810399495, |
|
"grad_norm": 0.0005533421644940972, |
|
"learning_rate": 8.743789473684211e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.005865567533291059, |
|
"grad_norm": 0.002100046258419752, |
|
"learning_rate": 8.690473684210526e-05, |
|
"loss": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.006024096385542169, |
|
"grad_norm": 0.0006316175567917526, |
|
"learning_rate": 8.637157894736842e-05, |
|
"loss": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.006182625237793279, |
|
"grad_norm": 0.0004144099948462099, |
|
"learning_rate": 8.583842105263158e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0063411540900443885, |
|
"grad_norm": 0.002568572061136365, |
|
"learning_rate": 8.530526315789472e-05, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.006499682942295497, |
|
"grad_norm": 0.0009425367461517453, |
|
"learning_rate": 8.47721052631579e-05, |
|
"loss": 0.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.006658211794546607, |
|
"grad_norm": 0.00028256585937924683, |
|
"learning_rate": 8.423894736842105e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.006816740646797717, |
|
"grad_norm": 0.000341152714099735, |
|
"learning_rate": 8.37057894736842e-05, |
|
"loss": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.006975269499048827, |
|
"grad_norm": 0.00031369487987831235, |
|
"learning_rate": 8.317263157894737e-05, |
|
"loss": 0.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.007133798351299936, |
|
"grad_norm": 0.00039442809065803885, |
|
"learning_rate": 8.263947368421053e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.007292327203551046, |
|
"grad_norm": 0.00029302932671271265, |
|
"learning_rate": 8.210631578947368e-05, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.007450856055802156, |
|
"grad_norm": 0.0003696536587085575, |
|
"learning_rate": 8.157315789473684e-05, |
|
"loss": 0.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.007609384908053266, |
|
"grad_norm": 0.00028774861129932106, |
|
"learning_rate": 8.104e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0077679137603043754, |
|
"grad_norm": 0.00027515244437381625, |
|
"learning_rate": 8.050684210526316e-05, |
|
"loss": 0.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.007926442612555484, |
|
"grad_norm": 0.000243888032855466, |
|
"learning_rate": 7.997368421052632e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.007926442612555484, |
|
"eval_loss": 0.002942207735031843, |
|
"eval_runtime": 284.8103, |
|
"eval_samples_per_second": 9.326, |
|
"eval_steps_per_second": 2.331, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.008084971464806594, |
|
"grad_norm": 9.338234901428223, |
|
"learning_rate": 7.944052631578947e-05, |
|
"loss": 0.3261, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.008243500317057704, |
|
"grad_norm": 6.803124415455386e-05, |
|
"learning_rate": 7.890736842105263e-05, |
|
"loss": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.008402029169308814, |
|
"grad_norm": 0.00013048792607150972, |
|
"learning_rate": 7.837421052631579e-05, |
|
"loss": 0.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.008560558021559923, |
|
"grad_norm": 0.0003305167774669826, |
|
"learning_rate": 7.784105263157893e-05, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.008719086873811033, |
|
"grad_norm": 0.0002505861921235919, |
|
"learning_rate": 7.730789473684211e-05, |
|
"loss": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.008877615726062143, |
|
"grad_norm": 0.0005144188180565834, |
|
"learning_rate": 7.677473684210526e-05, |
|
"loss": 0.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.009036144578313253, |
|
"grad_norm": 0.0003912892425432801, |
|
"learning_rate": 7.624157894736842e-05, |
|
"loss": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.009194673430564362, |
|
"grad_norm": 0.0003942723269574344, |
|
"learning_rate": 7.570842105263158e-05, |
|
"loss": 0.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.009353202282815472, |
|
"grad_norm": 0.0006460827426053584, |
|
"learning_rate": 7.517526315789474e-05, |
|
"loss": 0.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.009511731135066582, |
|
"grad_norm": 0.0014212304959073663, |
|
"learning_rate": 7.464210526315789e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.009670259987317692, |
|
"grad_norm": 0.0020509830210357904, |
|
"learning_rate": 7.410894736842106e-05, |
|
"loss": 0.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.009828788839568801, |
|
"grad_norm": 0.003535072784870863, |
|
"learning_rate": 7.35757894736842e-05, |
|
"loss": 0.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.009987317691819911, |
|
"grad_norm": 0.0031283413991332054, |
|
"learning_rate": 7.304263157894737e-05, |
|
"loss": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.010145846544071021, |
|
"grad_norm": 0.004257969092577696, |
|
"learning_rate": 7.250947368421053e-05, |
|
"loss": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01030437539632213, |
|
"grad_norm": 0.0008224630146287382, |
|
"learning_rate": 7.197631578947368e-05, |
|
"loss": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01046290424857324, |
|
"grad_norm": 0.00048312422586604953, |
|
"learning_rate": 7.144315789473684e-05, |
|
"loss": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.01062143310082435, |
|
"grad_norm": 0.0006397454999387264, |
|
"learning_rate": 7.091e-05, |
|
"loss": 0.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.01077996195307546, |
|
"grad_norm": 0.0005098761757835746, |
|
"learning_rate": 7.037684210526316e-05, |
|
"loss": 0.0, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.01093849080532657, |
|
"grad_norm": 0.0004371613613329828, |
|
"learning_rate": 6.984368421052632e-05, |
|
"loss": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.01109701965757768, |
|
"grad_norm": 0.0003893864050041884, |
|
"learning_rate": 6.931052631578947e-05, |
|
"loss": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01125554850982879, |
|
"grad_norm": 0.000385530962375924, |
|
"learning_rate": 6.877736842105263e-05, |
|
"loss": 0.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.011414077362079899, |
|
"grad_norm": 0.0004044200468342751, |
|
"learning_rate": 6.824421052631579e-05, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.011572606214331009, |
|
"grad_norm": 0.0003012254892382771, |
|
"learning_rate": 6.771105263157895e-05, |
|
"loss": 0.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.011731135066582118, |
|
"grad_norm": 0.0002749539853539318, |
|
"learning_rate": 6.71778947368421e-05, |
|
"loss": 0.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.011889663918833228, |
|
"grad_norm": 0.00024228697293438017, |
|
"learning_rate": 6.664473684210527e-05, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.012048192771084338, |
|
"grad_norm": 0.00025467583327554166, |
|
"learning_rate": 6.611157894736842e-05, |
|
"loss": 0.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.012206721623335448, |
|
"grad_norm": 0.00024287324049510062, |
|
"learning_rate": 6.557842105263158e-05, |
|
"loss": 0.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.012365250475586557, |
|
"grad_norm": 0.0002723891520872712, |
|
"learning_rate": 6.504526315789474e-05, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.012523779327837667, |
|
"grad_norm": 0.0002199763839598745, |
|
"learning_rate": 6.451210526315789e-05, |
|
"loss": 0.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.012682308180088777, |
|
"grad_norm": 0.0002384045801591128, |
|
"learning_rate": 6.397894736842105e-05, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.012840837032339885, |
|
"grad_norm": 0.00017316907178610563, |
|
"learning_rate": 6.344578947368421e-05, |
|
"loss": 0.0, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.012999365884590995, |
|
"grad_norm": 0.00017045978165697306, |
|
"learning_rate": 6.291263157894737e-05, |
|
"loss": 0.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.013157894736842105, |
|
"grad_norm": 0.00020900469098705798, |
|
"learning_rate": 6.237947368421053e-05, |
|
"loss": 0.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.013316423589093214, |
|
"grad_norm": 0.00019530224381014705, |
|
"learning_rate": 6.184631578947368e-05, |
|
"loss": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.013474952441344324, |
|
"grad_norm": 0.00019037234596908092, |
|
"learning_rate": 6.131315789473684e-05, |
|
"loss": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.013633481293595434, |
|
"grad_norm": 0.00016165118722710758, |
|
"learning_rate": 6.078e-05, |
|
"loss": 0.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.013792010145846544, |
|
"grad_norm": 0.00014685910718981177, |
|
"learning_rate": 6.024684210526315e-05, |
|
"loss": 0.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.013950538998097653, |
|
"grad_norm": 0.00014423737593460828, |
|
"learning_rate": 5.9713684210526305e-05, |
|
"loss": 0.0, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.014109067850348763, |
|
"grad_norm": 0.00018514647672418505, |
|
"learning_rate": 5.918052631578947e-05, |
|
"loss": 0.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.014267596702599873, |
|
"grad_norm": 0.00020832290465477854, |
|
"learning_rate": 5.8647368421052634e-05, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.014426125554850983, |
|
"grad_norm": 0.0001367575750919059, |
|
"learning_rate": 5.811421052631579e-05, |
|
"loss": 0.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.014584654407102092, |
|
"grad_norm": 0.00012530827370937914, |
|
"learning_rate": 5.758105263157894e-05, |
|
"loss": 0.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.014743183259353202, |
|
"grad_norm": 0.0001300648000324145, |
|
"learning_rate": 5.70478947368421e-05, |
|
"loss": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.014901712111604312, |
|
"grad_norm": 0.00019942222570534796, |
|
"learning_rate": 5.6514736842105256e-05, |
|
"loss": 0.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.015060240963855422, |
|
"grad_norm": 0.00024527753703296185, |
|
"learning_rate": 5.5981578947368424e-05, |
|
"loss": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.015218769816106531, |
|
"grad_norm": 0.00016720422718208283, |
|
"learning_rate": 5.544842105263158e-05, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.015377298668357641, |
|
"grad_norm": 0.00019603196415118873, |
|
"learning_rate": 5.491526315789474e-05, |
|
"loss": 0.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.015535827520608751, |
|
"grad_norm": 0.0002464319404680282, |
|
"learning_rate": 5.438210526315789e-05, |
|
"loss": 0.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.01569435637285986, |
|
"grad_norm": 0.00019614743359852582, |
|
"learning_rate": 5.384894736842105e-05, |
|
"loss": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.01585288522511097, |
|
"grad_norm": 0.00017342373030260205, |
|
"learning_rate": 5.331578947368421e-05, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01585288522511097, |
|
"eval_loss": 0.0025951999705284834, |
|
"eval_runtime": 284.9449, |
|
"eval_samples_per_second": 9.321, |
|
"eval_steps_per_second": 2.33, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01601141407736208, |
|
"grad_norm": 0.0001433816651115194, |
|
"learning_rate": 5.278263157894736e-05, |
|
"loss": 0.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.016169942929613188, |
|
"grad_norm": 0.00011838250065920874, |
|
"learning_rate": 5.224947368421053e-05, |
|
"loss": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0163284717818643, |
|
"grad_norm": 0.00010393361299065873, |
|
"learning_rate": 5.171631578947368e-05, |
|
"loss": 0.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.016487000634115408, |
|
"grad_norm": 9.349365427624434e-05, |
|
"learning_rate": 5.1183157894736844e-05, |
|
"loss": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.01664552948636652, |
|
"grad_norm": 9.53435810515657e-05, |
|
"learning_rate": 5.065e-05, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.016804058338617627, |
|
"grad_norm": 8.479709504172206e-05, |
|
"learning_rate": 5.011684210526315e-05, |
|
"loss": 0.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.01696258719086874, |
|
"grad_norm": 7.610375905642286e-05, |
|
"learning_rate": 4.958368421052631e-05, |
|
"loss": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.017121116043119847, |
|
"grad_norm": 7.144361006794497e-05, |
|
"learning_rate": 4.9050526315789473e-05, |
|
"loss": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.017279644895370958, |
|
"grad_norm": 6.864387250971049e-05, |
|
"learning_rate": 4.851736842105263e-05, |
|
"loss": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.017438173747622066, |
|
"grad_norm": 7.527913840021938e-05, |
|
"learning_rate": 4.798421052631579e-05, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.017596702599873178, |
|
"grad_norm": 7.126829586923122e-05, |
|
"learning_rate": 4.745105263157895e-05, |
|
"loss": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.017755231452124286, |
|
"grad_norm": 8.467756561003625e-05, |
|
"learning_rate": 4.69178947368421e-05, |
|
"loss": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.017913760304375397, |
|
"grad_norm": 7.283923332579434e-05, |
|
"learning_rate": 4.638473684210526e-05, |
|
"loss": 0.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.018072289156626505, |
|
"grad_norm": 7.065037789288908e-05, |
|
"learning_rate": 4.585157894736842e-05, |
|
"loss": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.018230818008877617, |
|
"grad_norm": 6.294441118370742e-05, |
|
"learning_rate": 4.531842105263158e-05, |
|
"loss": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.018389346861128725, |
|
"grad_norm": 6.32048977422528e-05, |
|
"learning_rate": 4.478526315789473e-05, |
|
"loss": 0.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.018547875713379836, |
|
"grad_norm": 6.300484528765082e-05, |
|
"learning_rate": 4.425210526315789e-05, |
|
"loss": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.018706404565630944, |
|
"grad_norm": 6.188850966282189e-05, |
|
"learning_rate": 4.3718947368421054e-05, |
|
"loss": 0.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.018864933417882056, |
|
"grad_norm": 6.136750744190067e-05, |
|
"learning_rate": 4.318578947368421e-05, |
|
"loss": 0.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.019023462270133164, |
|
"grad_norm": 5.3558338549919426e-05, |
|
"learning_rate": 4.265263157894736e-05, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.019181991122384275, |
|
"grad_norm": 5.407687422120944e-05, |
|
"learning_rate": 4.211947368421052e-05, |
|
"loss": 0.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.019340519974635383, |
|
"grad_norm": 6.248530553421006e-05, |
|
"learning_rate": 4.1586315789473684e-05, |
|
"loss": 0.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.019499048826886495, |
|
"grad_norm": 5.69553958484903e-05, |
|
"learning_rate": 4.105315789473684e-05, |
|
"loss": 0.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.019657577679137603, |
|
"grad_norm": 5.165593756828457e-05, |
|
"learning_rate": 4.052e-05, |
|
"loss": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.019816106531388714, |
|
"grad_norm": 6.129803659860045e-05, |
|
"learning_rate": 3.998684210526316e-05, |
|
"loss": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.019974635383639822, |
|
"grad_norm": 5.8346176956547424e-05, |
|
"learning_rate": 3.945368421052631e-05, |
|
"loss": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.020133164235890934, |
|
"grad_norm": 5.828439680044539e-05, |
|
"learning_rate": 3.892052631578947e-05, |
|
"loss": 0.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.020291693088142042, |
|
"grad_norm": 6.158412725199014e-05, |
|
"learning_rate": 3.838736842105263e-05, |
|
"loss": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02045022194039315, |
|
"grad_norm": 5.588992280536331e-05, |
|
"learning_rate": 3.785421052631579e-05, |
|
"loss": 0.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.02060875079264426, |
|
"grad_norm": 4.7499852371402085e-05, |
|
"learning_rate": 3.732105263157894e-05, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02076727964489537, |
|
"grad_norm": 5.2001607400598004e-05, |
|
"learning_rate": 3.67878947368421e-05, |
|
"loss": 0.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.02092580849714648, |
|
"grad_norm": 5.1708582759601995e-05, |
|
"learning_rate": 3.6254736842105264e-05, |
|
"loss": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.02108433734939759, |
|
"grad_norm": 4.733254172606394e-05, |
|
"learning_rate": 3.572157894736842e-05, |
|
"loss": 0.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0212428662016487, |
|
"grad_norm": 5.3371717513073236e-05, |
|
"learning_rate": 3.518842105263158e-05, |
|
"loss": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.02140139505389981, |
|
"grad_norm": 4.716423063655384e-05, |
|
"learning_rate": 3.465526315789473e-05, |
|
"loss": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02155992390615092, |
|
"grad_norm": 4.844993964070454e-05, |
|
"learning_rate": 3.4122105263157894e-05, |
|
"loss": 0.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.021718452758402028, |
|
"grad_norm": 5.031727778259665e-05, |
|
"learning_rate": 3.358894736842105e-05, |
|
"loss": 0.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.02187698161065314, |
|
"grad_norm": 4.5576842239825055e-05, |
|
"learning_rate": 3.305578947368421e-05, |
|
"loss": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.022035510462904247, |
|
"grad_norm": 5.910011168452911e-05, |
|
"learning_rate": 3.252263157894737e-05, |
|
"loss": 0.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.02219403931515536, |
|
"grad_norm": 4.8003654228523374e-05, |
|
"learning_rate": 3.198947368421052e-05, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.022352568167406467, |
|
"grad_norm": 4.5099386625224724e-05, |
|
"learning_rate": 3.1456315789473684e-05, |
|
"loss": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.02251109701965758, |
|
"grad_norm": 5.6082306400639936e-05, |
|
"learning_rate": 3.092315789473684e-05, |
|
"loss": 0.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.022669625871908686, |
|
"grad_norm": 4.4970944145461544e-05, |
|
"learning_rate": 3.039e-05, |
|
"loss": 0.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.022828154724159798, |
|
"grad_norm": 5.0454917072784156e-05, |
|
"learning_rate": 2.9856842105263153e-05, |
|
"loss": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.022986683576410906, |
|
"grad_norm": 8.930585318012163e-05, |
|
"learning_rate": 2.9323684210526317e-05, |
|
"loss": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.023145212428662017, |
|
"grad_norm": 6.606173701584339e-05, |
|
"learning_rate": 2.879052631578947e-05, |
|
"loss": 0.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.023303741280913125, |
|
"grad_norm": 7.71412014728412e-05, |
|
"learning_rate": 2.8257368421052628e-05, |
|
"loss": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.023462270133164237, |
|
"grad_norm": 7.689122867304832e-05, |
|
"learning_rate": 2.772421052631579e-05, |
|
"loss": 0.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.023620798985415345, |
|
"grad_norm": 7.011953857727349e-05, |
|
"learning_rate": 2.7191052631578946e-05, |
|
"loss": 0.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.023779327837666456, |
|
"grad_norm": 8.041402179514989e-05, |
|
"learning_rate": 2.6657894736842104e-05, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.023779327837666456, |
|
"eval_loss": 0.002579666208475828, |
|
"eval_runtime": 284.7051, |
|
"eval_samples_per_second": 9.329, |
|
"eval_steps_per_second": 2.332, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.023937856689917564, |
|
"grad_norm": 4.936805248260498, |
|
"learning_rate": 2.6124736842105265e-05, |
|
"loss": 0.118, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.024096385542168676, |
|
"grad_norm": 8.385646651731804e-05, |
|
"learning_rate": 2.5591578947368422e-05, |
|
"loss": 0.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.024254914394419784, |
|
"grad_norm": 0.00011956329399254173, |
|
"learning_rate": 2.5058421052631576e-05, |
|
"loss": 0.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.024413443246670895, |
|
"grad_norm": 0.00013739150017499924, |
|
"learning_rate": 2.4525263157894737e-05, |
|
"loss": 0.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.024571972098922003, |
|
"grad_norm": 0.00016575689369346946, |
|
"learning_rate": 2.3992105263157894e-05, |
|
"loss": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.024730500951173115, |
|
"grad_norm": 0.0002567583287600428, |
|
"learning_rate": 2.345894736842105e-05, |
|
"loss": 0.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.024889029803424223, |
|
"grad_norm": 0.0002865093993023038, |
|
"learning_rate": 2.292578947368421e-05, |
|
"loss": 0.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.025047558655675334, |
|
"grad_norm": 0.0004684887535404414, |
|
"learning_rate": 2.2392631578947366e-05, |
|
"loss": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.025206087507926443, |
|
"grad_norm": 0.0003041870950255543, |
|
"learning_rate": 2.1859473684210527e-05, |
|
"loss": 0.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.025364616360177554, |
|
"grad_norm": 0.0004387758672237396, |
|
"learning_rate": 2.132631578947368e-05, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.025523145212428662, |
|
"grad_norm": 0.00046817571274004877, |
|
"learning_rate": 2.0793157894736842e-05, |
|
"loss": 0.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.02568167406467977, |
|
"grad_norm": 0.0006510078674182296, |
|
"learning_rate": 2.026e-05, |
|
"loss": 0.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.02584020291693088, |
|
"grad_norm": 0.00042408722219988704, |
|
"learning_rate": 1.9726842105263157e-05, |
|
"loss": 0.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.02599873176918199, |
|
"grad_norm": 0.0006571735139004886, |
|
"learning_rate": 1.9193684210526314e-05, |
|
"loss": 0.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.0261572606214331, |
|
"grad_norm": 0.0005432140314951539, |
|
"learning_rate": 1.866052631578947e-05, |
|
"loss": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.02631578947368421, |
|
"grad_norm": 0.0005207346403039992, |
|
"learning_rate": 1.8127368421052632e-05, |
|
"loss": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.02647431832593532, |
|
"grad_norm": 0.0007389848469756544, |
|
"learning_rate": 1.759421052631579e-05, |
|
"loss": 0.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.02663284717818643, |
|
"grad_norm": 0.0006487572682090104, |
|
"learning_rate": 1.7061052631578947e-05, |
|
"loss": 0.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.02679137603043754, |
|
"grad_norm": 0.000673374452162534, |
|
"learning_rate": 1.6527894736842104e-05, |
|
"loss": 0.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.026949904882688648, |
|
"grad_norm": 0.0006955991266295314, |
|
"learning_rate": 1.599473684210526e-05, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02710843373493976, |
|
"grad_norm": 0.0007812317926436663, |
|
"learning_rate": 1.546157894736842e-05, |
|
"loss": 0.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.027266962587190868, |
|
"grad_norm": 0.0007272360380738974, |
|
"learning_rate": 1.4928421052631576e-05, |
|
"loss": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.02742549143944198, |
|
"grad_norm": 0.0007635234505869448, |
|
"learning_rate": 1.4395263157894735e-05, |
|
"loss": 0.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.027584020291693087, |
|
"grad_norm": 0.0007117385393939912, |
|
"learning_rate": 1.3862105263157895e-05, |
|
"loss": 0.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.0277425491439442, |
|
"grad_norm": 0.0005687863449566066, |
|
"learning_rate": 1.3328947368421052e-05, |
|
"loss": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.027901077996195307, |
|
"grad_norm": 0.0005387436249293387, |
|
"learning_rate": 1.2795789473684211e-05, |
|
"loss": 0.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.028059606848446418, |
|
"grad_norm": 0.0005557397962547839, |
|
"learning_rate": 1.2262631578947368e-05, |
|
"loss": 0.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.028218135700697526, |
|
"grad_norm": 0.0005531954229809344, |
|
"learning_rate": 1.1729473684210526e-05, |
|
"loss": 0.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.028376664552948638, |
|
"grad_norm": 0.0010639647953212261, |
|
"learning_rate": 1.1196315789473683e-05, |
|
"loss": 0.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.028535193405199746, |
|
"grad_norm": 0.0005514100193977356, |
|
"learning_rate": 1.066315789473684e-05, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.028693722257450857, |
|
"grad_norm": 0.000542394642252475, |
|
"learning_rate": 1.013e-05, |
|
"loss": 0.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.028852251109701965, |
|
"grad_norm": 0.00043924085912294686, |
|
"learning_rate": 9.596842105263157e-06, |
|
"loss": 0.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.029010779961953077, |
|
"grad_norm": 0.000603148655500263, |
|
"learning_rate": 9.063684210526316e-06, |
|
"loss": 0.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.029169308814204185, |
|
"grad_norm": 0.00047173965140245855, |
|
"learning_rate": 8.530526315789473e-06, |
|
"loss": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.029327837666455296, |
|
"grad_norm": 0.0006395349046215415, |
|
"learning_rate": 7.99736842105263e-06, |
|
"loss": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.029486366518706404, |
|
"grad_norm": 0.0005980796995572746, |
|
"learning_rate": 7.464210526315788e-06, |
|
"loss": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.029644895370957516, |
|
"grad_norm": 0.0006005927571095526, |
|
"learning_rate": 6.931052631578947e-06, |
|
"loss": 0.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.029803424223208624, |
|
"grad_norm": 0.00042718948679976165, |
|
"learning_rate": 6.3978947368421055e-06, |
|
"loss": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.029961953075459735, |
|
"grad_norm": 0.00037346952012740076, |
|
"learning_rate": 5.864736842105263e-06, |
|
"loss": 0.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.030120481927710843, |
|
"grad_norm": 0.00046352826757356524, |
|
"learning_rate": 5.33157894736842e-06, |
|
"loss": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.030279010779961955, |
|
"grad_norm": 0.0003845041792374104, |
|
"learning_rate": 4.7984210526315785e-06, |
|
"loss": 0.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.030437539632213063, |
|
"grad_norm": 0.00042385683627799153, |
|
"learning_rate": 4.265263157894737e-06, |
|
"loss": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.030596068484464174, |
|
"grad_norm": 0.0003921858442481607, |
|
"learning_rate": 3.732105263157894e-06, |
|
"loss": 0.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.030754597336715282, |
|
"grad_norm": 0.0005148733034729958, |
|
"learning_rate": 3.1989473684210527e-06, |
|
"loss": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.03091312618896639, |
|
"grad_norm": 0.0004899385967291892, |
|
"learning_rate": 2.66578947368421e-06, |
|
"loss": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.031071655041217502, |
|
"grad_norm": 0.0006933041149750352, |
|
"learning_rate": 2.1326315789473684e-06, |
|
"loss": 0.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.03123018389346861, |
|
"grad_norm": 0.0009382757125422359, |
|
"learning_rate": 1.5994736842105264e-06, |
|
"loss": 0.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.03138871274571972, |
|
"grad_norm": 0.0007173538906499743, |
|
"learning_rate": 1.0663157894736842e-06, |
|
"loss": 0.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.03154724159797083, |
|
"grad_norm": 0.0005706042284145951, |
|
"learning_rate": 5.331578947368421e-07, |
|
"loss": 0.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.03170577045022194, |
|
"grad_norm": 0.0005148272030055523, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03170577045022194, |
|
"eval_loss": 0.0018136479193344712, |
|
"eval_runtime": 284.2884, |
|
"eval_samples_per_second": 9.343, |
|
"eval_steps_per_second": 2.336, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.162764252413952e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|