|
{ |
|
"best_metric": 0.4560202658176422, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.11421229209793704, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005710614604896852, |
|
"grad_norm": 9.655982971191406, |
|
"learning_rate": 2.333333333333333e-06, |
|
"loss": 4.9642, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0005710614604896852, |
|
"eval_loss": 0.7650408744812012, |
|
"eval_runtime": 593.1149, |
|
"eval_samples_per_second": 4.974, |
|
"eval_steps_per_second": 1.244, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0011421229209793704, |
|
"grad_norm": 8.363570213317871, |
|
"learning_rate": 4.666666666666666e-06, |
|
"loss": 5.0461, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0017131843814690555, |
|
"grad_norm": 8.605849266052246, |
|
"learning_rate": 7e-06, |
|
"loss": 4.6528, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.002284245841958741, |
|
"grad_norm": 10.32148551940918, |
|
"learning_rate": 9.333333333333333e-06, |
|
"loss": 5.4813, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.002855307302448426, |
|
"grad_norm": 7.543620586395264, |
|
"learning_rate": 1.1666666666666665e-05, |
|
"loss": 4.938, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.003426368762938111, |
|
"grad_norm": 7.9145426750183105, |
|
"learning_rate": 1.4e-05, |
|
"loss": 5.5469, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.003997430223427797, |
|
"grad_norm": 7.476115703582764, |
|
"learning_rate": 1.633333333333333e-05, |
|
"loss": 5.4585, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.004568491683917482, |
|
"grad_norm": 7.091987133026123, |
|
"learning_rate": 1.8666666666666665e-05, |
|
"loss": 5.2762, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.005139553144407167, |
|
"grad_norm": 5.893141746520996, |
|
"learning_rate": 2.1e-05, |
|
"loss": 4.9478, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.005710614604896852, |
|
"grad_norm": 5.861713886260986, |
|
"learning_rate": 2.333333333333333e-05, |
|
"loss": 5.3339, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006281676065386537, |
|
"grad_norm": 5.187582015991211, |
|
"learning_rate": 2.5666666666666663e-05, |
|
"loss": 4.6863, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.006852737525876222, |
|
"grad_norm": 5.725977420806885, |
|
"learning_rate": 2.8e-05, |
|
"loss": 4.2163, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.007423798986365908, |
|
"grad_norm": 6.390807628631592, |
|
"learning_rate": 3.0333333333333333e-05, |
|
"loss": 4.7941, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.007994860446855594, |
|
"grad_norm": 6.105741500854492, |
|
"learning_rate": 3.266666666666666e-05, |
|
"loss": 5.1002, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.008565921907345278, |
|
"grad_norm": 6.119107723236084, |
|
"learning_rate": 3.5e-05, |
|
"loss": 4.9678, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.009136983367834963, |
|
"grad_norm": 5.37454891204834, |
|
"learning_rate": 3.733333333333333e-05, |
|
"loss": 4.6234, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00970804482832465, |
|
"grad_norm": 5.023365497589111, |
|
"learning_rate": 3.9666666666666664e-05, |
|
"loss": 4.5956, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.010279106288814333, |
|
"grad_norm": 5.116684436798096, |
|
"learning_rate": 4.2e-05, |
|
"loss": 4.4032, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.010850167749304019, |
|
"grad_norm": 4.848908424377441, |
|
"learning_rate": 4.4333333333333324e-05, |
|
"loss": 4.1467, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.011421229209793705, |
|
"grad_norm": 5.300320625305176, |
|
"learning_rate": 4.666666666666666e-05, |
|
"loss": 4.711, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011992290670283389, |
|
"grad_norm": 4.577368259429932, |
|
"learning_rate": 4.899999999999999e-05, |
|
"loss": 4.1949, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.012563352130773075, |
|
"grad_norm": 4.3569560050964355, |
|
"learning_rate": 5.1333333333333325e-05, |
|
"loss": 4.6698, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01313441359126276, |
|
"grad_norm": 4.732718467712402, |
|
"learning_rate": 5.3666666666666666e-05, |
|
"loss": 4.4054, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.013705475051752444, |
|
"grad_norm": 4.412967681884766, |
|
"learning_rate": 5.6e-05, |
|
"loss": 4.3697, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01427653651224213, |
|
"grad_norm": 4.235969543457031, |
|
"learning_rate": 5.833333333333333e-05, |
|
"loss": 4.3441, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.014847597972731816, |
|
"grad_norm": 4.274826526641846, |
|
"learning_rate": 6.0666666666666666e-05, |
|
"loss": 4.6215, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0154186594332215, |
|
"grad_norm": 4.126132488250732, |
|
"learning_rate": 6.3e-05, |
|
"loss": 4.3444, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.015989720893711187, |
|
"grad_norm": 4.711647987365723, |
|
"learning_rate": 6.533333333333333e-05, |
|
"loss": 4.4501, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01656078235420087, |
|
"grad_norm": 4.457403659820557, |
|
"learning_rate": 6.766666666666667e-05, |
|
"loss": 4.5348, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.017131843814690555, |
|
"grad_norm": 4.318757057189941, |
|
"learning_rate": 7e-05, |
|
"loss": 4.8379, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01770290527518024, |
|
"grad_norm": 4.192445755004883, |
|
"learning_rate": 6.999402376603183e-05, |
|
"loss": 4.8049, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.018273966735669927, |
|
"grad_norm": 4.142071723937988, |
|
"learning_rate": 6.99760971050058e-05, |
|
"loss": 4.4266, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.018845028196159613, |
|
"grad_norm": 4.6366167068481445, |
|
"learning_rate": 6.994622613886018e-05, |
|
"loss": 5.603, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0194160896566493, |
|
"grad_norm": 4.616098880767822, |
|
"learning_rate": 6.990442106850258e-05, |
|
"loss": 5.0995, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01998715111713898, |
|
"grad_norm": 4.354705333709717, |
|
"learning_rate": 6.98506961703262e-05, |
|
"loss": 4.8896, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.020558212577628666, |
|
"grad_norm": 4.386688709259033, |
|
"learning_rate": 6.978506979133457e-05, |
|
"loss": 4.4294, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.021129274038118352, |
|
"grad_norm": 4.3963093757629395, |
|
"learning_rate": 6.9707564342876e-05, |
|
"loss": 4.2744, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.021700335498608038, |
|
"grad_norm": 4.365737438201904, |
|
"learning_rate": 6.96182062929901e-05, |
|
"loss": 3.9286, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.022271396959097724, |
|
"grad_norm": 4.8030571937561035, |
|
"learning_rate": 6.951702615736908e-05, |
|
"loss": 4.4058, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02284245841958741, |
|
"grad_norm": 5.323755264282227, |
|
"learning_rate": 6.940405848893656e-05, |
|
"loss": 4.145, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.023413519880077092, |
|
"grad_norm": 4.974182605743408, |
|
"learning_rate": 6.92793418660478e-05, |
|
"loss": 4.5518, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.023984581340566778, |
|
"grad_norm": 5.099968910217285, |
|
"learning_rate": 6.914291887931528e-05, |
|
"loss": 4.0907, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.024555642801056463, |
|
"grad_norm": 4.776078224182129, |
|
"learning_rate": 6.899483611706398e-05, |
|
"loss": 4.1122, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02512670426154615, |
|
"grad_norm": 4.876911640167236, |
|
"learning_rate": 6.883514414942155e-05, |
|
"loss": 4.1617, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.025697765722035835, |
|
"grad_norm": 5.060526371002197, |
|
"learning_rate": 6.866389751104867e-05, |
|
"loss": 3.7848, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02626882718252552, |
|
"grad_norm": 5.239251136779785, |
|
"learning_rate": 6.848115468251542e-05, |
|
"loss": 4.0855, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.026839888643015203, |
|
"grad_norm": 5.517544269561768, |
|
"learning_rate": 6.828697807033038e-05, |
|
"loss": 4.1965, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.02741095010350489, |
|
"grad_norm": 5.758817672729492, |
|
"learning_rate": 6.808143398562868e-05, |
|
"loss": 4.0623, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.027982011563994574, |
|
"grad_norm": 5.886153697967529, |
|
"learning_rate": 6.786459262152698e-05, |
|
"loss": 3.7384, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02855307302448426, |
|
"grad_norm": 8.272394180297852, |
|
"learning_rate": 6.763652802915244e-05, |
|
"loss": 3.7424, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02855307302448426, |
|
"eval_loss": 0.5147433876991272, |
|
"eval_runtime": 595.5275, |
|
"eval_samples_per_second": 4.954, |
|
"eval_steps_per_second": 1.239, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.029124134484973946, |
|
"grad_norm": 4.8110432624816895, |
|
"learning_rate": 6.739731809235446e-05, |
|
"loss": 3.3424, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.02969519594546363, |
|
"grad_norm": 4.688547611236572, |
|
"learning_rate": 6.71470445011073e-05, |
|
"loss": 3.6607, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.030266257405953317, |
|
"grad_norm": 4.069744110107422, |
|
"learning_rate": 6.688579272361309e-05, |
|
"loss": 3.8212, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.030837318866443, |
|
"grad_norm": 3.885037422180176, |
|
"learning_rate": 6.66136519771145e-05, |
|
"loss": 3.7556, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03140838032693269, |
|
"grad_norm": 3.9566140174865723, |
|
"learning_rate": 6.633071519742718e-05, |
|
"loss": 3.6849, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.031979441787422375, |
|
"grad_norm": 4.207897186279297, |
|
"learning_rate": 6.603707900720217e-05, |
|
"loss": 3.3258, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03255050324791205, |
|
"grad_norm": 4.095637321472168, |
|
"learning_rate": 6.573284368292943e-05, |
|
"loss": 3.8785, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.03312156470840174, |
|
"grad_norm": 3.729644536972046, |
|
"learning_rate": 6.541811312069348e-05, |
|
"loss": 3.2926, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.033692626168891425, |
|
"grad_norm": 4.160499572753906, |
|
"learning_rate": 6.509299480069303e-05, |
|
"loss": 3.6083, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.03426368762938111, |
|
"grad_norm": 3.895054578781128, |
|
"learning_rate": 6.47575997505365e-05, |
|
"loss": 3.4853, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.034834749089870796, |
|
"grad_norm": 3.4832022190093994, |
|
"learning_rate": 6.441204250732624e-05, |
|
"loss": 3.1942, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.03540581055036048, |
|
"grad_norm": 3.5753769874572754, |
|
"learning_rate": 6.405644107854427e-05, |
|
"loss": 3.6576, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03597687201085017, |
|
"grad_norm": 3.4942517280578613, |
|
"learning_rate": 6.369091690175273e-05, |
|
"loss": 3.5523, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.036547933471339854, |
|
"grad_norm": 3.4585907459259033, |
|
"learning_rate": 6.331559480312315e-05, |
|
"loss": 3.4847, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03711899493182954, |
|
"grad_norm": 3.7623393535614014, |
|
"learning_rate": 6.293060295480838e-05, |
|
"loss": 3.9728, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.037690056392319225, |
|
"grad_norm": 3.7405989170074463, |
|
"learning_rate": 6.25360728311719e-05, |
|
"loss": 3.6524, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03826111785280891, |
|
"grad_norm": 3.857524871826172, |
|
"learning_rate": 6.213213916388954e-05, |
|
"loss": 4.1654, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0388321793132986, |
|
"grad_norm": 3.8793821334838867, |
|
"learning_rate": 6.171893989593859e-05, |
|
"loss": 3.9822, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.039403240773788276, |
|
"grad_norm": 3.9082939624786377, |
|
"learning_rate": 6.129661613449057e-05, |
|
"loss": 4.042, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.03997430223427796, |
|
"grad_norm": 3.849015474319458, |
|
"learning_rate": 6.086531210272306e-05, |
|
"loss": 3.68, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04054536369476765, |
|
"grad_norm": 3.643329381942749, |
|
"learning_rate": 6.042517509056784e-05, |
|
"loss": 3.4066, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.04111642515525733, |
|
"grad_norm": 3.8051083087921143, |
|
"learning_rate": 5.997635540441133e-05, |
|
"loss": 3.9113, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04168748661574702, |
|
"grad_norm": 3.7576425075531006, |
|
"learning_rate": 5.9519006315765176e-05, |
|
"loss": 3.9575, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.042258548076236704, |
|
"grad_norm": 4.091893196105957, |
|
"learning_rate": 5.9053284008924185e-05, |
|
"loss": 4.044, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.04282960953672639, |
|
"grad_norm": 3.665947437286377, |
|
"learning_rate": 5.85793475276295e-05, |
|
"loss": 3.7113, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.043400670997216076, |
|
"grad_norm": 3.7637410163879395, |
|
"learning_rate": 5.809735872075529e-05, |
|
"loss": 3.9625, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.04397173245770576, |
|
"grad_norm": 3.848707437515259, |
|
"learning_rate": 5.760748218703755e-05, |
|
"loss": 4.3046, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.04454279391819545, |
|
"grad_norm": 3.8582873344421387, |
|
"learning_rate": 5.710988521886378e-05, |
|
"loss": 4.0915, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.04511385537868513, |
|
"grad_norm": 3.9476304054260254, |
|
"learning_rate": 5.660473774514275e-05, |
|
"loss": 3.9352, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.04568491683917482, |
|
"grad_norm": 3.7469491958618164, |
|
"learning_rate": 5.6092212273273975e-05, |
|
"loss": 3.8668, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.046255978299664505, |
|
"grad_norm": 3.9498250484466553, |
|
"learning_rate": 5.557248383023655e-05, |
|
"loss": 4.3159, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.046827039760154184, |
|
"grad_norm": 3.702901601791382, |
|
"learning_rate": 5.5045729902817676e-05, |
|
"loss": 4.057, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.04739810122064387, |
|
"grad_norm": 4.221633434295654, |
|
"learning_rate": 5.4512130377000987e-05, |
|
"loss": 4.4756, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.047969162681133555, |
|
"grad_norm": 3.9935688972473145, |
|
"learning_rate": 5.397186747653573e-05, |
|
"loss": 4.9131, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04854022414162324, |
|
"grad_norm": 3.9818077087402344, |
|
"learning_rate": 5.342512570070745e-05, |
|
"loss": 4.3006, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.04911128560211293, |
|
"grad_norm": 3.9484941959381104, |
|
"learning_rate": 5.287209176133174e-05, |
|
"loss": 3.9452, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04968234706260261, |
|
"grad_norm": 3.9193077087402344, |
|
"learning_rate": 5.231295451899226e-05, |
|
"loss": 3.8865, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0502534085230923, |
|
"grad_norm": 4.1261305809021, |
|
"learning_rate": 5.174790491854502e-05, |
|
"loss": 4.026, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.050824469983581984, |
|
"grad_norm": 4.0768351554870605, |
|
"learning_rate": 5.117713592391096e-05, |
|
"loss": 3.7968, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.05139553144407167, |
|
"grad_norm": 4.356061935424805, |
|
"learning_rate": 5.060084245217884e-05, |
|
"loss": 4.048, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.051966592904561355, |
|
"grad_norm": 4.245785236358643, |
|
"learning_rate": 5.0019221307041306e-05, |
|
"loss": 3.5643, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.05253765436505104, |
|
"grad_norm": 4.084421634674072, |
|
"learning_rate": 4.943247111158662e-05, |
|
"loss": 3.5692, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.05310871582554073, |
|
"grad_norm": 4.277196407318115, |
|
"learning_rate": 4.884079224046898e-05, |
|
"loss": 4.2165, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.053679777286030406, |
|
"grad_norm": 4.16418981552124, |
|
"learning_rate": 4.824438675148086e-05, |
|
"loss": 3.3093, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.05425083874652009, |
|
"grad_norm": 4.666293144226074, |
|
"learning_rate": 4.764345831655036e-05, |
|
"loss": 3.5172, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05482190020700978, |
|
"grad_norm": 4.601908206939697, |
|
"learning_rate": 4.703821215218748e-05, |
|
"loss": 3.3629, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.05539296166749946, |
|
"grad_norm": 4.77185583114624, |
|
"learning_rate": 4.642885494940291e-05, |
|
"loss": 3.5901, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.05596402312798915, |
|
"grad_norm": 5.006587028503418, |
|
"learning_rate": 4.581559480312316e-05, |
|
"loss": 3.2387, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.056535084588478834, |
|
"grad_norm": 5.511896133422852, |
|
"learning_rate": 4.519864114112636e-05, |
|
"loss": 3.5257, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.05710614604896852, |
|
"grad_norm": 7.265841007232666, |
|
"learning_rate": 4.45782046525229e-05, |
|
"loss": 3.6848, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05710614604896852, |
|
"eval_loss": 0.4799676537513733, |
|
"eval_runtime": 595.5075, |
|
"eval_samples_per_second": 4.954, |
|
"eval_steps_per_second": 1.239, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.057677207509458206, |
|
"grad_norm": 4.521103382110596, |
|
"learning_rate": 4.3954497215805244e-05, |
|
"loss": 4.1905, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.05824826896994789, |
|
"grad_norm": 3.9009766578674316, |
|
"learning_rate": 4.332773182649165e-05, |
|
"loss": 3.408, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.05881933043043758, |
|
"grad_norm": 3.8264896869659424, |
|
"learning_rate": 4.2698122524388405e-05, |
|
"loss": 3.433, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.05939039189092726, |
|
"grad_norm": 3.7116568088531494, |
|
"learning_rate": 4.206588432049535e-05, |
|
"loss": 3.5134, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05996145335141695, |
|
"grad_norm": 3.5707197189331055, |
|
"learning_rate": 4.143123312357996e-05, |
|
"loss": 3.0987, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.060532514811906635, |
|
"grad_norm": 3.62729811668396, |
|
"learning_rate": 4.079438566644454e-05, |
|
"loss": 3.7314, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.061103576272396314, |
|
"grad_norm": 3.6416237354278564, |
|
"learning_rate": 4.015555943191231e-05, |
|
"loss": 3.4506, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.061674637732886, |
|
"grad_norm": 3.462261199951172, |
|
"learning_rate": 3.9514972578557114e-05, |
|
"loss": 3.3004, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.062245699193375685, |
|
"grad_norm": 3.664212942123413, |
|
"learning_rate": 3.8872843866202525e-05, |
|
"loss": 3.3434, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.06281676065386538, |
|
"grad_norm": 3.6109557151794434, |
|
"learning_rate": 3.8229392581215565e-05, |
|
"loss": 3.6783, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06338782211435506, |
|
"grad_norm": 3.6711537837982178, |
|
"learning_rate": 3.7584838461620587e-05, |
|
"loss": 3.5776, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.06395888357484475, |
|
"grad_norm": 3.6779439449310303, |
|
"learning_rate": 3.693940162205895e-05, |
|
"loss": 3.6493, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06452994503533442, |
|
"grad_norm": 3.770522356033325, |
|
"learning_rate": 3.629330247862007e-05, |
|
"loss": 3.4767, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0651010064958241, |
|
"grad_norm": 3.7113234996795654, |
|
"learning_rate": 3.564676167356954e-05, |
|
"loss": 3.3844, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.06567206795631379, |
|
"grad_norm": 3.5234830379486084, |
|
"learning_rate": 3.5e-05, |
|
"loss": 3.3633, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.06624312941680348, |
|
"grad_norm": 3.8543918132781982, |
|
"learning_rate": 3.435323832643046e-05, |
|
"loss": 4.189, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.06681419087729316, |
|
"grad_norm": 3.756317138671875, |
|
"learning_rate": 3.370669752137993e-05, |
|
"loss": 3.4077, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.06738525233778285, |
|
"grad_norm": 3.5420141220092773, |
|
"learning_rate": 3.306059837794105e-05, |
|
"loss": 3.577, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.06795631379827254, |
|
"grad_norm": 3.767037868499756, |
|
"learning_rate": 3.241516153837941e-05, |
|
"loss": 4.1839, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.06852737525876222, |
|
"grad_norm": 3.63523268699646, |
|
"learning_rate": 3.177060741878443e-05, |
|
"loss": 3.8141, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06909843671925191, |
|
"grad_norm": 3.623882532119751, |
|
"learning_rate": 3.1127156133797475e-05, |
|
"loss": 3.5584, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.06966949817974159, |
|
"grad_norm": 3.6835837364196777, |
|
"learning_rate": 3.048502742144289e-05, |
|
"loss": 3.6744, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.07024055964023128, |
|
"grad_norm": 3.5777692794799805, |
|
"learning_rate": 2.984444056808768e-05, |
|
"loss": 3.8484, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.07081162110072096, |
|
"grad_norm": 3.7121081352233887, |
|
"learning_rate": 2.9205614333555444e-05, |
|
"loss": 3.9062, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.07138268256121065, |
|
"grad_norm": 3.627039909362793, |
|
"learning_rate": 2.856876687642003e-05, |
|
"loss": 3.8178, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07195374402170034, |
|
"grad_norm": 3.5459561347961426, |
|
"learning_rate": 2.7934115679504645e-05, |
|
"loss": 3.4396, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.07252480548219002, |
|
"grad_norm": 3.5370867252349854, |
|
"learning_rate": 2.7301877475611606e-05, |
|
"loss": 3.5644, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.07309586694267971, |
|
"grad_norm": 3.6710925102233887, |
|
"learning_rate": 2.667226817350835e-05, |
|
"loss": 3.8682, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.0736669284031694, |
|
"grad_norm": 3.8747775554656982, |
|
"learning_rate": 2.604550278419475e-05, |
|
"loss": 4.4182, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.07423798986365908, |
|
"grad_norm": 3.7661991119384766, |
|
"learning_rate": 2.54217953474771e-05, |
|
"loss": 4.1295, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07480905132414876, |
|
"grad_norm": 3.6781368255615234, |
|
"learning_rate": 2.4801358858873636e-05, |
|
"loss": 4.1132, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.07538011278463845, |
|
"grad_norm": 3.6991829872131348, |
|
"learning_rate": 2.4184405196876842e-05, |
|
"loss": 4.3835, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.07595117424512814, |
|
"grad_norm": 3.730729341506958, |
|
"learning_rate": 2.3571145050597088e-05, |
|
"loss": 4.2776, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.07652223570561782, |
|
"grad_norm": 3.956230401992798, |
|
"learning_rate": 2.296178784781251e-05, |
|
"loss": 4.8423, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.07709329716610751, |
|
"grad_norm": 3.769970417022705, |
|
"learning_rate": 2.2356541683449646e-05, |
|
"loss": 4.1285, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.0776643586265972, |
|
"grad_norm": 4.056671619415283, |
|
"learning_rate": 2.175561324851914e-05, |
|
"loss": 4.1466, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.07823542008708688, |
|
"grad_norm": 3.9425039291381836, |
|
"learning_rate": 2.1159207759531013e-05, |
|
"loss": 3.734, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.07880648154757655, |
|
"grad_norm": 3.9520437717437744, |
|
"learning_rate": 2.0567528888413382e-05, |
|
"loss": 3.4524, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.07937754300806624, |
|
"grad_norm": 3.9661831855773926, |
|
"learning_rate": 1.9980778692958684e-05, |
|
"loss": 3.6408, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.07994860446855592, |
|
"grad_norm": 4.106236934661865, |
|
"learning_rate": 1.9399157547821162e-05, |
|
"loss": 3.5565, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08051966592904561, |
|
"grad_norm": 4.123175621032715, |
|
"learning_rate": 1.882286407608904e-05, |
|
"loss": 3.8822, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.0810907273895353, |
|
"grad_norm": 4.1223859786987305, |
|
"learning_rate": 1.825209508145497e-05, |
|
"loss": 3.5751, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.08166178885002498, |
|
"grad_norm": 4.237730979919434, |
|
"learning_rate": 1.7687045481007746e-05, |
|
"loss": 3.8547, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.08223285031051467, |
|
"grad_norm": 4.241462707519531, |
|
"learning_rate": 1.712790823866826e-05, |
|
"loss": 3.5782, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.08280391177100435, |
|
"grad_norm": 4.347367763519287, |
|
"learning_rate": 1.657487429929254e-05, |
|
"loss": 3.3161, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.08337497323149404, |
|
"grad_norm": 4.680622577667236, |
|
"learning_rate": 1.602813252346427e-05, |
|
"loss": 3.7808, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.08394603469198372, |
|
"grad_norm": 4.457460403442383, |
|
"learning_rate": 1.5487869622999004e-05, |
|
"loss": 3.2924, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.08451709615247341, |
|
"grad_norm": 4.6479172706604, |
|
"learning_rate": 1.4954270097182317e-05, |
|
"loss": 3.3637, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.0850881576129631, |
|
"grad_norm": 5.357884883880615, |
|
"learning_rate": 1.4427516169763444e-05, |
|
"loss": 3.3677, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.08565921907345278, |
|
"grad_norm": 6.834778308868408, |
|
"learning_rate": 1.3907787726726029e-05, |
|
"loss": 3.6711, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08565921907345278, |
|
"eval_loss": 0.46051260828971863, |
|
"eval_runtime": 596.2501, |
|
"eval_samples_per_second": 4.948, |
|
"eval_steps_per_second": 1.238, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08623028053394247, |
|
"grad_norm": 3.791814088821411, |
|
"learning_rate": 1.339526225485725e-05, |
|
"loss": 3.4645, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.08680134199443215, |
|
"grad_norm": 3.796891212463379, |
|
"learning_rate": 1.2890114781136224e-05, |
|
"loss": 3.4475, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.08737240345492184, |
|
"grad_norm": 3.6403725147247314, |
|
"learning_rate": 1.239251781296245e-05, |
|
"loss": 3.1833, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.08794346491541152, |
|
"grad_norm": 3.6974196434020996, |
|
"learning_rate": 1.1902641279244715e-05, |
|
"loss": 3.3696, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.08851452637590121, |
|
"grad_norm": 3.578291893005371, |
|
"learning_rate": 1.1420652472370497e-05, |
|
"loss": 3.1136, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.0890855878363909, |
|
"grad_norm": 3.4341652393341064, |
|
"learning_rate": 1.0946715991075805e-05, |
|
"loss": 2.9641, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.08965664929688058, |
|
"grad_norm": 3.451934576034546, |
|
"learning_rate": 1.0480993684234815e-05, |
|
"loss": 3.1792, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.09022771075737027, |
|
"grad_norm": 3.3800251483917236, |
|
"learning_rate": 1.0023644595588671e-05, |
|
"loss": 3.3144, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.09079877221785995, |
|
"grad_norm": 3.479318618774414, |
|
"learning_rate": 9.57482490943216e-06, |
|
"loss": 3.1648, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.09136983367834964, |
|
"grad_norm": 3.491649866104126, |
|
"learning_rate": 9.134687897276934e-06, |
|
"loss": 3.3233, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09194089513883932, |
|
"grad_norm": 3.484065055847168, |
|
"learning_rate": 8.703383865509432e-06, |
|
"loss": 3.5239, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.09251195659932901, |
|
"grad_norm": 3.6290812492370605, |
|
"learning_rate": 8.281060104061394e-06, |
|
"loss": 3.2998, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.09308301805981868, |
|
"grad_norm": 3.6152350902557373, |
|
"learning_rate": 7.867860836110453e-06, |
|
"loss": 3.7931, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.09365407952030837, |
|
"grad_norm": 3.5166842937469482, |
|
"learning_rate": 7.463927168828087e-06, |
|
"loss": 3.5036, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.09422514098079805, |
|
"grad_norm": 3.266004800796509, |
|
"learning_rate": 7.069397045191617e-06, |
|
"loss": 2.7839, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.09479620244128774, |
|
"grad_norm": 3.5984013080596924, |
|
"learning_rate": 6.684405196876842e-06, |
|
"loss": 3.7792, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.09536726390177742, |
|
"grad_norm": 3.4622771739959717, |
|
"learning_rate": 6.309083098247264e-06, |
|
"loss": 3.4876, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.09593832536226711, |
|
"grad_norm": 3.397523880004883, |
|
"learning_rate": 5.943558921455733e-06, |
|
"loss": 3.202, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.0965093868227568, |
|
"grad_norm": 3.561593770980835, |
|
"learning_rate": 5.587957492673759e-06, |
|
"loss": 3.5857, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.09708044828324648, |
|
"grad_norm": 3.7165884971618652, |
|
"learning_rate": 5.2424002494635095e-06, |
|
"loss": 3.6163, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09765150974373617, |
|
"grad_norm": 3.710353374481201, |
|
"learning_rate": 4.9070051993069636e-06, |
|
"loss": 3.9078, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.09822257120422585, |
|
"grad_norm": 3.53379225730896, |
|
"learning_rate": 4.581886879306507e-06, |
|
"loss": 3.5316, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.09879363266471554, |
|
"grad_norm": 3.5342020988464355, |
|
"learning_rate": 4.2671563170705725e-06, |
|
"loss": 3.5556, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.09936469412520522, |
|
"grad_norm": 3.5446951389312744, |
|
"learning_rate": 3.962920992797834e-06, |
|
"loss": 3.4027, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.09993575558569491, |
|
"grad_norm": 3.7609660625457764, |
|
"learning_rate": 3.6692848025728216e-06, |
|
"loss": 4.0196, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1005068170461846, |
|
"grad_norm": 3.5949559211730957, |
|
"learning_rate": 3.38634802288549e-06, |
|
"loss": 3.7018, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.10107787850667428, |
|
"grad_norm": 3.4899206161499023, |
|
"learning_rate": 3.1142072763869042e-06, |
|
"loss": 3.4402, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.10164893996716397, |
|
"grad_norm": 3.4958362579345703, |
|
"learning_rate": 2.852955498892694e-06, |
|
"loss": 3.6609, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.10222000142765365, |
|
"grad_norm": 3.7367665767669678, |
|
"learning_rate": 2.6026819076455325e-06, |
|
"loss": 3.9386, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.10279106288814334, |
|
"grad_norm": 3.6101346015930176, |
|
"learning_rate": 2.36347197084755e-06, |
|
"loss": 3.589, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10336212434863302, |
|
"grad_norm": 3.714442491531372, |
|
"learning_rate": 2.1354073784730253e-06, |
|
"loss": 3.9742, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.10393318580912271, |
|
"grad_norm": 3.8775899410247803, |
|
"learning_rate": 1.9185660143713184e-06, |
|
"loss": 4.4065, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1045042472696124, |
|
"grad_norm": 4.041444301605225, |
|
"learning_rate": 1.7130219296696263e-06, |
|
"loss": 4.5485, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.10507530873010208, |
|
"grad_norm": 3.8321800231933594, |
|
"learning_rate": 1.5188453174845743e-06, |
|
"loss": 4.2913, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.10564637019059177, |
|
"grad_norm": 3.849231481552124, |
|
"learning_rate": 1.3361024889513333e-06, |
|
"loss": 4.6328, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.10621743165108145, |
|
"grad_norm": 3.960550546646118, |
|
"learning_rate": 1.16485585057844e-06, |
|
"loss": 4.6204, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.10678849311157114, |
|
"grad_norm": 3.7694177627563477, |
|
"learning_rate": 1.0051638829360127e-06, |
|
"loss": 3.6095, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.10735955457206081, |
|
"grad_norm": 3.936204671859741, |
|
"learning_rate": 8.570811206847189e-07, |
|
"loss": 4.067, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1079306160325505, |
|
"grad_norm": 4.248170852661133, |
|
"learning_rate": 7.206581339521939e-07, |
|
"loss": 4.3996, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.10850167749304018, |
|
"grad_norm": 4.024265766143799, |
|
"learning_rate": 5.959415110634375e-07, |
|
"loss": 3.9334, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10907273895352987, |
|
"grad_norm": 4.193387508392334, |
|
"learning_rate": 4.829738426309099e-07, |
|
"loss": 3.9381, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.10964380041401955, |
|
"grad_norm": 4.102433204650879, |
|
"learning_rate": 3.817937070098914e-07, |
|
"loss": 3.7026, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.11021486187450924, |
|
"grad_norm": 4.2977142333984375, |
|
"learning_rate": 2.9243565712400384e-07, |
|
"loss": 3.4472, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.11078592333499893, |
|
"grad_norm": 4.350393295288086, |
|
"learning_rate": 2.1493020866542365e-07, |
|
"loss": 3.5126, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.11135698479548861, |
|
"grad_norm": 4.65367317199707, |
|
"learning_rate": 1.4930382967379363e-07, |
|
"loss": 3.6799, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1119280462559783, |
|
"grad_norm": 4.893271446228027, |
|
"learning_rate": 9.557893149741924e-08, |
|
"loss": 3.6397, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.11249910771646798, |
|
"grad_norm": 4.931285381317139, |
|
"learning_rate": 5.377386113981197e-08, |
|
"loss": 3.6552, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.11307016917695767, |
|
"grad_norm": 5.327000141143799, |
|
"learning_rate": 2.3902894994198286e-08, |
|
"loss": 3.3647, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.11364123063744735, |
|
"grad_norm": 6.226569175720215, |
|
"learning_rate": 5.976233968155164e-09, |
|
"loss": 3.279, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.11421229209793704, |
|
"grad_norm": 8.364439010620117, |
|
"learning_rate": 0.0, |
|
"loss": 3.8711, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11421229209793704, |
|
"eval_loss": 0.4560202658176422, |
|
"eval_runtime": 596.8378, |
|
"eval_samples_per_second": 4.943, |
|
"eval_steps_per_second": 1.237, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.849985165910344e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|