|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.999848249174855, |
|
"global_step": 105432, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9810311468568605e-08, |
|
"loss": 0.6518, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.962062293713722e-08, |
|
"loss": 0.6229, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9430934405705826e-08, |
|
"loss": 0.5878, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.924124587427444e-08, |
|
"loss": 0.5654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9051557342843047e-08, |
|
"loss": 0.5387, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.886186881141166e-08, |
|
"loss": 0.512, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.867218027998027e-08, |
|
"loss": 0.4914, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.848249174854888e-08, |
|
"loss": 0.4605, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.829280321711749e-08, |
|
"loss": 0.445, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.81031146856861e-08, |
|
"loss": 0.4245, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.791342615425471e-08, |
|
"loss": 0.4061, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.772373762282332e-08, |
|
"loss": 0.3907, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.753404909139193e-08, |
|
"loss": 0.3671, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.7344360559960544e-08, |
|
"loss": 0.3672, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.715467202852915e-08, |
|
"loss": 0.3369, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.6964983497097764e-08, |
|
"loss": 0.33, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.677529496566637e-08, |
|
"loss": 0.3152, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.6585606434234985e-08, |
|
"loss": 0.2996, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.639591790280359e-08, |
|
"loss": 0.2835, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.6206229371372206e-08, |
|
"loss": 0.2747, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.601654083994081e-08, |
|
"loss": 0.2597, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.582685230850942e-08, |
|
"loss": 0.2509, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.5637163777078034e-08, |
|
"loss": 0.2321, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.544747524564664e-08, |
|
"loss": 0.2216, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.5257786714215255e-08, |
|
"loss": 0.2138, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.506809818278386e-08, |
|
"loss": 0.1964, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.4878409651352476e-08, |
|
"loss": 0.183, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.468872111992108e-08, |
|
"loss": 0.1738, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.4499032588489697e-08, |
|
"loss": 0.1628, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.4309344057058304e-08, |
|
"loss": 0.155, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.411965552562692e-08, |
|
"loss": 0.1433, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.3929966994195524e-08, |
|
"loss": 0.1336, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.374027846276414e-08, |
|
"loss": 0.125, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3550589931332745e-08, |
|
"loss": 0.115, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.336090139990136e-08, |
|
"loss": 0.1095, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.317121286846997e-08, |
|
"loss": 0.1024, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.298152433703858e-08, |
|
"loss": 0.0947, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2791835805607194e-08, |
|
"loss": 0.0908, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.26021472741758e-08, |
|
"loss": 0.0832, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.2412458742744414e-08, |
|
"loss": 0.0846, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.222277021131302e-08, |
|
"loss": 0.0722, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.2033081679881635e-08, |
|
"loss": 0.0673, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.184339314845024e-08, |
|
"loss": 0.0656, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1653704617018856e-08, |
|
"loss": 0.0585, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.146401608558746e-08, |
|
"loss": 0.0589, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.127432755415608e-08, |
|
"loss": 0.0543, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.1084639022724684e-08, |
|
"loss": 0.0548, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.08949504912933e-08, |
|
"loss": 0.0496, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.0705261959861905e-08, |
|
"loss": 0.055, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.051557342843052e-08, |
|
"loss": 0.0437, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.0325884896999126e-08, |
|
"loss": 0.0429, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.013619636556774e-08, |
|
"loss": 0.0451, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.03802574425935745, |
|
"eval_runtime": 313.8581, |
|
"eval_samples_per_second": 125.974, |
|
"eval_steps_per_second": 15.749, |
|
"step": 26359 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.9946507834136347e-08, |
|
"loss": 0.0423, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.975681930270496e-08, |
|
"loss": 0.0437, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.956713077127357e-08, |
|
"loss": 0.0409, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.937744223984218e-08, |
|
"loss": 0.034, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.918775370841079e-08, |
|
"loss": 0.0421, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.89980651769794e-08, |
|
"loss": 0.038, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.880837664554801e-08, |
|
"loss": 0.0391, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.861868811411662e-08, |
|
"loss": 0.0341, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.842899958268523e-08, |
|
"loss": 0.0438, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.8239311051253844e-08, |
|
"loss": 0.0398, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.804962251982245e-08, |
|
"loss": 0.0402, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.7859933988391064e-08, |
|
"loss": 0.0424, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.767024545695967e-08, |
|
"loss": 0.0381, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.748055692552828e-08, |
|
"loss": 0.027, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.729086839409689e-08, |
|
"loss": 0.0339, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.71011798626655e-08, |
|
"loss": 0.0318, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.691149133123411e-08, |
|
"loss": 0.0415, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.672180279980272e-08, |
|
"loss": 0.0323, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.6532114268371334e-08, |
|
"loss": 0.0349, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.634242573693994e-08, |
|
"loss": 0.0278, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.6152737205508555e-08, |
|
"loss": 0.0367, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.596304867407716e-08, |
|
"loss": 0.0292, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.5773360142645776e-08, |
|
"loss": 0.0322, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.558367161121438e-08, |
|
"loss": 0.0326, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.5393983079782997e-08, |
|
"loss": 0.0288, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.5204294548351604e-08, |
|
"loss": 0.0398, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.501460601692022e-08, |
|
"loss": 0.0265, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.4824917485488824e-08, |
|
"loss": 0.0372, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.463522895405744e-08, |
|
"loss": 0.042, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.4445540422626045e-08, |
|
"loss": 0.0383, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.425585189119466e-08, |
|
"loss": 0.0397, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.4066163359763266e-08, |
|
"loss": 0.0319, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.387647482833188e-08, |
|
"loss": 0.0286, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.368678629690049e-08, |
|
"loss": 0.033, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.3497097765469094e-08, |
|
"loss": 0.0361, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.330740923403771e-08, |
|
"loss": 0.0329, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.3117720702606315e-08, |
|
"loss": 0.0343, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.292803217117493e-08, |
|
"loss": 0.0302, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.2738343639743536e-08, |
|
"loss": 0.0237, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.254865510831215e-08, |
|
"loss": 0.0321, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.2358966576880757e-08, |
|
"loss": 0.0227, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.216927804544937e-08, |
|
"loss": 0.0324, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.197958951401798e-08, |
|
"loss": 0.0262, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.178990098258659e-08, |
|
"loss": 0.0337, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.16002124511552e-08, |
|
"loss": 0.0314, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.141052391972381e-08, |
|
"loss": 0.0433, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.122083538829242e-08, |
|
"loss": 0.0351, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.103114685686103e-08, |
|
"loss": 0.02, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.084145832542964e-08, |
|
"loss": 0.023, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.0651769793998254e-08, |
|
"loss": 0.0311, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.046208126256686e-08, |
|
"loss": 0.0231, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.0272392731135474e-08, |
|
"loss": 0.0277, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.008270419970408e-08, |
|
"loss": 0.0233, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.029595419764518738, |
|
"eval_runtime": 313.3391, |
|
"eval_samples_per_second": 126.183, |
|
"eval_steps_per_second": 15.775, |
|
"step": 52718 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.9893015668272695e-08, |
|
"loss": 0.0279, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.9703327136841306e-08, |
|
"loss": 0.0324, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.9513638605409913e-08, |
|
"loss": 0.0245, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.9323950073978527e-08, |
|
"loss": 0.0397, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.9134261542547134e-08, |
|
"loss": 0.0306, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.8944573011115747e-08, |
|
"loss": 0.0215, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.8754884479684354e-08, |
|
"loss": 0.0227, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.8565195948252968e-08, |
|
"loss": 0.0246, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.8375507416821575e-08, |
|
"loss": 0.0318, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.818581888539019e-08, |
|
"loss": 0.0454, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.7996130353958796e-08, |
|
"loss": 0.0236, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.780644182252741e-08, |
|
"loss": 0.032, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.7616753291096017e-08, |
|
"loss": 0.0315, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.742706475966463e-08, |
|
"loss": 0.0367, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.7237376228233238e-08, |
|
"loss": 0.0244, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.704768769680185e-08, |
|
"loss": 0.0268, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.685799916537046e-08, |
|
"loss": 0.0197, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.6668310633939072e-08, |
|
"loss": 0.037, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.647862210250768e-08, |
|
"loss": 0.0239, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.6288933571076293e-08, |
|
"loss": 0.0392, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.60992450396449e-08, |
|
"loss": 0.0269, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.5909556508213514e-08, |
|
"loss": 0.016, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.571986797678212e-08, |
|
"loss": 0.0293, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.553017944535073e-08, |
|
"loss": 0.0368, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.5340490913919342e-08, |
|
"loss": 0.0173, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.5150802382487952e-08, |
|
"loss": 0.037, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.4961113851056563e-08, |
|
"loss": 0.0328, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.4771425319625177e-08, |
|
"loss": 0.0312, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.4581736788193787e-08, |
|
"loss": 0.0435, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.4392048256762397e-08, |
|
"loss": 0.0302, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.4202359725331008e-08, |
|
"loss": 0.0327, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.4012671193899618e-08, |
|
"loss": 0.0266, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.382298266246823e-08, |
|
"loss": 0.0242, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.363329413103684e-08, |
|
"loss": 0.0203, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3443605599605446e-08, |
|
"loss": 0.025, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.3253917068174057e-08, |
|
"loss": 0.0373, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.3064228536742667e-08, |
|
"loss": 0.0357, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.2874540005311277e-08, |
|
"loss": 0.0294, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.2684851473879888e-08, |
|
"loss": 0.0361, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2495162942448498e-08, |
|
"loss": 0.0271, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.230547441101711e-08, |
|
"loss": 0.0417, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.211578587958572e-08, |
|
"loss": 0.0348, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.192609734815433e-08, |
|
"loss": 0.0322, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.173640881672294e-08, |
|
"loss": 0.0245, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.154672028529155e-08, |
|
"loss": 0.0393, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.135703175386016e-08, |
|
"loss": 0.0275, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.116734322242877e-08, |
|
"loss": 0.0221, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.097765469099738e-08, |
|
"loss": 0.0401, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.0787966159565992e-08, |
|
"loss": 0.0392, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.0598277628134602e-08, |
|
"loss": 0.0271, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.0408589096703213e-08, |
|
"loss": 0.027, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.0218900565271823e-08, |
|
"loss": 0.0267, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 2.0029212033840434e-08, |
|
"loss": 0.0379, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.02849128097295761, |
|
"eval_runtime": 312.9013, |
|
"eval_samples_per_second": 126.359, |
|
"eval_steps_per_second": 15.797, |
|
"step": 79077 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.9839523502409044e-08, |
|
"loss": 0.0335, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.9649834970977654e-08, |
|
"loss": 0.0283, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.946014643954626e-08, |
|
"loss": 0.0296, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.9270457908114872e-08, |
|
"loss": 0.0174, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.9080769376683482e-08, |
|
"loss": 0.0228, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.8891080845252093e-08, |
|
"loss": 0.0271, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.8701392313820703e-08, |
|
"loss": 0.0178, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.8511703782389314e-08, |
|
"loss": 0.0293, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.8322015250957924e-08, |
|
"loss": 0.0405, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.8132326719526538e-08, |
|
"loss": 0.0273, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.7942638188095148e-08, |
|
"loss": 0.0292, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.775294965666376e-08, |
|
"loss": 0.0233, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.756326112523237e-08, |
|
"loss": 0.0327, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.737357259380098e-08, |
|
"loss": 0.046, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.718388406236959e-08, |
|
"loss": 0.0195, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.69941955309382e-08, |
|
"loss": 0.0266, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.680450699950681e-08, |
|
"loss": 0.0241, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.661481846807542e-08, |
|
"loss": 0.0169, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.642512993664403e-08, |
|
"loss": 0.0283, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.6235441405212642e-08, |
|
"loss": 0.0369, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.6045752873781252e-08, |
|
"loss": 0.0295, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.5856064342349863e-08, |
|
"loss": 0.0308, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.5666375810918473e-08, |
|
"loss": 0.0299, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.5476687279487084e-08, |
|
"loss": 0.0218, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.528699874805569e-08, |
|
"loss": 0.0207, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.50973102166243e-08, |
|
"loss": 0.0214, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.490762168519291e-08, |
|
"loss": 0.0302, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.4717933153761522e-08, |
|
"loss": 0.0246, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.4528244622330132e-08, |
|
"loss": 0.0241, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 1.4338556090898743e-08, |
|
"loss": 0.0516, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.4148867559467353e-08, |
|
"loss": 0.0328, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.3959179028035964e-08, |
|
"loss": 0.0495, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.3769490496604574e-08, |
|
"loss": 0.0216, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.3579801965173184e-08, |
|
"loss": 0.0389, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.3390113433741795e-08, |
|
"loss": 0.0189, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.3200424902310405e-08, |
|
"loss": 0.0179, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.3010736370879016e-08, |
|
"loss": 0.032, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.2821047839447626e-08, |
|
"loss": 0.0329, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.2631359308016237e-08, |
|
"loss": 0.0292, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.2441670776584847e-08, |
|
"loss": 0.026, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.2251982245153457e-08, |
|
"loss": 0.0323, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.2062293713722068e-08, |
|
"loss": 0.0223, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.1872605182290678e-08, |
|
"loss": 0.0243, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.1682916650859289e-08, |
|
"loss": 0.0167, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.1493228119427899e-08, |
|
"loss": 0.0222, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.130353958799651e-08, |
|
"loss": 0.0218, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.111385105656512e-08, |
|
"loss": 0.0304, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.092416252513373e-08, |
|
"loss": 0.0357, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.073447399370234e-08, |
|
"loss": 0.0377, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.0544785462270951e-08, |
|
"loss": 0.0301, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.0355096930839562e-08, |
|
"loss": 0.0274, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.016540839940817e-08, |
|
"loss": 0.0276, |
|
"step": 105000 |
|
} |
|
], |
|
"max_steps": 131795, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.6644037006596096e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|