{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.05735918320523116,
  "eval_steps": 10,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001147183664104623,
      "eval_loss": 1.873344898223877,
      "eval_runtime": 12.7647,
      "eval_samples_per_second": 515.797,
      "eval_steps_per_second": 8.069,
      "step": 10
    },
    {
      "epoch": 0.002294367328209246,
      "eval_loss": 1.8726389408111572,
      "eval_runtime": 12.8667,
      "eval_samples_per_second": 511.709,
      "eval_steps_per_second": 8.005,
      "step": 20
    },
    {
      "epoch": 0.0034415509923138693,
      "eval_loss": 1.8714078664779663,
      "eval_runtime": 12.9103,
      "eval_samples_per_second": 509.979,
      "eval_steps_per_second": 7.978,
      "step": 30
    },
    {
      "epoch": 0.004588734656418492,
      "eval_loss": 1.8696790933609009,
      "eval_runtime": 12.947,
      "eval_samples_per_second": 508.534,
      "eval_steps_per_second": 7.955,
      "step": 40
    },
    {
      "epoch": 0.0057359183205231154,
      "eval_loss": 1.8675329685211182,
      "eval_runtime": 12.9458,
      "eval_samples_per_second": 508.582,
      "eval_steps_per_second": 7.956,
      "step": 50
    },
    {
      "epoch": 0.006883101984627739,
      "eval_loss": 1.8649154901504517,
      "eval_runtime": 13.0432,
      "eval_samples_per_second": 504.785,
      "eval_steps_per_second": 7.897,
      "step": 60
    },
    {
      "epoch": 0.008030285648732363,
      "eval_loss": 1.8619294166564941,
      "eval_runtime": 13.0638,
      "eval_samples_per_second": 503.988,
      "eval_steps_per_second": 7.884,
      "step": 70
    },
    {
      "epoch": 0.009177469312836984,
      "eval_loss": 1.8583979606628418,
      "eval_runtime": 13.0482,
      "eval_samples_per_second": 504.592,
      "eval_steps_per_second": 7.894,
      "step": 80
    },
    {
      "epoch": 0.010324652976941608,
      "eval_loss": 1.85438871383667,
      "eval_runtime": 13.0615,
      "eval_samples_per_second": 504.075,
      "eval_steps_per_second": 7.886,
      "step": 90
    },
    {
      "epoch": 0.011471836641046231,
      "grad_norm": 9.938580513000488,
      "learning_rate": 3.8226299694189603e-07,
      "loss": 3.1046,
      "step": 100
    },
    {
      "epoch": 0.011471836641046231,
      "eval_loss": 1.849947214126587,
      "eval_runtime": 13.0663,
      "eval_samples_per_second": 503.89,
      "eval_steps_per_second": 7.883,
      "step": 100
    },
    {
      "epoch": 0.012619020305150854,
      "eval_loss": 1.8451412916183472,
      "eval_runtime": 12.9771,
      "eval_samples_per_second": 507.357,
      "eval_steps_per_second": 7.937,
      "step": 110
    },
    {
      "epoch": 0.013766203969255477,
      "eval_loss": 1.8399487733840942,
      "eval_runtime": 13.0209,
      "eval_samples_per_second": 505.648,
      "eval_steps_per_second": 7.91,
      "step": 120
    },
    {
      "epoch": 0.0149133876333601,
      "eval_loss": 1.8342881202697754,
      "eval_runtime": 13.0369,
      "eval_samples_per_second": 505.028,
      "eval_steps_per_second": 7.901,
      "step": 130
    },
    {
      "epoch": 0.016060571297464726,
      "eval_loss": 1.8283486366271973,
      "eval_runtime": 13.0149,
      "eval_samples_per_second": 505.88,
      "eval_steps_per_second": 7.914,
      "step": 140
    },
    {
      "epoch": 0.017207754961569347,
      "eval_loss": 1.822334885597229,
      "eval_runtime": 13.0213,
      "eval_samples_per_second": 505.632,
      "eval_steps_per_second": 7.91,
      "step": 150
    },
    {
      "epoch": 0.01835493862567397,
      "eval_loss": 1.8158738613128662,
      "eval_runtime": 13.0599,
      "eval_samples_per_second": 504.14,
      "eval_steps_per_second": 7.887,
      "step": 160
    },
    {
      "epoch": 0.019502122289778594,
      "eval_loss": 1.8090614080429077,
      "eval_runtime": 13.034,
      "eval_samples_per_second": 505.14,
      "eval_steps_per_second": 7.902,
      "step": 170
    },
    {
      "epoch": 0.020649305953883215,
      "eval_loss": 1.8015782833099365,
      "eval_runtime": 13.0665,
      "eval_samples_per_second": 503.885,
      "eval_steps_per_second": 7.883,
      "step": 180
    },
    {
      "epoch": 0.02179648961798784,
      "eval_loss": 1.793796420097351,
      "eval_runtime": 13.0555,
      "eval_samples_per_second": 504.31,
      "eval_steps_per_second": 7.889,
      "step": 190
    },
    {
      "epoch": 0.022943673282092462,
      "grad_norm": 4.906337738037109,
      "learning_rate": 7.645259938837921e-07,
      "loss": 3.0303,
      "step": 200
    },
    {
      "epoch": 0.022943673282092462,
      "eval_loss": 1.785815715789795,
      "eval_runtime": 12.9925,
      "eval_samples_per_second": 506.754,
      "eval_steps_per_second": 7.928,
      "step": 200
    },
    {
      "epoch": 0.024090856946197087,
      "eval_loss": 1.7775053977966309,
      "eval_runtime": 13.0639,
      "eval_samples_per_second": 503.986,
      "eval_steps_per_second": 7.884,
      "step": 210
    },
    {
      "epoch": 0.025238040610301708,
      "eval_loss": 1.7692992687225342,
      "eval_runtime": 13.0129,
      "eval_samples_per_second": 505.96,
      "eval_steps_per_second": 7.915,
      "step": 220
    },
    {
      "epoch": 0.026385224274406333,
      "eval_loss": 1.760453224182129,
      "eval_runtime": 13.0078,
      "eval_samples_per_second": 506.158,
      "eval_steps_per_second": 7.918,
      "step": 230
    },
    {
      "epoch": 0.027532407938510955,
      "eval_loss": 1.751396656036377,
      "eval_runtime": 12.9957,
      "eval_samples_per_second": 506.628,
      "eval_steps_per_second": 7.926,
      "step": 240
    },
    {
      "epoch": 0.02867959160261558,
      "eval_loss": 1.7417218685150146,
      "eval_runtime": 12.9774,
      "eval_samples_per_second": 507.344,
      "eval_steps_per_second": 7.937,
      "step": 250
    },
    {
      "epoch": 0.0298267752667202,
      "eval_loss": 1.7319914102554321,
      "eval_runtime": 13.0219,
      "eval_samples_per_second": 505.611,
      "eval_steps_per_second": 7.91,
      "step": 260
    },
    {
      "epoch": 0.030973958930824826,
      "eval_loss": 1.7227253913879395,
      "eval_runtime": 13.0026,
      "eval_samples_per_second": 506.361,
      "eval_steps_per_second": 7.922,
      "step": 270
    },
    {
      "epoch": 0.03212114259492945,
      "eval_loss": 1.7133797407150269,
      "eval_runtime": 12.9757,
      "eval_samples_per_second": 507.409,
      "eval_steps_per_second": 7.938,
      "step": 280
    },
    {
      "epoch": 0.03326832625903407,
      "eval_loss": 1.704041600227356,
      "eval_runtime": 12.9845,
      "eval_samples_per_second": 507.065,
      "eval_steps_per_second": 7.933,
      "step": 290
    },
    {
      "epoch": 0.034415509923138694,
      "grad_norm": 4.665822505950928,
      "learning_rate": 1.1467889908256882e-06,
      "loss": 2.9459,
      "step": 300
    },
    {
      "epoch": 0.034415509923138694,
      "eval_loss": 1.6940686702728271,
      "eval_runtime": 13.0019,
      "eval_samples_per_second": 506.387,
      "eval_steps_per_second": 7.922,
      "step": 300
    },
    {
      "epoch": 0.035562693587243316,
      "eval_loss": 1.683342695236206,
      "eval_runtime": 13.0065,
      "eval_samples_per_second": 506.209,
      "eval_steps_per_second": 7.919,
      "step": 310
    },
    {
      "epoch": 0.03670987725134794,
      "eval_loss": 1.6724653244018555,
      "eval_runtime": 13.0129,
      "eval_samples_per_second": 505.96,
      "eval_steps_per_second": 7.915,
      "step": 320
    },
    {
      "epoch": 0.037857060915452566,
      "eval_loss": 1.6614341735839844,
      "eval_runtime": 12.9921,
      "eval_samples_per_second": 506.769,
      "eval_steps_per_second": 7.928,
      "step": 330
    },
    {
      "epoch": 0.03900424457955719,
      "eval_loss": 1.6510112285614014,
      "eval_runtime": 13.0242,
      "eval_samples_per_second": 505.52,
      "eval_steps_per_second": 7.908,
      "step": 340
    },
    {
      "epoch": 0.04015142824366181,
      "eval_loss": 1.6401513814926147,
      "eval_runtime": 12.9214,
      "eval_samples_per_second": 509.542,
      "eval_steps_per_second": 7.971,
      "step": 350
    },
    {
      "epoch": 0.04129861190776643,
      "eval_loss": 1.6295816898345947,
      "eval_runtime": 12.9563,
      "eval_samples_per_second": 508.171,
      "eval_steps_per_second": 7.95,
      "step": 360
    },
    {
      "epoch": 0.04244579557187106,
      "eval_loss": 1.6187150478363037,
      "eval_runtime": 12.9758,
      "eval_samples_per_second": 507.405,
      "eval_steps_per_second": 7.938,
      "step": 370
    },
    {
      "epoch": 0.04359297923597568,
      "eval_loss": 1.607272982597351,
      "eval_runtime": 12.9876,
      "eval_samples_per_second": 506.947,
      "eval_steps_per_second": 7.931,
      "step": 380
    },
    {
      "epoch": 0.0447401629000803,
      "eval_loss": 1.5961676836013794,
      "eval_runtime": 12.9782,
      "eval_samples_per_second": 507.313,
      "eval_steps_per_second": 7.936,
      "step": 390
    },
    {
      "epoch": 0.045887346564184923,
      "grad_norm": 4.870114326477051,
      "learning_rate": 1.5290519877675841e-06,
      "loss": 2.7813,
      "step": 400
    },
    {
      "epoch": 0.045887346564184923,
      "eval_loss": 1.5848218202590942,
      "eval_runtime": 12.9783,
      "eval_samples_per_second": 507.309,
      "eval_steps_per_second": 7.936,
      "step": 400
    },
    {
      "epoch": 0.04703453022828955,
      "eval_loss": 1.5734797716140747,
      "eval_runtime": 12.9739,
      "eval_samples_per_second": 507.482,
      "eval_steps_per_second": 7.939,
      "step": 410
    },
    {
      "epoch": 0.04818171389239417,
      "eval_loss": 1.562021255493164,
      "eval_runtime": 12.9388,
      "eval_samples_per_second": 508.855,
      "eval_steps_per_second": 7.961,
      "step": 420
    },
    {
      "epoch": 0.049328897556498795,
      "eval_loss": 1.5495364665985107,
      "eval_runtime": 12.9412,
      "eval_samples_per_second": 508.764,
      "eval_steps_per_second": 7.959,
      "step": 430
    },
    {
      "epoch": 0.050476081220603417,
      "eval_loss": 1.5375314950942993,
      "eval_runtime": 12.9686,
      "eval_samples_per_second": 507.687,
      "eval_steps_per_second": 7.942,
      "step": 440
    },
    {
      "epoch": 0.051623264884708045,
      "eval_loss": 1.525598168373108,
      "eval_runtime": 12.9695,
      "eval_samples_per_second": 507.651,
      "eval_steps_per_second": 7.942,
      "step": 450
    },
    {
      "epoch": 0.052770448548812667,
      "eval_loss": 1.5132672786712646,
      "eval_runtime": 12.8961,
      "eval_samples_per_second": 510.543,
      "eval_steps_per_second": 7.987,
      "step": 460
    },
    {
      "epoch": 0.05391763221291729,
      "eval_loss": 1.5012215375900269,
      "eval_runtime": 12.9428,
      "eval_samples_per_second": 508.7,
      "eval_steps_per_second": 7.958,
      "step": 470
    },
    {
      "epoch": 0.05506481587702191,
      "eval_loss": 1.4892219305038452,
      "eval_runtime": 12.9208,
      "eval_samples_per_second": 509.567,
      "eval_steps_per_second": 7.972,
      "step": 480
    },
    {
      "epoch": 0.05621199954112653,
      "eval_loss": 1.4768636226654053,
      "eval_runtime": 12.9423,
      "eval_samples_per_second": 508.721,
      "eval_steps_per_second": 7.958,
      "step": 490
    },
    {
      "epoch": 0.05735918320523116,
      "grad_norm": 4.155641555786133,
      "learning_rate": 1.9113149847094803e-06,
      "loss": 2.6308,
      "step": 500
    },
    {
      "epoch": 0.05735918320523116,
      "eval_loss": 1.4640088081359863,
      "eval_runtime": 12.8729,
      "eval_samples_per_second": 511.462,
      "eval_steps_per_second": 8.001,
      "step": 500
    }
  ],
  "logging_steps": 100,
  "max_steps": 26151,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}
|