|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 39.89333333333333, |
|
"eval_steps": 500, |
|
"global_step": 7480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005333333333333333, |
|
"grad_norm": 3.9845798015594482, |
|
"learning_rate": 1.3368983957219251e-07, |
|
"loss": 1.2155, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.9973333333333333, |
|
"eval_loss": 0.6746753454208374, |
|
"eval_runtime": 12.9251, |
|
"eval_samples_per_second": 86.963, |
|
"eval_steps_per_second": 14.545, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.7129796147346497, |
|
"eval_runtime": 12.5123, |
|
"eval_samples_per_second": 89.831, |
|
"eval_steps_per_second": 15.025, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.997333333333333, |
|
"eval_loss": 0.8005833029747009, |
|
"eval_runtime": 12.5146, |
|
"eval_samples_per_second": 89.815, |
|
"eval_steps_per_second": 15.022, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 3.7333333333333334, |
|
"grad_norm": 0.7766222357749939, |
|
"learning_rate": 9.358288770053476e-05, |
|
"loss": 0.582, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.8906850814819336, |
|
"eval_runtime": 12.5313, |
|
"eval_samples_per_second": 89.695, |
|
"eval_steps_per_second": 15.002, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.997333333333334, |
|
"eval_loss": 0.9965974688529968, |
|
"eval_runtime": 12.4874, |
|
"eval_samples_per_second": 90.011, |
|
"eval_steps_per_second": 15.055, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.1142816543579102, |
|
"eval_runtime": 12.5004, |
|
"eval_samples_per_second": 89.917, |
|
"eval_steps_per_second": 15.039, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 6.997333333333334, |
|
"eval_loss": 1.200727939605713, |
|
"eval_runtime": 12.5034, |
|
"eval_samples_per_second": 89.896, |
|
"eval_steps_per_second": 15.036, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 7.466666666666667, |
|
"grad_norm": 0.6246238946914673, |
|
"learning_rate": 0.0001871657754010695, |
|
"loss": 0.2905, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.2675864696502686, |
|
"eval_runtime": 12.484, |
|
"eval_samples_per_second": 90.035, |
|
"eval_steps_per_second": 15.059, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.997333333333334, |
|
"eval_loss": 1.3635159730911255, |
|
"eval_runtime": 12.4913, |
|
"eval_samples_per_second": 89.983, |
|
"eval_steps_per_second": 15.05, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.4521616697311401, |
|
"eval_runtime": 12.5047, |
|
"eval_samples_per_second": 89.886, |
|
"eval_steps_per_second": 15.034, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 10.997333333333334, |
|
"eval_loss": 1.4860386848449707, |
|
"eval_runtime": 12.4767, |
|
"eval_samples_per_second": 90.088, |
|
"eval_steps_per_second": 15.068, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"grad_norm": 0.3173273503780365, |
|
"learning_rate": 0.00019501438995110992, |
|
"loss": 0.1313, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.5470738410949707, |
|
"eval_runtime": 12.4572, |
|
"eval_samples_per_second": 90.229, |
|
"eval_steps_per_second": 15.092, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 12.997333333333334, |
|
"eval_loss": 1.5745112895965576, |
|
"eval_runtime": 12.4452, |
|
"eval_samples_per_second": 90.316, |
|
"eval_steps_per_second": 15.106, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.6154626607894897, |
|
"eval_runtime": 12.4425, |
|
"eval_samples_per_second": 90.335, |
|
"eval_steps_per_second": 15.109, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 14.933333333333334, |
|
"grad_norm": 0.269999235868454, |
|
"learning_rate": 0.000177467306667847, |
|
"loss": 0.0448, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 14.997333333333334, |
|
"eval_loss": 1.6521568298339844, |
|
"eval_runtime": 12.453, |
|
"eval_samples_per_second": 90.26, |
|
"eval_steps_per_second": 15.097, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.693098783493042, |
|
"eval_runtime": 12.4351, |
|
"eval_samples_per_second": 90.39, |
|
"eval_steps_per_second": 15.119, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.997333333333334, |
|
"eval_loss": 1.7404099702835083, |
|
"eval_runtime": 12.4667, |
|
"eval_samples_per_second": 90.16, |
|
"eval_steps_per_second": 15.08, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.7414445877075195, |
|
"eval_runtime": 12.4548, |
|
"eval_samples_per_second": 90.246, |
|
"eval_steps_per_second": 15.095, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 18.666666666666668, |
|
"grad_norm": 0.16562360525131226, |
|
"learning_rate": 0.0001495750499965753, |
|
"loss": 0.0268, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.997333333333334, |
|
"eval_loss": 1.7563754320144653, |
|
"eval_runtime": 12.9626, |
|
"eval_samples_per_second": 86.711, |
|
"eval_steps_per_second": 14.503, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 1.768137812614441, |
|
"eval_runtime": 12.4413, |
|
"eval_samples_per_second": 90.344, |
|
"eval_steps_per_second": 15.111, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 20.997333333333334, |
|
"eval_loss": 1.7924139499664307, |
|
"eval_runtime": 12.441, |
|
"eval_samples_per_second": 90.346, |
|
"eval_steps_per_second": 15.111, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 1.832397222518921, |
|
"eval_runtime": 12.4418, |
|
"eval_samples_per_second": 90.34, |
|
"eval_steps_per_second": 15.11, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"grad_norm": 0.10293902456760406, |
|
"learning_rate": 0.0001150624200350232, |
|
"loss": 0.0157, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.997333333333334, |
|
"eval_loss": 1.8358964920043945, |
|
"eval_runtime": 12.424, |
|
"eval_samples_per_second": 90.47, |
|
"eval_steps_per_second": 15.132, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 1.8424901962280273, |
|
"eval_runtime": 12.4549, |
|
"eval_samples_per_second": 90.246, |
|
"eval_steps_per_second": 15.094, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.997333333333334, |
|
"eval_loss": 1.8898138999938965, |
|
"eval_runtime": 12.4368, |
|
"eval_samples_per_second": 90.377, |
|
"eval_steps_per_second": 15.116, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 1.9125876426696777, |
|
"eval_runtime": 12.4412, |
|
"eval_samples_per_second": 90.345, |
|
"eval_steps_per_second": 15.111, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 26.133333333333333, |
|
"grad_norm": 0.07291631400585175, |
|
"learning_rate": 7.853831770392755e-05, |
|
"loss": 0.0061, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 26.997333333333334, |
|
"eval_loss": 1.9254367351531982, |
|
"eval_runtime": 12.4413, |
|
"eval_samples_per_second": 90.345, |
|
"eval_steps_per_second": 15.111, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 1.975632667541504, |
|
"eval_runtime": 12.4478, |
|
"eval_samples_per_second": 90.297, |
|
"eval_steps_per_second": 15.103, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 28.997333333333334, |
|
"eval_loss": 1.9888043403625488, |
|
"eval_runtime": 12.4518, |
|
"eval_samples_per_second": 90.268, |
|
"eval_steps_per_second": 15.098, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 29.866666666666667, |
|
"grad_norm": 0.000998605857603252, |
|
"learning_rate": 4.4880260858357746e-05, |
|
"loss": 0.0011, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 2.0155458450317383, |
|
"eval_runtime": 12.4346, |
|
"eval_samples_per_second": 90.393, |
|
"eval_steps_per_second": 15.119, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 30.997333333333334, |
|
"eval_loss": 2.048884153366089, |
|
"eval_runtime": 12.435, |
|
"eval_samples_per_second": 90.39, |
|
"eval_steps_per_second": 15.119, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 2.070535898208618, |
|
"eval_runtime": 12.426, |
|
"eval_samples_per_second": 90.455, |
|
"eval_steps_per_second": 15.13, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.99733333333333, |
|
"eval_loss": 2.087083339691162, |
|
"eval_runtime": 12.428, |
|
"eval_samples_per_second": 90.441, |
|
"eval_steps_per_second": 15.127, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"grad_norm": 0.0002252462727483362, |
|
"learning_rate": 1.8583028637387568e-05, |
|
"loss": 0.0001, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 2.099353551864624, |
|
"eval_runtime": 12.4444, |
|
"eval_samples_per_second": 90.322, |
|
"eval_steps_per_second": 15.107, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 34.99733333333333, |
|
"eval_loss": 2.1083507537841797, |
|
"eval_runtime": 12.431, |
|
"eval_samples_per_second": 90.419, |
|
"eval_steps_per_second": 15.123, |
|
"step": 6562 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 2.114396333694458, |
|
"eval_runtime": 12.8335, |
|
"eval_samples_per_second": 87.583, |
|
"eval_steps_per_second": 14.649, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 36.99733333333333, |
|
"eval_loss": 2.1184046268463135, |
|
"eval_runtime": 12.4403, |
|
"eval_samples_per_second": 90.352, |
|
"eval_steps_per_second": 15.112, |
|
"step": 6937 |
|
}, |
|
{ |
|
"epoch": 37.333333333333336, |
|
"grad_norm": 0.00023204727040138096, |
|
"learning_rate": 3.1584176769466346e-06, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 2.12043833732605, |
|
"eval_runtime": 12.6027, |
|
"eval_samples_per_second": 89.187, |
|
"eval_steps_per_second": 14.917, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 38.99733333333333, |
|
"eval_loss": 2.121260643005371, |
|
"eval_runtime": 12.4106, |
|
"eval_samples_per_second": 90.568, |
|
"eval_steps_per_second": 15.148, |
|
"step": 7312 |
|
}, |
|
{ |
|
"epoch": 39.89333333333333, |
|
"eval_loss": 2.1213064193725586, |
|
"eval_runtime": 12.42, |
|
"eval_samples_per_second": 90.499, |
|
"eval_steps_per_second": 15.137, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 39.89333333333333, |
|
"step": 7480, |
|
"total_flos": 1.743333613371392e+18, |
|
"train_loss": 0.10288844243324496, |
|
"train_runtime": 9416.1259, |
|
"train_samples_per_second": 19.108, |
|
"train_steps_per_second": 0.794 |
|
} |
|
], |
|
"logging_steps": 700, |
|
"max_steps": 7480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.743333613371392e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|