|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7584, |
|
"global_step": 1896, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9624e-05, |
|
"loss": 0.9388, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9232e-05, |
|
"loss": 0.9522, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8832000000000002e-05, |
|
"loss": 0.8005, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8432000000000002e-05, |
|
"loss": 0.6908, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8032e-05, |
|
"loss": 0.7927, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.6659167604049494, |
|
"eval_loss": 0.7351371645927429, |
|
"eval_runtime": 434.6411, |
|
"eval_samples_per_second": 2.045, |
|
"eval_steps_per_second": 2.045, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.764e-05, |
|
"loss": 0.5962, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.724e-05, |
|
"loss": 0.6989, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.684e-05, |
|
"loss": 0.7051, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6440000000000002e-05, |
|
"loss": 0.6664, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6040000000000002e-05, |
|
"loss": 0.6547, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.7041619797525309, |
|
"eval_loss": 0.6934666037559509, |
|
"eval_runtime": 436.2176, |
|
"eval_samples_per_second": 2.038, |
|
"eval_steps_per_second": 2.038, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5640000000000003e-05, |
|
"loss": 0.665, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5240000000000001e-05, |
|
"loss": 0.654, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4840000000000002e-05, |
|
"loss": 0.6714, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.444e-05, |
|
"loss": 0.7395, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4040000000000001e-05, |
|
"loss": 0.5393, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_loss": 0.621578574180603, |
|
"eval_runtime": 436.3187, |
|
"eval_samples_per_second": 2.038, |
|
"eval_steps_per_second": 2.038, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3640000000000002e-05, |
|
"loss": 0.5185, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3240000000000002e-05, |
|
"loss": 0.6009, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.284e-05, |
|
"loss": 0.6588, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2440000000000001e-05, |
|
"loss": 0.6022, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.204e-05, |
|
"loss": 0.7316, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.734533183352081, |
|
"eval_loss": 0.5916205644607544, |
|
"eval_runtime": 436.6514, |
|
"eval_samples_per_second": 2.036, |
|
"eval_steps_per_second": 2.036, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.164e-05, |
|
"loss": 0.6086, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1240000000000002e-05, |
|
"loss": 0.5806, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0840000000000001e-05, |
|
"loss": 0.5992, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0440000000000002e-05, |
|
"loss": 0.5807, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.004e-05, |
|
"loss": 0.5667, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.734533183352081, |
|
"eval_loss": 0.5785398483276367, |
|
"eval_runtime": 436.2096, |
|
"eval_samples_per_second": 2.038, |
|
"eval_steps_per_second": 2.038, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.640000000000001e-06, |
|
"loss": 0.4989, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.240000000000001e-06, |
|
"loss": 0.6015, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.848e-06, |
|
"loss": 0.5728, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.448000000000001e-06, |
|
"loss": 0.6285, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.048e-06, |
|
"loss": 0.498, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.7435320584926884, |
|
"eval_loss": 0.5632913708686829, |
|
"eval_runtime": 436.3374, |
|
"eval_samples_per_second": 2.037, |
|
"eval_steps_per_second": 2.037, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.648e-06, |
|
"loss": 0.5134, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.248000000000001e-06, |
|
"loss": 0.4582, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.848e-06, |
|
"loss": 0.534, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.448000000000001e-06, |
|
"loss": 0.6765, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.048e-06, |
|
"loss": 0.6598, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.7457817772778402, |
|
"eval_loss": 0.565944254398346, |
|
"eval_runtime": 435.7582, |
|
"eval_samples_per_second": 2.04, |
|
"eval_steps_per_second": 2.04, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.648e-06, |
|
"loss": 0.5006, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.248000000000001e-06, |
|
"loss": 0.579, |
|
"step": 1850 |
|
} |
|
], |
|
"max_steps": 2500, |
|
"num_train_epochs": 1, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|