|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.08815232722143865, |
|
"eval_steps": 500, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01763046544428773, |
|
"grad_norm": 1.5435048341751099, |
|
"learning_rate": 4.9706158909261876e-05, |
|
"loss": 0.8407, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01763046544428773, |
|
"eval_accuracy": 0.8252411503248731, |
|
"eval_loss": 0.7565935850143433, |
|
"eval_runtime": 2895.6955, |
|
"eval_samples_per_second": 32.989, |
|
"eval_steps_per_second": 0.516, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03526093088857546, |
|
"grad_norm": 1.5116485357284546, |
|
"learning_rate": 4.9412317818523744e-05, |
|
"loss": 0.5964, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03526093088857546, |
|
"eval_accuracy": 0.8373293281429335, |
|
"eval_loss": 0.6963507533073425, |
|
"eval_runtime": 2899.1751, |
|
"eval_samples_per_second": 32.949, |
|
"eval_steps_per_second": 0.515, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05289139633286319, |
|
"grad_norm": 1.4373358488082886, |
|
"learning_rate": 4.911847672778562e-05, |
|
"loss": 0.5661, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.05289139633286319, |
|
"eval_accuracy": 0.8443953465863471, |
|
"eval_loss": 0.6656736731529236, |
|
"eval_runtime": 2944.9636, |
|
"eval_samples_per_second": 32.437, |
|
"eval_steps_per_second": 0.507, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.07052186177715092, |
|
"grad_norm": 1.216012716293335, |
|
"learning_rate": 4.882463563704749e-05, |
|
"loss": 0.5402, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07052186177715092, |
|
"eval_accuracy": 0.8482718545347777, |
|
"eval_loss": 0.6440214514732361, |
|
"eval_runtime": 2944.5243, |
|
"eval_samples_per_second": 32.442, |
|
"eval_steps_per_second": 0.507, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08815232722143865, |
|
"grad_norm": 1.0847452878952026, |
|
"learning_rate": 4.853079454630936e-05, |
|
"loss": 0.5237, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.08815232722143865, |
|
"eval_accuracy": 0.8508165457808422, |
|
"eval_loss": 0.6308088898658752, |
|
"eval_runtime": 2933.5042, |
|
"eval_samples_per_second": 32.564, |
|
"eval_steps_per_second": 0.509, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 85080, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 3.70943641780224e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|