|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.010905601116733554, |
|
"eval_steps": 100, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010905601116733554, |
|
"eval_accuracy": 0.5034676821874995, |
|
"eval_loss": 2.249495029449463, |
|
"eval_runtime": 250.8322, |
|
"eval_samples_per_second": 22.182, |
|
"eval_steps_per_second": 0.088, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002181120223346711, |
|
"eval_accuracy": 0.6792460132466641, |
|
"eval_loss": 1.3580471277236938, |
|
"eval_runtime": 250.935, |
|
"eval_samples_per_second": 22.173, |
|
"eval_steps_per_second": 0.088, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0032716803350200663, |
|
"eval_accuracy": 0.7194358306786471, |
|
"eval_loss": 1.1734950542449951, |
|
"eval_runtime": 249.8616, |
|
"eval_samples_per_second": 22.268, |
|
"eval_steps_per_second": 0.088, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.004362240446693422, |
|
"eval_accuracy": 0.7406081066868597, |
|
"eval_loss": 1.0811182260513306, |
|
"eval_runtime": 250.2334, |
|
"eval_samples_per_second": 22.235, |
|
"eval_steps_per_second": 0.088, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.005452800558366777, |
|
"grad_norm": 1.5124988555908203, |
|
"learning_rate": 4.990911999069389e-05, |
|
"loss": 1.8503, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.005452800558366777, |
|
"eval_accuracy": 0.7539559460484646, |
|
"eval_loss": 1.0170289278030396, |
|
"eval_runtime": 250.2283, |
|
"eval_samples_per_second": 22.236, |
|
"eval_steps_per_second": 0.088, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0065433606700401326, |
|
"eval_accuracy": 0.7639956787495118, |
|
"eval_loss": 0.9728907346725464, |
|
"eval_runtime": 250.2409, |
|
"eval_samples_per_second": 22.235, |
|
"eval_steps_per_second": 0.088, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.007633920781713488, |
|
"eval_accuracy": 0.7704801005740811, |
|
"eval_loss": 0.9435333013534546, |
|
"eval_runtime": 250.3167, |
|
"eval_samples_per_second": 22.228, |
|
"eval_steps_per_second": 0.088, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.008724480893386843, |
|
"eval_accuracy": 0.7747122964825798, |
|
"eval_loss": 0.924051821231842, |
|
"eval_runtime": 250.2963, |
|
"eval_samples_per_second": 22.23, |
|
"eval_steps_per_second": 0.088, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.009815041005060199, |
|
"eval_accuracy": 0.7814711185942613, |
|
"eval_loss": 0.8970186114311218, |
|
"eval_runtime": 250.1321, |
|
"eval_samples_per_second": 22.244, |
|
"eval_steps_per_second": 0.088, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.010905601116733554, |
|
"grad_norm": 1.2721333503723145, |
|
"learning_rate": 4.981823998138778e-05, |
|
"loss": 0.9214, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.010905601116733554, |
|
"eval_accuracy": 0.7872277846758073, |
|
"eval_loss": 0.8725214004516602, |
|
"eval_runtime": 249.8439, |
|
"eval_samples_per_second": 22.27, |
|
"eval_steps_per_second": 0.088, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.010905601116733554, |
|
"step": 1000, |
|
"total_flos": 1.3069163715939533e+18, |
|
"train_loss": 1.3858390502929687, |
|
"train_runtime": 13072.5982, |
|
"train_samples_per_second": 1346.745, |
|
"train_steps_per_second": 21.043 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 275088, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3069163715939533e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|