|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.002319468068656255, |
|
"eval_steps": 8, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.731560228854182e-05, |
|
"eval_loss": 0.7148739695549011, |
|
"eval_runtime": 494.7064, |
|
"eval_samples_per_second": 11.009, |
|
"eval_steps_per_second": 5.504, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002319468068656255, |
|
"grad_norm": 4.81842041015625, |
|
"learning_rate": 6e-05, |
|
"loss": 11.6946, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.000463893613731251, |
|
"grad_norm": 6.081270217895508, |
|
"learning_rate": 0.00012, |
|
"loss": 9.5971, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0006185248183083346, |
|
"eval_loss": 0.7004389762878418, |
|
"eval_runtime": 497.7994, |
|
"eval_samples_per_second": 10.94, |
|
"eval_steps_per_second": 5.47, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0006958404205968764, |
|
"grad_norm": 10.870305061340332, |
|
"learning_rate": 0.00018, |
|
"loss": 8.6051, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.000927787227462502, |
|
"grad_norm": 9.37214183807373, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 7.6237, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0011597340343281275, |
|
"grad_norm": 13.013519287109375, |
|
"learning_rate": 0.00017071067811865476, |
|
"loss": 6.9112, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0012370496366166692, |
|
"eval_loss": 0.7165130376815796, |
|
"eval_runtime": 498.1829, |
|
"eval_samples_per_second": 10.932, |
|
"eval_steps_per_second": 5.466, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0013916808411937528, |
|
"grad_norm": 63.50639724731445, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 6.4336, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0016236276480593784, |
|
"grad_norm": 4.184946537017822, |
|
"learning_rate": 8.435655349597689e-05, |
|
"loss": 4.8627, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.001855574454925004, |
|
"grad_norm": 5.131363391876221, |
|
"learning_rate": 4.12214747707527e-05, |
|
"loss": 5.2544, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.001855574454925004, |
|
"eval_loss": 0.645858108997345, |
|
"eval_runtime": 498.1385, |
|
"eval_samples_per_second": 10.933, |
|
"eval_steps_per_second": 5.466, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0020875212617906293, |
|
"grad_norm": 4.872713565826416, |
|
"learning_rate": 1.0899347581163221e-05, |
|
"loss": 5.5263, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.002319468068656255, |
|
"grad_norm": 3.9103922843933105, |
|
"learning_rate": 0.0, |
|
"loss": 4.5022, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 30, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5957676102057984.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|