{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 11.48936170212766,
  "eval_steps": 500,
  "global_step": 2700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.7200157046318054,
      "learning_rate": 0.0001418439716312057,
      "loss": 1.039,
      "step": 100
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.5347419381141663,
      "learning_rate": 0.0002836879432624114,
      "loss": 0.6682,
      "step": 200
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 0.7219013571739197,
      "learning_rate": 0.000425531914893617,
      "loss": 0.5873,
      "step": 300
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.9977912306785583,
      "learning_rate": 0.0005673758865248228,
      "loss": 0.5932,
      "step": 400
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 0.946759819984436,
      "learning_rate": 0.0007092198581560284,
      "loss": 0.5843,
      "step": 500
    },
    {
      "epoch": 2.5531914893617023,
      "grad_norm": 0.9490156769752502,
      "learning_rate": 0.000851063829787234,
      "loss": 0.5616,
      "step": 600
    },
    {
      "epoch": 2.978723404255319,
      "grad_norm": 0.9529285430908203,
      "learning_rate": 0.0009929078014184398,
      "loss": 0.5976,
      "step": 700
    },
    {
      "epoch": 3.404255319148936,
      "grad_norm": 1.0601491928100586,
      "learning_rate": 0.0009972024146827632,
      "loss": 0.5265,
      "step": 800
    },
    {
      "epoch": 3.829787234042553,
      "grad_norm": 1.1251301765441895,
      "learning_rate": 0.0009882482608435923,
      "loss": 0.5463,
      "step": 900
    },
    {
      "epoch": 4.25531914893617,
      "grad_norm": 1.0466716289520264,
      "learning_rate": 0.0009732407954960695,
      "loss": 0.4658,
      "step": 1000
    },
    {
      "epoch": 4.680851063829787,
      "grad_norm": 0.9589976072311401,
      "learning_rate": 0.0009523660814897424,
      "loss": 0.4432,
      "step": 1100
    },
    {
      "epoch": 5.1063829787234045,
      "grad_norm": 1.2019531726837158,
      "learning_rate": 0.0009258829239380381,
      "loss": 0.4233,
      "step": 1200
    },
    {
      "epoch": 5.531914893617021,
      "grad_norm": 0.7805734276771545,
      "learning_rate": 0.0008941196615473928,
      "loss": 0.3387,
      "step": 1300
    },
    {
      "epoch": 5.957446808510638,
      "grad_norm": 0.6454946994781494,
      "learning_rate": 0.0008574700958675344,
      "loss": 0.3554,
      "step": 1400
    },
    {
      "epoch": 6.382978723404255,
      "grad_norm": 0.8706166744232178,
      "learning_rate": 0.0008163886089321493,
      "loss": 0.2621,
      "step": 1500
    },
    {
      "epoch": 6.808510638297872,
      "grad_norm": 0.8278286457061768,
      "learning_rate": 0.0007713845298215226,
      "loss": 0.267,
      "step": 1600
    },
    {
      "epoch": 7.23404255319149,
      "grad_norm": 0.6279350519180298,
      "learning_rate": 0.0007230158199906162,
      "loss": 0.2148,
      "step": 1700
    },
    {
      "epoch": 7.659574468085106,
      "grad_norm": 0.5926242470741272,
      "learning_rate": 0.0006718821556520151,
      "loss": 0.1777,
      "step": 1800
    },
    {
      "epoch": 8.085106382978724,
      "grad_norm": 0.6583319306373596,
      "learning_rate": 0.0006186174929784868,
      "loss": 0.1703,
      "step": 1900
    },
    {
      "epoch": 8.51063829787234,
      "grad_norm": 0.6788440942764282,
      "learning_rate": 0.0005638822083019267,
      "loss": 0.1186,
      "step": 2000
    },
    {
      "epoch": 8.936170212765958,
      "grad_norm": 0.6308868527412415,
      "learning_rate": 0.0005083549107546504,
      "loss": 0.1129,
      "step": 2100
    },
    {
      "epoch": 9.361702127659575,
      "grad_norm": 0.46226853132247925,
      "learning_rate": 0.000452724028860069,
      "loss": 0.0775,
      "step": 2200
    },
    {
      "epoch": 9.787234042553191,
      "grad_norm": 0.5111752152442932,
      "learning_rate": 0.00039767927538236195,
      "loss": 0.0675,
      "step": 2300
    },
    {
      "epoch": 10.212765957446809,
      "grad_norm": 0.2776525020599365,
      "learning_rate": 0.00034390309625410685,
      "loss": 0.0573,
      "step": 2400
    },
    {
      "epoch": 10.638297872340425,
      "grad_norm": 0.22762399911880493,
      "learning_rate": 0.00029206220959823183,
      "loss": 0.0432,
      "step": 2500
    },
    {
      "epoch": 11.063829787234043,
      "grad_norm": 0.11530320346355438,
      "learning_rate": 0.00024279933974365658,
      "loss": 0.0432,
      "step": 2600
    },
    {
      "epoch": 11.48936170212766,
      "grad_norm": 0.20928817987442017,
      "learning_rate": 0.0001967252487164663,
      "loss": 0.0325,
      "step": 2700
    }
  ],
  "logging_steps": 100,
  "max_steps": 3525,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0806803429374771e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}