|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.004024934469035676, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.012467234517838e-05, |
|
"eval_loss": 2.1559741497039795, |
|
"eval_runtime": 857.823, |
|
"eval_samples_per_second": 24.391, |
|
"eval_steps_per_second": 12.196, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0002012467234517838, |
|
"grad_norm": 6.170618534088135, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4469, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0004024934469035676, |
|
"grad_norm": 1.0294487476348877, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2263, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0006037401703553514, |
|
"grad_norm": 0.6725739240646362, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2202, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0008049868938071352, |
|
"grad_norm": 1.0152796506881714, |
|
"learning_rate": 0.0002, |
|
"loss": 1.29, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.001006233617258919, |
|
"grad_norm": 0.8606889843940735, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3785, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.001006233617258919, |
|
"eval_loss": 1.4589308500289917, |
|
"eval_runtime": 858.7739, |
|
"eval_samples_per_second": 24.364, |
|
"eval_steps_per_second": 12.182, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0012074803407107028, |
|
"grad_norm": 0.7305423021316528, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2205, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0014087270641624865, |
|
"grad_norm": 1.4533841609954834, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2138, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0016099737876142705, |
|
"grad_norm": 1.0131192207336426, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3712, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0018112205110660542, |
|
"grad_norm": 0.9000447988510132, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1396, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.002012467234517838, |
|
"grad_norm": 1.9898265600204468, |
|
"learning_rate": 0.0002, |
|
"loss": 1.229, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002012467234517838, |
|
"eval_loss": 1.4349274635314941, |
|
"eval_runtime": 858.4486, |
|
"eval_samples_per_second": 24.373, |
|
"eval_steps_per_second": 12.187, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.002213713957969622, |
|
"grad_norm": 1.271390676498413, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3306, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0024149606814214057, |
|
"grad_norm": 0.9250841736793518, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1564, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0026162074048731894, |
|
"grad_norm": 1.0612225532531738, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2448, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.002817454128324973, |
|
"grad_norm": 0.643467366695404, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1702, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.003018700851776757, |
|
"grad_norm": 1.3284869194030762, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2826, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.003018700851776757, |
|
"eval_loss": 1.407882809638977, |
|
"eval_runtime": 858.6574, |
|
"eval_samples_per_second": 24.367, |
|
"eval_steps_per_second": 12.184, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.003219947575228541, |
|
"grad_norm": 1.3226466178894043, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3085, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0034211942986803246, |
|
"grad_norm": 1.9192092418670654, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1933, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0036224410221321083, |
|
"grad_norm": 1.0368775129318237, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2231, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.003823687745583892, |
|
"grad_norm": 1.5248116254806519, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2734, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.004024934469035676, |
|
"grad_norm": 0.9554014801979065, |
|
"learning_rate": 0.0002, |
|
"loss": 1.1498, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.004024934469035676, |
|
"eval_loss": 1.397660255432129, |
|
"eval_runtime": 858.6574, |
|
"eval_samples_per_second": 24.367, |
|
"eval_steps_per_second": 12.184, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.68217396985856e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|