|
{ |
|
"best_metric": 0.6009775400161743, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 0.10875475802066341, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001087547580206634, |
|
"eval_loss": 2.6140365600585938, |
|
"eval_runtime": 22.8682, |
|
"eval_samples_per_second": 16.967, |
|
"eval_steps_per_second": 4.242, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010875475802066341, |
|
"grad_norm": 1.15413498878479, |
|
"learning_rate": 4.2000000000000004e-05, |
|
"loss": 0.6819, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021750951604132682, |
|
"grad_norm": 1.6452891826629639, |
|
"learning_rate": 8.400000000000001e-05, |
|
"loss": 0.6379, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03262642740619902, |
|
"grad_norm": 2.625800848007202, |
|
"learning_rate": 0.000126, |
|
"loss": 0.4516, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.043501903208265365, |
|
"grad_norm": 6.286452770233154, |
|
"learning_rate": 0.00016800000000000002, |
|
"loss": 0.5501, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.054377379010331704, |
|
"grad_norm": 4.992573261260986, |
|
"learning_rate": 0.00021, |
|
"loss": 0.4475, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.054377379010331704, |
|
"eval_loss": 0.6009775400161743, |
|
"eval_runtime": 22.7637, |
|
"eval_samples_per_second": 17.045, |
|
"eval_steps_per_second": 4.261, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06525285481239804, |
|
"grad_norm": 1.2332814931869507, |
|
"learning_rate": 0.00020974422527728155, |
|
"loss": 0.2856, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07612833061446438, |
|
"grad_norm": 0.9268720149993896, |
|
"learning_rate": 0.0002089781472178649, |
|
"loss": 0.2699, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08700380641653073, |
|
"grad_norm": 1.5051002502441406, |
|
"learning_rate": 0.0002077054980770496, |
|
"loss": 0.2242, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09787928221859707, |
|
"grad_norm": 4.559469223022461, |
|
"learning_rate": 0.00020593247807352348, |
|
"loss": 0.2854, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10875475802066341, |
|
"grad_norm": 2.309359073638916, |
|
"learning_rate": 0.00020366772518252038, |
|
"loss": 0.4822, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10875475802066341, |
|
"eval_loss": 0.6521262526512146, |
|
"eval_runtime": 22.8192, |
|
"eval_samples_per_second": 17.003, |
|
"eval_steps_per_second": 4.251, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.804021269528576e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|