|
{ |
|
"best_metric": 74.67606866807955, |
|
"best_model_checkpoint": "/root/turkic_qa/ru_kaz_models/ru_kaz_xlm_roberta_base_model/checkpoint-4416", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 5520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 552, |
|
"train_exact_match": 24.275724275724276, |
|
"train_f1": 40.76875114599373, |
|
"train_runtime": 11.4713, |
|
"train_samples_per_second": 88.569, |
|
"train_steps_per_second": 3.225 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 50.65106201171875, |
|
"learning_rate": 5e-06, |
|
"loss": 4.6936, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 24.4375, |
|
"eval_f1": 40.89175449073484, |
|
"eval_runtime": 36.7427, |
|
"eval_samples_per_second": 89.297, |
|
"eval_steps_per_second": 3.212, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1104, |
|
"train_exact_match": 56.54345654345654, |
|
"train_f1": 72.7903191757748, |
|
"train_runtime": 11.4595, |
|
"train_samples_per_second": 89.707, |
|
"train_steps_per_second": 3.229 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 61.147613525390625, |
|
"learning_rate": 1e-05, |
|
"loss": 2.162, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 48.28125, |
|
"eval_f1": 67.10891985034017, |
|
"eval_runtime": 36.689, |
|
"eval_samples_per_second": 89.427, |
|
"eval_steps_per_second": 3.216, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1656, |
|
"train_exact_match": 60.33966033966034, |
|
"train_f1": 77.53847505132588, |
|
"train_runtime": 11.4178, |
|
"train_samples_per_second": 89.072, |
|
"train_steps_per_second": 3.241 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 43.21469497680664, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 1.4851, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 54.59375, |
|
"eval_f1": 72.32243072865627, |
|
"eval_runtime": 36.5153, |
|
"eval_samples_per_second": 89.853, |
|
"eval_steps_per_second": 3.232, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2208, |
|
"train_exact_match": 68.13186813186813, |
|
"train_f1": 83.71981586843339, |
|
"train_runtime": 11.3106, |
|
"train_samples_per_second": 89.65, |
|
"train_steps_per_second": 3.271 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 39.521087646484375, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.1864, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 55.6875, |
|
"eval_f1": 73.33848461420202, |
|
"eval_runtime": 36.4825, |
|
"eval_samples_per_second": 89.934, |
|
"eval_steps_per_second": 3.234, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2760, |
|
"train_exact_match": 72.82717282717283, |
|
"train_f1": 87.11695605040637, |
|
"train_runtime": 11.478, |
|
"train_samples_per_second": 89.04, |
|
"train_steps_per_second": 3.224 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 14.2109956741333, |
|
"learning_rate": 6.25e-06, |
|
"loss": 1.0004, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 57.5, |
|
"eval_f1": 74.39778409830554, |
|
"eval_runtime": 36.7472, |
|
"eval_samples_per_second": 89.286, |
|
"eval_steps_per_second": 3.211, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3312, |
|
"train_exact_match": 76.62337662337663, |
|
"train_f1": 89.20109871672176, |
|
"train_runtime": 11.3687, |
|
"train_samples_per_second": 89.896, |
|
"train_steps_per_second": 3.255 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 37.63686752319336, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8721, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 57.84375, |
|
"eval_f1": 74.55139589840982, |
|
"eval_runtime": 36.3591, |
|
"eval_samples_per_second": 90.239, |
|
"eval_steps_per_second": 3.245, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 3864, |
|
"train_exact_match": 78.42157842157842, |
|
"train_f1": 91.01806696135736, |
|
"train_runtime": 11.4573, |
|
"train_samples_per_second": 89.725, |
|
"train_steps_per_second": 3.229 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 21.571279525756836, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.7755, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 57.6875, |
|
"eval_f1": 74.44520627477633, |
|
"eval_runtime": 36.4488, |
|
"eval_samples_per_second": 90.017, |
|
"eval_steps_per_second": 3.237, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 4416, |
|
"train_exact_match": 79.12087912087912, |
|
"train_f1": 91.65370167331513, |
|
"train_runtime": 11.3648, |
|
"train_samples_per_second": 90.103, |
|
"train_steps_per_second": 3.256 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 57.543540954589844, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.7089, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 58.09375, |
|
"eval_f1": 74.67606866807955, |
|
"eval_runtime": 36.2701, |
|
"eval_samples_per_second": 90.46, |
|
"eval_steps_per_second": 3.253, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 4968, |
|
"train_exact_match": 80.21978021978022, |
|
"train_f1": 92.17230025305071, |
|
"train_runtime": 11.629, |
|
"train_samples_per_second": 88.916, |
|
"train_steps_per_second": 3.182 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 21.385902404785156, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.6522, |
|
"step": 4968 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 58.3125, |
|
"eval_f1": 74.6689953301795, |
|
"eval_runtime": 36.9735, |
|
"eval_samples_per_second": 88.739, |
|
"eval_steps_per_second": 3.191, |
|
"step": 4968 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5520, |
|
"train_exact_match": 80.21978021978022, |
|
"train_f1": 91.90698139768105, |
|
"train_runtime": 11.5232, |
|
"train_samples_per_second": 89.472, |
|
"train_steps_per_second": 3.211 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 32.45027160644531, |
|
"learning_rate": 0.0, |
|
"loss": 0.6146, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 57.75, |
|
"eval_f1": 74.50735271348769, |
|
"eval_runtime": 36.529, |
|
"eval_samples_per_second": 89.819, |
|
"eval_steps_per_second": 3.23, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 5520, |
|
"total_flos": 3.025228525300224e+16, |
|
"train_loss": 1.4150862375895181, |
|
"train_runtime": 3533.8403, |
|
"train_samples_per_second": 43.683, |
|
"train_steps_per_second": 1.562 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5520, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.025228525300224e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|