med-alex's picture
End of training
1f0f14c verified
{
"best_metric": 74.67606866807955,
"best_model_checkpoint": "/root/turkic_qa/ru_kaz_models/ru_kaz_xlm_roberta_base_model/checkpoint-4416",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 5520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 552,
"train_exact_match": 24.275724275724276,
"train_f1": 40.76875114599373,
"train_runtime": 11.4713,
"train_samples_per_second": 88.569,
"train_steps_per_second": 3.225
},
{
"epoch": 1.0,
"grad_norm": 50.65106201171875,
"learning_rate": 5e-06,
"loss": 4.6936,
"step": 552
},
{
"epoch": 1.0,
"eval_exact_match": 24.4375,
"eval_f1": 40.89175449073484,
"eval_runtime": 36.7427,
"eval_samples_per_second": 89.297,
"eval_steps_per_second": 3.212,
"step": 552
},
{
"epoch": 2.0,
"step": 1104,
"train_exact_match": 56.54345654345654,
"train_f1": 72.7903191757748,
"train_runtime": 11.4595,
"train_samples_per_second": 89.707,
"train_steps_per_second": 3.229
},
{
"epoch": 2.0,
"grad_norm": 61.147613525390625,
"learning_rate": 1e-05,
"loss": 2.162,
"step": 1104
},
{
"epoch": 2.0,
"eval_exact_match": 48.28125,
"eval_f1": 67.10891985034017,
"eval_runtime": 36.689,
"eval_samples_per_second": 89.427,
"eval_steps_per_second": 3.216,
"step": 1104
},
{
"epoch": 3.0,
"step": 1656,
"train_exact_match": 60.33966033966034,
"train_f1": 77.53847505132588,
"train_runtime": 11.4178,
"train_samples_per_second": 89.072,
"train_steps_per_second": 3.241
},
{
"epoch": 3.0,
"grad_norm": 43.21469497680664,
"learning_rate": 8.750000000000001e-06,
"loss": 1.4851,
"step": 1656
},
{
"epoch": 3.0,
"eval_exact_match": 54.59375,
"eval_f1": 72.32243072865627,
"eval_runtime": 36.5153,
"eval_samples_per_second": 89.853,
"eval_steps_per_second": 3.232,
"step": 1656
},
{
"epoch": 4.0,
"step": 2208,
"train_exact_match": 68.13186813186813,
"train_f1": 83.71981586843339,
"train_runtime": 11.3106,
"train_samples_per_second": 89.65,
"train_steps_per_second": 3.271
},
{
"epoch": 4.0,
"grad_norm": 39.521087646484375,
"learning_rate": 7.500000000000001e-06,
"loss": 1.1864,
"step": 2208
},
{
"epoch": 4.0,
"eval_exact_match": 55.6875,
"eval_f1": 73.33848461420202,
"eval_runtime": 36.4825,
"eval_samples_per_second": 89.934,
"eval_steps_per_second": 3.234,
"step": 2208
},
{
"epoch": 5.0,
"step": 2760,
"train_exact_match": 72.82717282717283,
"train_f1": 87.11695605040637,
"train_runtime": 11.478,
"train_samples_per_second": 89.04,
"train_steps_per_second": 3.224
},
{
"epoch": 5.0,
"grad_norm": 14.2109956741333,
"learning_rate": 6.25e-06,
"loss": 1.0004,
"step": 2760
},
{
"epoch": 5.0,
"eval_exact_match": 57.5,
"eval_f1": 74.39778409830554,
"eval_runtime": 36.7472,
"eval_samples_per_second": 89.286,
"eval_steps_per_second": 3.211,
"step": 2760
},
{
"epoch": 6.0,
"step": 3312,
"train_exact_match": 76.62337662337663,
"train_f1": 89.20109871672176,
"train_runtime": 11.3687,
"train_samples_per_second": 89.896,
"train_steps_per_second": 3.255
},
{
"epoch": 6.0,
"grad_norm": 37.63686752319336,
"learning_rate": 5e-06,
"loss": 0.8721,
"step": 3312
},
{
"epoch": 6.0,
"eval_exact_match": 57.84375,
"eval_f1": 74.55139589840982,
"eval_runtime": 36.3591,
"eval_samples_per_second": 90.239,
"eval_steps_per_second": 3.245,
"step": 3312
},
{
"epoch": 7.0,
"step": 3864,
"train_exact_match": 78.42157842157842,
"train_f1": 91.01806696135736,
"train_runtime": 11.4573,
"train_samples_per_second": 89.725,
"train_steps_per_second": 3.229
},
{
"epoch": 7.0,
"grad_norm": 21.571279525756836,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.7755,
"step": 3864
},
{
"epoch": 7.0,
"eval_exact_match": 57.6875,
"eval_f1": 74.44520627477633,
"eval_runtime": 36.4488,
"eval_samples_per_second": 90.017,
"eval_steps_per_second": 3.237,
"step": 3864
},
{
"epoch": 8.0,
"step": 4416,
"train_exact_match": 79.12087912087912,
"train_f1": 91.65370167331513,
"train_runtime": 11.3648,
"train_samples_per_second": 90.103,
"train_steps_per_second": 3.256
},
{
"epoch": 8.0,
"grad_norm": 57.543540954589844,
"learning_rate": 2.5e-06,
"loss": 0.7089,
"step": 4416
},
{
"epoch": 8.0,
"eval_exact_match": 58.09375,
"eval_f1": 74.67606866807955,
"eval_runtime": 36.2701,
"eval_samples_per_second": 90.46,
"eval_steps_per_second": 3.253,
"step": 4416
},
{
"epoch": 9.0,
"step": 4968,
"train_exact_match": 80.21978021978022,
"train_f1": 92.17230025305071,
"train_runtime": 11.629,
"train_samples_per_second": 88.916,
"train_steps_per_second": 3.182
},
{
"epoch": 9.0,
"grad_norm": 21.385902404785156,
"learning_rate": 1.25e-06,
"loss": 0.6522,
"step": 4968
},
{
"epoch": 9.0,
"eval_exact_match": 58.3125,
"eval_f1": 74.6689953301795,
"eval_runtime": 36.9735,
"eval_samples_per_second": 88.739,
"eval_steps_per_second": 3.191,
"step": 4968
},
{
"epoch": 10.0,
"step": 5520,
"train_exact_match": 80.21978021978022,
"train_f1": 91.90698139768105,
"train_runtime": 11.5232,
"train_samples_per_second": 89.472,
"train_steps_per_second": 3.211
},
{
"epoch": 10.0,
"grad_norm": 32.45027160644531,
"learning_rate": 0.0,
"loss": 0.6146,
"step": 5520
},
{
"epoch": 10.0,
"eval_exact_match": 57.75,
"eval_f1": 74.50735271348769,
"eval_runtime": 36.529,
"eval_samples_per_second": 89.819,
"eval_steps_per_second": 3.23,
"step": 5520
},
{
"epoch": 10.0,
"step": 5520,
"total_flos": 3.025228525300224e+16,
"train_loss": 1.4150862375895181,
"train_runtime": 3533.8403,
"train_samples_per_second": 43.683,
"train_steps_per_second": 1.562
}
],
"logging_steps": 500,
"max_steps": 5520,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3.025228525300224e+16,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}