{
  "best_metric": 14.6014,
  "best_model_checkpoint": "./ko-zh/checkpoint-7500",
  "epoch": 7.2337575351640995,
  "eval_steps": 1500,
  "global_step": 13500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.27,
      "learning_rate": 2.5e-05,
      "loss": 2.0954,
      "step": 500
    },
    {
      "epoch": 0.54,
      "learning_rate": 5e-05,
      "loss": 1.5161,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.966051059206953e-05,
      "loss": 1.3824,
      "step": 1500
    },
    {
      "epoch": 0.8,
      "eval_bleu": 11.4445,
      "eval_gen_len": 15.774,
      "eval_loss": 1.301085352897644,
      "eval_runtime": 894.2268,
      "eval_samples_per_second": 16.693,
      "eval_steps_per_second": 1.043,
      "step": 1500
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.9321021184139056e-05,
      "loss": 1.245,
      "step": 2000
    },
    {
      "epoch": 1.34,
      "learning_rate": 4.898153177620858e-05,
      "loss": 1.0757,
      "step": 2500
    },
    {
      "epoch": 1.61,
      "learning_rate": 4.8642042368278115e-05,
      "loss": 1.0646,
      "step": 3000
    },
    {
      "epoch": 1.61,
      "eval_bleu": 13.1811,
      "eval_gen_len": 15.6757,
      "eval_loss": 1.1915582418441772,
      "eval_runtime": 882.4536,
      "eval_samples_per_second": 16.915,
      "eval_steps_per_second": 1.057,
      "step": 3000
    },
    {
      "epoch": 1.88,
      "learning_rate": 4.830255296034764e-05,
      "loss": 1.051,
      "step": 3500
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.796306355241716e-05,
      "loss": 0.9135,
      "step": 4000
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.7623574144486695e-05,
      "loss": 0.8071,
      "step": 4500
    },
    {
      "epoch": 2.41,
      "eval_bleu": 14.1901,
      "eval_gen_len": 15.2832,
      "eval_loss": 1.186390995979309,
      "eval_runtime": 873.6287,
      "eval_samples_per_second": 17.086,
      "eval_steps_per_second": 1.068,
      "step": 4500
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.728408473655622e-05,
      "loss": 0.8236,
      "step": 5000
    },
    {
      "epoch": 2.95,
      "learning_rate": 4.694459532862575e-05,
      "loss": 0.8252,
      "step": 5500
    },
    {
      "epoch": 3.22,
      "learning_rate": 4.6605105920695275e-05,
      "loss": 0.6496,
      "step": 6000
    },
    {
      "epoch": 3.22,
      "eval_bleu": 14.3496,
      "eval_gen_len": 15.5238,
      "eval_loss": 1.1978733539581299,
      "eval_runtime": 857.9115,
      "eval_samples_per_second": 17.399,
      "eval_steps_per_second": 1.088,
      "step": 6000
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.62656165127648e-05,
      "loss": 0.6263,
      "step": 6500
    },
    {
      "epoch": 3.75,
      "learning_rate": 4.5926127104834335e-05,
      "loss": 0.6425,
      "step": 7000
    },
    {
      "epoch": 4.02,
      "learning_rate": 4.558663769690386e-05,
      "loss": 0.6365,
      "step": 7500
    },
    {
      "epoch": 4.02,
      "eval_bleu": 14.6014,
      "eval_gen_len": 15.5634,
      "eval_loss": 1.251100778579712,
      "eval_runtime": 857.2309,
      "eval_samples_per_second": 17.413,
      "eval_steps_per_second": 1.088,
      "step": 7500
    },
    {
      "epoch": 4.29,
      "learning_rate": 4.524714828897338e-05,
      "loss": 0.4573,
      "step": 8000
    },
    {
      "epoch": 4.55,
      "learning_rate": 4.4907658881042915e-05,
      "loss": 0.4783,
      "step": 8500
    },
    {
      "epoch": 4.82,
      "learning_rate": 4.456816947311244e-05,
      "loss": 0.4942,
      "step": 9000
    },
    {
      "epoch": 4.82,
      "eval_bleu": 14.3411,
      "eval_gen_len": 15.4888,
      "eval_loss": 1.252127766609192,
      "eval_runtime": 854.4386,
      "eval_samples_per_second": 17.47,
      "eval_steps_per_second": 1.092,
      "step": 9000
    },
    {
      "epoch": 5.09,
      "learning_rate": 4.422868006518197e-05,
      "loss": 0.4455,
      "step": 9500
    },
    {
      "epoch": 5.36,
      "learning_rate": 4.3889190657251494e-05,
      "loss": 0.3442,
      "step": 10000
    },
    {
      "epoch": 5.63,
      "learning_rate": 4.354970124932102e-05,
      "loss": 0.3632,
      "step": 10500
    },
    {
      "epoch": 5.63,
      "eval_bleu": 14.204,
      "eval_gen_len": 15.4075,
      "eval_loss": 1.3326140642166138,
      "eval_runtime": 847.6182,
      "eval_samples_per_second": 17.611,
      "eval_steps_per_second": 1.101,
      "step": 10500
    },
    {
      "epoch": 5.89,
      "learning_rate": 4.3210211841390554e-05,
      "loss": 0.3762,
      "step": 11000
    },
    {
      "epoch": 6.16,
      "learning_rate": 4.2870722433460074e-05,
      "loss": 0.2971,
      "step": 11500
    },
    {
      "epoch": 6.43,
      "learning_rate": 4.253123302552961e-05,
      "loss": 0.2601,
      "step": 12000
    },
    {
      "epoch": 6.43,
      "eval_bleu": 14.1714,
      "eval_gen_len": 15.4783,
      "eval_loss": 1.4028061628341675,
      "eval_runtime": 854.4224,
      "eval_samples_per_second": 17.47,
      "eval_steps_per_second": 1.092,
      "step": 12000
    },
    {
      "epoch": 6.7,
      "learning_rate": 4.2191743617599134e-05,
      "loss": 0.2697,
      "step": 12500
    },
    {
      "epoch": 6.97,
      "learning_rate": 4.185225420966866e-05,
      "loss": 0.2834,
      "step": 13000
    },
    {
      "epoch": 7.23,
      "learning_rate": 4.151276480173819e-05,
      "loss": 0.1919,
      "step": 13500
    },
    {
      "epoch": 7.23,
      "eval_bleu": 13.9406,
      "eval_gen_len": 15.4543,
      "eval_loss": 1.4764152765274048,
      "eval_runtime": 851.0189,
      "eval_samples_per_second": 17.54,
      "eval_steps_per_second": 1.096,
      "step": 13500
    },
    {
      "epoch": 7.23,
      "step": 13500,
      "total_flos": 1.872400119496704e+18,
      "train_loss": 0.7265107291892723,
      "train_runtime": 49668.6266,
      "train_samples_per_second": 96.178,
      "train_steps_per_second": 1.503
    }
  ],
  "logging_steps": 500,
  "max_steps": 74640,
  "num_train_epochs": 40,
  "save_steps": 1500,
  "total_flos": 1.872400119496704e+18,
  "trial_name": null,
  "trial_params": null
}