|
{ |
|
"best_metric": 0.773286467486819, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-31/checkpoint-2140", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2140, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.312666416168213, |
|
"learning_rate": 0.00024913095817095677, |
|
"loss": 0.5995, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.715875805506737, |
|
"eval_f1": 0.4300822561692127, |
|
"eval_loss": 0.5564725995063782, |
|
"eval_mcc": 0.2978271442275633, |
|
"eval_precision": 0.648936170212766, |
|
"eval_recall": 0.3216168717047452, |
|
"eval_runtime": 3.1678, |
|
"eval_samples_per_second": 538.866, |
|
"eval_steps_per_second": 17.047, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.9955796003341675, |
|
"learning_rate": 0.00022421786235386112, |
|
"loss": 0.533, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7445811364967779, |
|
"eval_f1": 0.5514403292181069, |
|
"eval_loss": 0.5220016837120056, |
|
"eval_mcc": 0.39114524585919325, |
|
"eval_precision": 0.6650124069478908, |
|
"eval_recall": 0.4710017574692443, |
|
"eval_runtime": 3.1849, |
|
"eval_samples_per_second": 535.964, |
|
"eval_steps_per_second": 16.955, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.413587808609009, |
|
"learning_rate": 0.00019930476653676543, |
|
"loss": 0.5076, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7504393673110721, |
|
"eval_f1": 0.4766584766584766, |
|
"eval_loss": 0.5150462985038757, |
|
"eval_mcc": 0.3981605013144147, |
|
"eval_precision": 0.7918367346938775, |
|
"eval_recall": 0.3409490333919156, |
|
"eval_runtime": 3.1543, |
|
"eval_samples_per_second": 541.159, |
|
"eval_steps_per_second": 17.119, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.792643070220947, |
|
"learning_rate": 0.00017439167071966975, |
|
"loss": 0.4936, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7568834212067955, |
|
"eval_f1": 0.5077105575326216, |
|
"eval_loss": 0.5015135407447815, |
|
"eval_mcc": 0.4152737161319117, |
|
"eval_precision": 0.781021897810219, |
|
"eval_recall": 0.37609841827768015, |
|
"eval_runtime": 3.1947, |
|
"eval_samples_per_second": 534.327, |
|
"eval_steps_per_second": 16.903, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.65413236618042, |
|
"learning_rate": 0.00014947857490257407, |
|
"loss": 0.4718, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7662565905096661, |
|
"eval_f1": 0.5741728922091782, |
|
"eval_loss": 0.4888531267642975, |
|
"eval_mcc": 0.4422168885701626, |
|
"eval_precision": 0.7309782608695652, |
|
"eval_recall": 0.4727592267135325, |
|
"eval_runtime": 3.1595, |
|
"eval_samples_per_second": 540.283, |
|
"eval_steps_per_second": 17.092, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 7.2619218826293945, |
|
"learning_rate": 0.00012456547908547839, |
|
"loss": 0.4651, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7627416520210897, |
|
"eval_f1": 0.5837615621788285, |
|
"eval_loss": 0.48531395196914673, |
|
"eval_mcc": 0.43661648172458356, |
|
"eval_precision": 0.7029702970297029, |
|
"eval_recall": 0.4991212653778559, |
|
"eval_runtime": 3.3544, |
|
"eval_samples_per_second": 508.885, |
|
"eval_steps_per_second": 16.098, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 9.458930969238281, |
|
"learning_rate": 9.965238326838272e-05, |
|
"loss": 0.4553, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7633274751025191, |
|
"eval_f1": 0.536697247706422, |
|
"eval_loss": 0.48649853467941284, |
|
"eval_mcc": 0.4325666465390762, |
|
"eval_precision": 0.7722772277227723, |
|
"eval_recall": 0.4112478031634446, |
|
"eval_runtime": 3.1637, |
|
"eval_samples_per_second": 539.56, |
|
"eval_steps_per_second": 17.069, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.576714038848877, |
|
"learning_rate": 7.473928745128703e-05, |
|
"loss": 0.4423, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7668424135910955, |
|
"eval_f1": 0.5738758029978586, |
|
"eval_loss": 0.48117396235466003, |
|
"eval_mcc": 0.4435339132892942, |
|
"eval_precision": 0.7342465753424657, |
|
"eval_recall": 0.4710017574692443, |
|
"eval_runtime": 3.2168, |
|
"eval_samples_per_second": 530.659, |
|
"eval_steps_per_second": 16.787, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.577918529510498, |
|
"learning_rate": 4.982619163419136e-05, |
|
"loss": 0.4406, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7721148213239601, |
|
"eval_f1": 0.6026557711950972, |
|
"eval_loss": 0.46937549114227295, |
|
"eval_mcc": 0.4605923234016756, |
|
"eval_precision": 0.7195121951219512, |
|
"eval_recall": 0.5184534270650264, |
|
"eval_runtime": 3.1822, |
|
"eval_samples_per_second": 536.419, |
|
"eval_steps_per_second": 16.969, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.4660234451293945, |
|
"learning_rate": 2.491309581709568e-05, |
|
"loss": 0.4322, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.773286467486819, |
|
"eval_f1": 0.6063072227873855, |
|
"eval_loss": 0.47590258717536926, |
|
"eval_mcc": 0.4639025999570181, |
|
"eval_precision": 0.7198067632850241, |
|
"eval_recall": 0.523725834797891, |
|
"eval_runtime": 3.3263, |
|
"eval_samples_per_second": 513.185, |
|
"eval_steps_per_second": 16.234, |
|
"step": 2140 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2354, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 11, |
|
"save_steps": 500, |
|
"total_flos": 5247756644400.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.995655146834002, |
|
"learning_rate": 0.00027404405398805245, |
|
"num_train_epochs": 11, |
|
"temperature": 14 |
|
} |
|
} |
|
|