|
{ |
|
"best_metric": 0.7932044522554189, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-22/checkpoint-2568", |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 2996, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.217133522033691, |
|
"learning_rate": 0.00032487441871616396, |
|
"loss": 0.5489, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7164616285881664, |
|
"eval_f1": 0.39800995024875624, |
|
"eval_loss": 0.5112811923027039, |
|
"eval_mcc": 0.2945528643838721, |
|
"eval_precision": 0.6808510638297872, |
|
"eval_recall": 0.281195079086116, |
|
"eval_runtime": 3.1646, |
|
"eval_samples_per_second": 539.397, |
|
"eval_steps_per_second": 17.064, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.936843991279602, |
|
"learning_rate": 0.00030777576509952376, |
|
"loss": 0.4958, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7363796133567663, |
|
"eval_f1": 0.4966442953020134, |
|
"eval_loss": 0.4881761372089386, |
|
"eval_mcc": 0.3597855187518067, |
|
"eval_precision": 0.683076923076923, |
|
"eval_recall": 0.39015817223198596, |
|
"eval_runtime": 3.9406, |
|
"eval_samples_per_second": 433.184, |
|
"eval_steps_per_second": 13.704, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.401139497756958, |
|
"learning_rate": 0.0002906771114828835, |
|
"loss": 0.481, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7246631517281781, |
|
"eval_f1": 0.35792349726775957, |
|
"eval_loss": 0.4936811029911041, |
|
"eval_mcc": 0.32418684508290907, |
|
"eval_precision": 0.803680981595092, |
|
"eval_recall": 0.23022847100175747, |
|
"eval_runtime": 3.1821, |
|
"eval_samples_per_second": 536.439, |
|
"eval_steps_per_second": 16.97, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.7682132720947266, |
|
"learning_rate": 0.00027357845786624336, |
|
"loss": 0.47, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7510251903925015, |
|
"eval_f1": 0.47595561035758327, |
|
"eval_loss": 0.4672105610370636, |
|
"eval_mcc": 0.4002104345910371, |
|
"eval_precision": 0.7975206611570248, |
|
"eval_recall": 0.3391915641476274, |
|
"eval_runtime": 3.3008, |
|
"eval_samples_per_second": 517.142, |
|
"eval_steps_per_second": 16.359, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.235007286071777, |
|
"learning_rate": 0.0002564798042496031, |
|
"loss": 0.4564, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7639132981839485, |
|
"eval_f1": 0.6385650224215247, |
|
"eval_loss": 0.46502017974853516, |
|
"eval_mcc": 0.46359962281417727, |
|
"eval_precision": 0.652014652014652, |
|
"eval_recall": 0.6256590509666081, |
|
"eval_runtime": 3.1354, |
|
"eval_samples_per_second": 544.434, |
|
"eval_steps_per_second": 17.223, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.1712186336517334, |
|
"learning_rate": 0.0002393811506329629, |
|
"loss": 0.4494, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7650849443468073, |
|
"eval_f1": 0.6125603864734299, |
|
"eval_loss": 0.45874524116516113, |
|
"eval_mcc": 0.4509702487241507, |
|
"eval_precision": 0.6802575107296137, |
|
"eval_recall": 0.5571177504393673, |
|
"eval_runtime": 3.3104, |
|
"eval_samples_per_second": 515.646, |
|
"eval_steps_per_second": 16.312, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 5.33981466293335, |
|
"learning_rate": 0.0002222824970163227, |
|
"loss": 0.4385, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7721148213239601, |
|
"eval_f1": 0.5682574916759155, |
|
"eval_loss": 0.4540397524833679, |
|
"eval_mcc": 0.4563006637386888, |
|
"eval_precision": 0.7710843373493976, |
|
"eval_recall": 0.44991212653778556, |
|
"eval_runtime": 3.165, |
|
"eval_samples_per_second": 539.344, |
|
"eval_steps_per_second": 17.062, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.767106294631958, |
|
"learning_rate": 0.00020518384339968248, |
|
"loss": 0.431, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7727006444053895, |
|
"eval_f1": 0.5773420479302832, |
|
"eval_loss": 0.4424216151237488, |
|
"eval_mcc": 0.4580968046120123, |
|
"eval_precision": 0.7593123209169055, |
|
"eval_recall": 0.46572934973637964, |
|
"eval_runtime": 4.1398, |
|
"eval_samples_per_second": 412.342, |
|
"eval_steps_per_second": 13.044, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.568024635314941, |
|
"learning_rate": 0.0001880851897830423, |
|
"loss": 0.4269, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7691857059168131, |
|
"eval_f1": 0.5553047404063206, |
|
"eval_loss": 0.4476867616176605, |
|
"eval_mcc": 0.4484665614833703, |
|
"eval_precision": 0.7760252365930599, |
|
"eval_recall": 0.43233743409490333, |
|
"eval_runtime": 3.1524, |
|
"eval_samples_per_second": 541.497, |
|
"eval_steps_per_second": 17.13, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.2477447986602783, |
|
"learning_rate": 0.00017098653616640208, |
|
"loss": 0.4186, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7768014059753954, |
|
"eval_f1": 0.6193806193806194, |
|
"eval_loss": 0.45719364285469055, |
|
"eval_mcc": 0.4744794188942744, |
|
"eval_precision": 0.7175925925925926, |
|
"eval_recall": 0.5448154657293497, |
|
"eval_runtime": 3.2524, |
|
"eval_samples_per_second": 524.847, |
|
"eval_steps_per_second": 16.603, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.3632404804229736, |
|
"learning_rate": 0.00015388788254976188, |
|
"loss": 0.4109, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.773286467486819, |
|
"eval_f1": 0.5587229190421892, |
|
"eval_loss": 0.44749346375465393, |
|
"eval_mcc": 0.45996910450651607, |
|
"eval_precision": 0.7954545454545454, |
|
"eval_recall": 0.4305799648506151, |
|
"eval_runtime": 3.1492, |
|
"eval_samples_per_second": 542.04, |
|
"eval_steps_per_second": 17.147, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.5821080207824707, |
|
"learning_rate": 0.00013678922893312168, |
|
"loss": 0.4098, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7932044522554189, |
|
"eval_f1": 0.6672950047125353, |
|
"eval_loss": 0.43375280499458313, |
|
"eval_mcc": 0.5213017315650612, |
|
"eval_precision": 0.7195121951219512, |
|
"eval_recall": 0.6221441124780316, |
|
"eval_runtime": 3.3029, |
|
"eval_samples_per_second": 516.82, |
|
"eval_steps_per_second": 16.349, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 8.042132377624512, |
|
"learning_rate": 0.00011969057531648145, |
|
"loss": 0.4051, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7803163444639719, |
|
"eval_f1": 0.6290801186943621, |
|
"eval_loss": 0.44343388080596924, |
|
"eval_mcc": 0.48417076857505637, |
|
"eval_precision": 0.7194570135746606, |
|
"eval_recall": 0.5588752196836555, |
|
"eval_runtime": 3.1568, |
|
"eval_samples_per_second": 540.741, |
|
"eval_steps_per_second": 17.106, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.81894063949585, |
|
"learning_rate": 0.00010259192169984124, |
|
"loss": 0.3981, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7932044522554189, |
|
"eval_f1": 0.6569484936831876, |
|
"eval_loss": 0.42848843336105347, |
|
"eval_mcc": 0.5172287110706476, |
|
"eval_precision": 0.7347826086956522, |
|
"eval_recall": 0.5940246045694201, |
|
"eval_runtime": 3.2377, |
|
"eval_samples_per_second": 527.23, |
|
"eval_steps_per_second": 16.679, |
|
"step": 2996 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7346859302160.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8988863482372167, |
|
"learning_rate": 0.00034197307233280416, |
|
"num_train_epochs": 20, |
|
"temperature": 20 |
|
} |
|
} |
|
|