|
{ |
|
"best_metric": 0.7955477445811365, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-16/checkpoint-3210", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 4280, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.164041519165039, |
|
"learning_rate": 0.0004107110800529193, |
|
"loss": 0.5976, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7182190978324546, |
|
"eval_f1": 0.49208025343189016, |
|
"eval_loss": 0.5573095679283142, |
|
"eval_mcc": 0.32024447377810733, |
|
"eval_precision": 0.6164021164021164, |
|
"eval_recall": 0.4094903339191564, |
|
"eval_runtime": 3.1533, |
|
"eval_samples_per_second": 541.343, |
|
"eval_steps_per_second": 17.125, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.810741424560547, |
|
"learning_rate": 0.00038909470741855514, |
|
"loss": 0.535, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7299355594610427, |
|
"eval_f1": 0.5719591457753018, |
|
"eval_loss": 0.5309674143791199, |
|
"eval_mcc": 0.3769093274057024, |
|
"eval_precision": 0.6062992125984252, |
|
"eval_recall": 0.5413005272407733, |
|
"eval_runtime": 3.2455, |
|
"eval_samples_per_second": 525.953, |
|
"eval_steps_per_second": 16.638, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.5110363960266113, |
|
"learning_rate": 0.00036747833478419095, |
|
"loss": 0.51, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7527826596367897, |
|
"eval_f1": 0.5290178571428571, |
|
"eval_loss": 0.5084466934204102, |
|
"eval_mcc": 0.4042060476032326, |
|
"eval_precision": 0.7247706422018348, |
|
"eval_recall": 0.4165202108963093, |
|
"eval_runtime": 3.1369, |
|
"eval_samples_per_second": 544.167, |
|
"eval_steps_per_second": 17.214, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.1826329231262207, |
|
"learning_rate": 0.0003458619621498268, |
|
"loss": 0.4995, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7451669595782073, |
|
"eval_f1": 0.49241540256709454, |
|
"eval_loss": 0.5046694278717041, |
|
"eval_mcc": 0.38160728386386483, |
|
"eval_precision": 0.7326388888888888, |
|
"eval_recall": 0.37082601054481545, |
|
"eval_runtime": 3.3202, |
|
"eval_samples_per_second": 514.123, |
|
"eval_steps_per_second": 16.264, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 6.0804123878479, |
|
"learning_rate": 0.0003242455895154626, |
|
"loss": 0.4853, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7574692442882249, |
|
"eval_f1": 0.6393728222996515, |
|
"eval_loss": 0.4947313964366913, |
|
"eval_mcc": 0.45673224487908876, |
|
"eval_precision": 0.6338514680483592, |
|
"eval_recall": 0.6449912126537786, |
|
"eval_runtime": 3.1788, |
|
"eval_samples_per_second": 536.989, |
|
"eval_steps_per_second": 16.987, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.1514229774475098, |
|
"learning_rate": 0.0003026292168810984, |
|
"loss": 0.4724, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7650849443468073, |
|
"eval_f1": 0.6598812553011025, |
|
"eval_loss": 0.477894127368927, |
|
"eval_mcc": 0.4814730312998762, |
|
"eval_precision": 0.6377049180327868, |
|
"eval_recall": 0.6836555360281195, |
|
"eval_runtime": 3.2099, |
|
"eval_samples_per_second": 531.785, |
|
"eval_steps_per_second": 16.823, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 5.599643230438232, |
|
"learning_rate": 0.0002810128442467343, |
|
"loss": 0.46, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.773286467486819, |
|
"eval_f1": 0.5798045602605862, |
|
"eval_loss": 0.47905173897743225, |
|
"eval_mcc": 0.45971678817188244, |
|
"eval_precision": 0.7585227272727273, |
|
"eval_recall": 0.46924428822495606, |
|
"eval_runtime": 3.1625, |
|
"eval_samples_per_second": 539.761, |
|
"eval_steps_per_second": 17.075, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.2664639949798584, |
|
"learning_rate": 0.0002593964716123701, |
|
"loss": 0.4428, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7885178676039836, |
|
"eval_f1": 0.6386386386386386, |
|
"eval_loss": 0.4652605950832367, |
|
"eval_mcc": 0.5028818263800968, |
|
"eval_precision": 0.7418604651162791, |
|
"eval_recall": 0.5606326889279437, |
|
"eval_runtime": 3.2551, |
|
"eval_samples_per_second": 524.404, |
|
"eval_steps_per_second": 16.589, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.06269645690918, |
|
"learning_rate": 0.00023778009897800593, |
|
"loss": 0.4442, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7773872290568249, |
|
"eval_f1": 0.6008403361344538, |
|
"eval_loss": 0.47534701228141785, |
|
"eval_mcc": 0.4716667178372095, |
|
"eval_precision": 0.7467362924281984, |
|
"eval_recall": 0.5026362038664324, |
|
"eval_runtime": 3.1553, |
|
"eval_samples_per_second": 541.002, |
|
"eval_steps_per_second": 17.114, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.0700459480285645, |
|
"learning_rate": 0.00021616372634364174, |
|
"loss": 0.4307, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7838312829525483, |
|
"eval_f1": 0.6666666666666666, |
|
"eval_loss": 0.47036686539649963, |
|
"eval_mcc": 0.5073397319926002, |
|
"eval_precision": 0.6858736059479554, |
|
"eval_recall": 0.648506151142355, |
|
"eval_runtime": 3.3854, |
|
"eval_samples_per_second": 504.231, |
|
"eval_steps_per_second": 15.951, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 6.991399765014648, |
|
"learning_rate": 0.00019454735370927757, |
|
"loss": 0.4243, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7867603983596954, |
|
"eval_f1": 0.636, |
|
"eval_loss": 0.4695989191532135, |
|
"eval_mcc": 0.49868088180967785, |
|
"eval_precision": 0.7378190255220418, |
|
"eval_recall": 0.5588752196836555, |
|
"eval_runtime": 3.1405, |
|
"eval_samples_per_second": 543.539, |
|
"eval_steps_per_second": 17.195, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.208732604980469, |
|
"learning_rate": 0.0001729309810749134, |
|
"loss": 0.4198, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7896895137668424, |
|
"eval_f1": 0.666046511627907, |
|
"eval_loss": 0.45460793375968933, |
|
"eval_mcc": 0.5152127188733497, |
|
"eval_precision": 0.7075098814229249, |
|
"eval_recall": 0.6291739894551845, |
|
"eval_runtime": 3.8567, |
|
"eval_samples_per_second": 442.612, |
|
"eval_steps_per_second": 14.002, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 16.56888771057129, |
|
"learning_rate": 0.0001513146084405492, |
|
"loss": 0.4199, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7908611599297012, |
|
"eval_f1": 0.6641580432737535, |
|
"eval_loss": 0.46026891469955444, |
|
"eval_mcc": 0.5161067927608803, |
|
"eval_precision": 0.7145748987854251, |
|
"eval_recall": 0.6203866432337434, |
|
"eval_runtime": 3.1632, |
|
"eval_samples_per_second": 539.639, |
|
"eval_steps_per_second": 17.071, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.45969820022583, |
|
"learning_rate": 0.00012969823580618505, |
|
"loss": 0.4135, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.789103690685413, |
|
"eval_f1": 0.6622889305816135, |
|
"eval_loss": 0.4559510350227356, |
|
"eval_mcc": 0.5124487501229456, |
|
"eval_precision": 0.710261569416499, |
|
"eval_recall": 0.6203866432337434, |
|
"eval_runtime": 3.2281, |
|
"eval_samples_per_second": 528.794, |
|
"eval_steps_per_second": 16.728, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.7556620836257935, |
|
"learning_rate": 0.00010808186317182087, |
|
"loss": 0.4066, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7955477445811365, |
|
"eval_f1": 0.6666666666666667, |
|
"eval_loss": 0.4586973190307617, |
|
"eval_mcc": 0.5249372343364972, |
|
"eval_precision": 0.7301255230125523, |
|
"eval_recall": 0.6133567662565905, |
|
"eval_runtime": 3.1701, |
|
"eval_samples_per_second": 538.473, |
|
"eval_steps_per_second": 17.034, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 7.368743896484375, |
|
"learning_rate": 8.64654905374567e-05, |
|
"loss": 0.4014, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7955477445811365, |
|
"eval_f1": 0.6812785388127853, |
|
"eval_loss": 0.47202157974243164, |
|
"eval_mcc": 0.5320125467778077, |
|
"eval_precision": 0.7091254752851711, |
|
"eval_recall": 0.655536028119508, |
|
"eval_runtime": 3.2344, |
|
"eval_samples_per_second": 527.767, |
|
"eval_steps_per_second": 16.696, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 7.043065071105957, |
|
"learning_rate": 6.484911790309252e-05, |
|
"loss": 0.3956, |
|
"step": 3638 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7937902753368483, |
|
"eval_f1": 0.6752767527675276, |
|
"eval_loss": 0.46733909845352173, |
|
"eval_mcc": 0.5261527137420708, |
|
"eval_precision": 0.7106796116504854, |
|
"eval_recall": 0.6432337434094904, |
|
"eval_runtime": 3.1521, |
|
"eval_samples_per_second": 541.552, |
|
"eval_steps_per_second": 17.132, |
|
"step": 3638 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 5.637269973754883, |
|
"learning_rate": 4.323274526872835e-05, |
|
"loss": 0.3939, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7932044522554189, |
|
"eval_f1": 0.6716279069767441, |
|
"eval_loss": 0.459559828042984, |
|
"eval_mcc": 0.5233763006864838, |
|
"eval_precision": 0.7134387351778656, |
|
"eval_recall": 0.6344463971880492, |
|
"eval_runtime": 3.1932, |
|
"eval_samples_per_second": 534.573, |
|
"eval_steps_per_second": 16.911, |
|
"step": 3852 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.5581817626953125, |
|
"learning_rate": 2.1616372634364176e-05, |
|
"loss": 0.3877, |
|
"step": 4066 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7902753368482718, |
|
"eval_f1": 0.6709558823529412, |
|
"eval_loss": 0.4660806357860565, |
|
"eval_mcc": 0.5186992097593167, |
|
"eval_precision": 0.7032755298651252, |
|
"eval_recall": 0.6414762741652021, |
|
"eval_runtime": 3.1559, |
|
"eval_samples_per_second": 540.898, |
|
"eval_steps_per_second": 17.111, |
|
"step": 4066 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 9.65938663482666, |
|
"learning_rate": 0.0, |
|
"loss": 0.388, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7902753368482718, |
|
"eval_f1": 0.6792114695340501, |
|
"eval_loss": 0.4644205868244171, |
|
"eval_mcc": 0.5237383263210195, |
|
"eval_precision": 0.6928702010968921, |
|
"eval_recall": 0.6660808435852372, |
|
"eval_runtime": 3.8864, |
|
"eval_samples_per_second": 439.228, |
|
"eval_steps_per_second": 13.895, |
|
"step": 4280 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 10495513288800.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.9950627760257441, |
|
"learning_rate": 0.0004323274526872835, |
|
"num_train_epochs": 20, |
|
"temperature": 49 |
|
} |
|
} |
|
|