|
{ |
|
"best_metric": 0.7217340363210311, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-2782", |
|
"epoch": 14.0, |
|
"eval_steps": 500, |
|
"global_step": 2996, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.161219835281372, |
|
"learning_rate": 0.0001946740481873714, |
|
"loss": 0.2896, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.27429890632629395, |
|
"eval_mcc": 0.0, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 3.1537, |
|
"eval_samples_per_second": 541.276, |
|
"eval_steps_per_second": 17.123, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.0970500707626343, |
|
"learning_rate": 0.00017969912140372742, |
|
"loss": 0.2734, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.006980802792321117, |
|
"eval_loss": 0.2710207402706146, |
|
"eval_mcc": 0.01713474628469157, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.0035149384885764497, |
|
"eval_runtime": 3.9301, |
|
"eval_samples_per_second": 434.342, |
|
"eval_steps_per_second": 13.74, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.7171841859817505, |
|
"learning_rate": 0.0001647241946200835, |
|
"loss": 0.2685, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6678383128295254, |
|
"eval_f1": 0.010471204188481676, |
|
"eval_loss": 0.27606382966041565, |
|
"eval_mcc": 0.042836865711728934, |
|
"eval_precision": 0.75, |
|
"eval_recall": 0.005272407732864675, |
|
"eval_runtime": 3.1441, |
|
"eval_samples_per_second": 542.926, |
|
"eval_steps_per_second": 17.175, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.334978699684143, |
|
"learning_rate": 0.00014974926783643954, |
|
"loss": 0.266, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6795547744581136, |
|
"eval_f1": 0.10180623973727422, |
|
"eval_loss": 0.26485475897789, |
|
"eval_mcc": 0.14513196526792949, |
|
"eval_precision": 0.775, |
|
"eval_recall": 0.054481546572934976, |
|
"eval_runtime": 3.1938, |
|
"eval_samples_per_second": 534.478, |
|
"eval_steps_per_second": 16.908, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.377930998802185, |
|
"learning_rate": 0.0001347743410527956, |
|
"loss": 0.2643, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6918570591681312, |
|
"eval_f1": 0.18827160493827164, |
|
"eval_loss": 0.26378217339515686, |
|
"eval_mcc": 0.20505841470507494, |
|
"eval_precision": 0.7721518987341772, |
|
"eval_recall": 0.10720562390158173, |
|
"eval_runtime": 3.1292, |
|
"eval_samples_per_second": 545.505, |
|
"eval_steps_per_second": 17.257, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.2771140336990356, |
|
"learning_rate": 0.00011979941426915163, |
|
"loss": 0.263, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6936145284124194, |
|
"eval_f1": 0.18662519440124417, |
|
"eval_loss": 0.26149189472198486, |
|
"eval_mcc": 0.2156164618376391, |
|
"eval_precision": 0.8108108108108109, |
|
"eval_recall": 0.1054481546572935, |
|
"eval_runtime": 3.2914, |
|
"eval_samples_per_second": 518.626, |
|
"eval_steps_per_second": 16.406, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.073453426361084, |
|
"learning_rate": 0.00010482448748550767, |
|
"loss": 0.2612, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6994727592267135, |
|
"eval_f1": 0.21439509954058195, |
|
"eval_loss": 0.2620287537574768, |
|
"eval_mcc": 0.24129962353457945, |
|
"eval_precision": 0.8333333333333334, |
|
"eval_recall": 0.12302284710017575, |
|
"eval_runtime": 3.1567, |
|
"eval_samples_per_second": 540.751, |
|
"eval_steps_per_second": 17.106, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.2691621780395508, |
|
"learning_rate": 8.984956070186371e-05, |
|
"loss": 0.2597, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.69302870533099, |
|
"eval_f1": 0.17088607594936708, |
|
"eval_loss": 0.2611652910709381, |
|
"eval_mcc": 0.21751991027491313, |
|
"eval_precision": 0.8571428571428571, |
|
"eval_recall": 0.09490333919156414, |
|
"eval_runtime": 3.2468, |
|
"eval_samples_per_second": 525.752, |
|
"eval_steps_per_second": 16.632, |
|
"step": 1712 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 1.0226393938064575, |
|
"learning_rate": 7.487463391821977e-05, |
|
"loss": 0.2597, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6977152899824253, |
|
"eval_f1": 0.19626168224299068, |
|
"eval_loss": 0.2611730098724365, |
|
"eval_mcc": 0.2374955820778862, |
|
"eval_precision": 0.863013698630137, |
|
"eval_recall": 0.11072056239015818, |
|
"eval_runtime": 3.1639, |
|
"eval_samples_per_second": 539.53, |
|
"eval_steps_per_second": 17.068, |
|
"step": 1926 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.0377492904663086, |
|
"learning_rate": 5.989970713457581e-05, |
|
"loss": 0.2565, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7193907439953134, |
|
"eval_f1": 0.32248939179632247, |
|
"eval_loss": 0.260220468044281, |
|
"eval_mcc": 0.310001756502818, |
|
"eval_precision": 0.8260869565217391, |
|
"eval_recall": 0.20035149384885764, |
|
"eval_runtime": 3.2066, |
|
"eval_samples_per_second": 532.341, |
|
"eval_steps_per_second": 16.84, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 1.2514437437057495, |
|
"learning_rate": 4.4924780350931855e-05, |
|
"loss": 0.2555, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.700058582308143, |
|
"eval_f1": 0.20743034055727552, |
|
"eval_loss": 0.26075002551078796, |
|
"eval_mcc": 0.2474956228703306, |
|
"eval_precision": 0.8701298701298701, |
|
"eval_recall": 0.11775043936731107, |
|
"eval_runtime": 3.1394, |
|
"eval_samples_per_second": 543.734, |
|
"eval_steps_per_second": 17.201, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.8049026727676392, |
|
"learning_rate": 2.9949853567287906e-05, |
|
"loss": 0.2544, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.715875805506737, |
|
"eval_f1": 0.31593794076163606, |
|
"eval_loss": 0.2587771415710449, |
|
"eval_mcc": 0.29589835954792404, |
|
"eval_precision": 0.8, |
|
"eval_recall": 0.1968365553602812, |
|
"eval_runtime": 3.3501, |
|
"eval_samples_per_second": 509.539, |
|
"eval_steps_per_second": 16.119, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.94110369682312, |
|
"learning_rate": 1.4974926783643953e-05, |
|
"loss": 0.2544, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7217340363210311, |
|
"eval_f1": 0.3356643356643356, |
|
"eval_loss": 0.2589167356491089, |
|
"eval_mcc": 0.31697199705587376, |
|
"eval_precision": 0.821917808219178, |
|
"eval_recall": 0.210896309314587, |
|
"eval_runtime": 3.165, |
|
"eval_samples_per_second": 539.337, |
|
"eval_steps_per_second": 17.062, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 2.4334750175476074, |
|
"learning_rate": 0.0, |
|
"loss": 0.2528, |
|
"step": 2996 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7193907439953134, |
|
"eval_f1": 0.33379694019471484, |
|
"eval_loss": 0.2587186396121979, |
|
"eval_mcc": 0.307265896914303, |
|
"eval_precision": 0.8, |
|
"eval_recall": 0.210896309314587, |
|
"eval_runtime": 3.2491, |
|
"eval_samples_per_second": 525.383, |
|
"eval_steps_per_second": 16.62, |
|
"step": 2996 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2996, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 14, |
|
"save_steps": 500, |
|
"total_flos": 7346859302160.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.43581262355237016, |
|
"learning_rate": 0.00020964897497101535, |
|
"num_train_epochs": 14, |
|
"temperature": 35 |
|
} |
|
} |
|
|