xuancoblab2023's picture
Training in progress, epoch 7
74ee53a verified
raw
history blame
8.43 kB
{
"best_metric": 0.7217340363210311,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-5/checkpoint-2782",
"epoch": 14.0,
"eval_steps": 500,
"global_step": 2996,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 2.161219835281372,
"learning_rate": 0.0001946740481873714,
"loss": 0.2896,
"step": 214
},
{
"epoch": 1.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1": 0.0,
"eval_loss": 0.27429890632629395,
"eval_mcc": 0.0,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 3.1537,
"eval_samples_per_second": 541.276,
"eval_steps_per_second": 17.123,
"step": 214
},
{
"epoch": 2.0,
"grad_norm": 1.0970500707626343,
"learning_rate": 0.00017969912140372742,
"loss": 0.2734,
"step": 428
},
{
"epoch": 2.0,
"eval_accuracy": 0.6666666666666666,
"eval_f1": 0.006980802792321117,
"eval_loss": 0.2710207402706146,
"eval_mcc": 0.01713474628469157,
"eval_precision": 0.5,
"eval_recall": 0.0035149384885764497,
"eval_runtime": 3.9301,
"eval_samples_per_second": 434.342,
"eval_steps_per_second": 13.74,
"step": 428
},
{
"epoch": 3.0,
"grad_norm": 0.7171841859817505,
"learning_rate": 0.0001647241946200835,
"loss": 0.2685,
"step": 642
},
{
"epoch": 3.0,
"eval_accuracy": 0.6678383128295254,
"eval_f1": 0.010471204188481676,
"eval_loss": 0.27606382966041565,
"eval_mcc": 0.042836865711728934,
"eval_precision": 0.75,
"eval_recall": 0.005272407732864675,
"eval_runtime": 3.1441,
"eval_samples_per_second": 542.926,
"eval_steps_per_second": 17.175,
"step": 642
},
{
"epoch": 4.0,
"grad_norm": 1.334978699684143,
"learning_rate": 0.00014974926783643954,
"loss": 0.266,
"step": 856
},
{
"epoch": 4.0,
"eval_accuracy": 0.6795547744581136,
"eval_f1": 0.10180623973727422,
"eval_loss": 0.26485475897789,
"eval_mcc": 0.14513196526792949,
"eval_precision": 0.775,
"eval_recall": 0.054481546572934976,
"eval_runtime": 3.1938,
"eval_samples_per_second": 534.478,
"eval_steps_per_second": 16.908,
"step": 856
},
{
"epoch": 5.0,
"grad_norm": 1.377930998802185,
"learning_rate": 0.0001347743410527956,
"loss": 0.2643,
"step": 1070
},
{
"epoch": 5.0,
"eval_accuracy": 0.6918570591681312,
"eval_f1": 0.18827160493827164,
"eval_loss": 0.26378217339515686,
"eval_mcc": 0.20505841470507494,
"eval_precision": 0.7721518987341772,
"eval_recall": 0.10720562390158173,
"eval_runtime": 3.1292,
"eval_samples_per_second": 545.505,
"eval_steps_per_second": 17.257,
"step": 1070
},
{
"epoch": 6.0,
"grad_norm": 1.2771140336990356,
"learning_rate": 0.00011979941426915163,
"loss": 0.263,
"step": 1284
},
{
"epoch": 6.0,
"eval_accuracy": 0.6936145284124194,
"eval_f1": 0.18662519440124417,
"eval_loss": 0.26149189472198486,
"eval_mcc": 0.2156164618376391,
"eval_precision": 0.8108108108108109,
"eval_recall": 0.1054481546572935,
"eval_runtime": 3.2914,
"eval_samples_per_second": 518.626,
"eval_steps_per_second": 16.406,
"step": 1284
},
{
"epoch": 7.0,
"grad_norm": 1.073453426361084,
"learning_rate": 0.00010482448748550767,
"loss": 0.2612,
"step": 1498
},
{
"epoch": 7.0,
"eval_accuracy": 0.6994727592267135,
"eval_f1": 0.21439509954058195,
"eval_loss": 0.2620287537574768,
"eval_mcc": 0.24129962353457945,
"eval_precision": 0.8333333333333334,
"eval_recall": 0.12302284710017575,
"eval_runtime": 3.1567,
"eval_samples_per_second": 540.751,
"eval_steps_per_second": 17.106,
"step": 1498
},
{
"epoch": 8.0,
"grad_norm": 1.2691621780395508,
"learning_rate": 8.984956070186371e-05,
"loss": 0.2597,
"step": 1712
},
{
"epoch": 8.0,
"eval_accuracy": 0.69302870533099,
"eval_f1": 0.17088607594936708,
"eval_loss": 0.2611652910709381,
"eval_mcc": 0.21751991027491313,
"eval_precision": 0.8571428571428571,
"eval_recall": 0.09490333919156414,
"eval_runtime": 3.2468,
"eval_samples_per_second": 525.752,
"eval_steps_per_second": 16.632,
"step": 1712
},
{
"epoch": 9.0,
"grad_norm": 1.0226393938064575,
"learning_rate": 7.487463391821977e-05,
"loss": 0.2597,
"step": 1926
},
{
"epoch": 9.0,
"eval_accuracy": 0.6977152899824253,
"eval_f1": 0.19626168224299068,
"eval_loss": 0.2611730098724365,
"eval_mcc": 0.2374955820778862,
"eval_precision": 0.863013698630137,
"eval_recall": 0.11072056239015818,
"eval_runtime": 3.1639,
"eval_samples_per_second": 539.53,
"eval_steps_per_second": 17.068,
"step": 1926
},
{
"epoch": 10.0,
"grad_norm": 1.0377492904663086,
"learning_rate": 5.989970713457581e-05,
"loss": 0.2565,
"step": 2140
},
{
"epoch": 10.0,
"eval_accuracy": 0.7193907439953134,
"eval_f1": 0.32248939179632247,
"eval_loss": 0.260220468044281,
"eval_mcc": 0.310001756502818,
"eval_precision": 0.8260869565217391,
"eval_recall": 0.20035149384885764,
"eval_runtime": 3.2066,
"eval_samples_per_second": 532.341,
"eval_steps_per_second": 16.84,
"step": 2140
},
{
"epoch": 11.0,
"grad_norm": 1.2514437437057495,
"learning_rate": 4.4924780350931855e-05,
"loss": 0.2555,
"step": 2354
},
{
"epoch": 11.0,
"eval_accuracy": 0.700058582308143,
"eval_f1": 0.20743034055727552,
"eval_loss": 0.26075002551078796,
"eval_mcc": 0.2474956228703306,
"eval_precision": 0.8701298701298701,
"eval_recall": 0.11775043936731107,
"eval_runtime": 3.1394,
"eval_samples_per_second": 543.734,
"eval_steps_per_second": 17.201,
"step": 2354
},
{
"epoch": 12.0,
"grad_norm": 0.8049026727676392,
"learning_rate": 2.9949853567287906e-05,
"loss": 0.2544,
"step": 2568
},
{
"epoch": 12.0,
"eval_accuracy": 0.715875805506737,
"eval_f1": 0.31593794076163606,
"eval_loss": 0.2587771415710449,
"eval_mcc": 0.29589835954792404,
"eval_precision": 0.8,
"eval_recall": 0.1968365553602812,
"eval_runtime": 3.3501,
"eval_samples_per_second": 509.539,
"eval_steps_per_second": 16.119,
"step": 2568
},
{
"epoch": 13.0,
"grad_norm": 2.94110369682312,
"learning_rate": 1.4974926783643953e-05,
"loss": 0.2544,
"step": 2782
},
{
"epoch": 13.0,
"eval_accuracy": 0.7217340363210311,
"eval_f1": 0.3356643356643356,
"eval_loss": 0.2589167356491089,
"eval_mcc": 0.31697199705587376,
"eval_precision": 0.821917808219178,
"eval_recall": 0.210896309314587,
"eval_runtime": 3.165,
"eval_samples_per_second": 539.337,
"eval_steps_per_second": 17.062,
"step": 2782
},
{
"epoch": 14.0,
"grad_norm": 2.4334750175476074,
"learning_rate": 0.0,
"loss": 0.2528,
"step": 2996
},
{
"epoch": 14.0,
"eval_accuracy": 0.7193907439953134,
"eval_f1": 0.33379694019471484,
"eval_loss": 0.2587186396121979,
"eval_mcc": 0.307265896914303,
"eval_precision": 0.8,
"eval_recall": 0.210896309314587,
"eval_runtime": 3.2491,
"eval_samples_per_second": 525.383,
"eval_steps_per_second": 16.62,
"step": 2996
}
],
"logging_steps": 500,
"max_steps": 2996,
"num_input_tokens_seen": 0,
"num_train_epochs": 14,
"save_steps": 500,
"total_flos": 7346859302160.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.43581262355237016,
"learning_rate": 0.00020964897497101535,
"num_train_epochs": 14,
"temperature": 35
}
}