xuancoblab2023's picture
Training in progress, epoch 1
aa0035d verified
raw
history blame
6.2 kB
{
"best_metric": 0.8941450174486235,
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-5160",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 6450,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.9960685968399048,
"learning_rate": 0.0005204222579442675,
"loss": 0.3335,
"step": 645
},
{
"epoch": 1.0,
"eval_accuracy": 0.8900736719658783,
"eval_f1": 0.0070052539404553416,
"eval_loss": 0.3329962193965912,
"eval_mcc": 0.05593214040002362,
"eval_precision": 1.0,
"eval_recall": 0.0035149384885764497,
"eval_runtime": 9.9674,
"eval_samples_per_second": 517.487,
"eval_steps_per_second": 16.253,
"step": 645
},
{
"epoch": 2.0,
"grad_norm": 0.6790906190872192,
"learning_rate": 0.0004625975626171267,
"loss": 0.3255,
"step": 1290
},
{
"epoch": 2.0,
"eval_accuracy": 0.8883288096161303,
"eval_f1": 0.16034985422740525,
"eval_loss": 0.3358159363269806,
"eval_mcc": 0.17495541464580086,
"eval_precision": 0.4700854700854701,
"eval_recall": 0.09666080843585237,
"eval_runtime": 9.9229,
"eval_samples_per_second": 519.808,
"eval_steps_per_second": 16.326,
"step": 1290
},
{
"epoch": 3.0,
"grad_norm": 0.9260676503181458,
"learning_rate": 0.00040477286728998583,
"loss": 0.3287,
"step": 1935
},
{
"epoch": 3.0,
"eval_accuracy": 0.8896859247770453,
"eval_f1": 0.0,
"eval_loss": 0.31568145751953125,
"eval_mcc": 0.0,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 9.9841,
"eval_samples_per_second": 516.623,
"eval_steps_per_second": 16.226,
"step": 1935
},
{
"epoch": 4.0,
"grad_norm": 5.0631103515625,
"learning_rate": 0.000346948171962845,
"loss": 0.3215,
"step": 2580
},
{
"epoch": 4.0,
"eval_accuracy": 0.8912369135323769,
"eval_f1": 0.15639097744360905,
"eval_loss": 0.3260652720928192,
"eval_mcc": 0.18961519340791813,
"eval_precision": 0.5416666666666666,
"eval_recall": 0.0913884007029877,
"eval_runtime": 9.9378,
"eval_samples_per_second": 519.03,
"eval_steps_per_second": 16.301,
"step": 2580
},
{
"epoch": 5.0,
"grad_norm": 3.190526008605957,
"learning_rate": 0.0002891234766357042,
"loss": 0.3187,
"step": 3225
},
{
"epoch": 5.0,
"eval_accuracy": 0.8906552927491276,
"eval_f1": 0.05369127516778523,
"eval_loss": 0.3109687268733978,
"eval_mcc": 0.11167217315148596,
"eval_precision": 0.5925925925925926,
"eval_recall": 0.028119507908611598,
"eval_runtime": 9.9324,
"eval_samples_per_second": 519.31,
"eval_steps_per_second": 16.31,
"step": 3225
},
{
"epoch": 6.0,
"grad_norm": 0.6695261597633362,
"learning_rate": 0.00023129878130856335,
"loss": 0.3131,
"step": 3870
},
{
"epoch": 6.0,
"eval_accuracy": 0.892594028693292,
"eval_f1": 0.13975155279503107,
"eval_loss": 0.30786898732185364,
"eval_mcc": 0.18986913581302958,
"eval_precision": 0.6,
"eval_recall": 0.07908611599297012,
"eval_runtime": 9.931,
"eval_samples_per_second": 519.384,
"eval_steps_per_second": 16.313,
"step": 3870
},
{
"epoch": 7.0,
"grad_norm": 1.1901545524597168,
"learning_rate": 0.0001734740859814225,
"loss": 0.3069,
"step": 4515
},
{
"epoch": 7.0,
"eval_accuracy": 0.8924001550988755,
"eval_f1": 0.2280945757997218,
"eval_loss": 0.3023030459880829,
"eval_mcc": 0.24105558868602414,
"eval_precision": 0.5466666666666666,
"eval_recall": 0.14411247803163443,
"eval_runtime": 9.9209,
"eval_samples_per_second": 519.913,
"eval_steps_per_second": 16.329,
"step": 4515
},
{
"epoch": 8.0,
"grad_norm": 7.957598686218262,
"learning_rate": 0.00011564939065428167,
"loss": 0.3002,
"step": 5160
},
{
"epoch": 8.0,
"eval_accuracy": 0.8941450174486235,
"eval_f1": 0.2132564841498559,
"eval_loss": 0.3005247414112091,
"eval_mcc": 0.24231018721764655,
"eval_precision": 0.592,
"eval_recall": 0.13005272407732865,
"eval_runtime": 9.9245,
"eval_samples_per_second": 519.726,
"eval_steps_per_second": 16.323,
"step": 5160
},
{
"epoch": 9.0,
"grad_norm": 1.0052604675292969,
"learning_rate": 5.7824695327140836e-05,
"loss": 0.2941,
"step": 5805
},
{
"epoch": 9.0,
"eval_accuracy": 0.8941450174486235,
"eval_f1": 0.24793388429752067,
"eval_loss": 0.2993471622467041,
"eval_mcc": 0.26182249981567285,
"eval_precision": 0.5732484076433121,
"eval_recall": 0.15817223198594024,
"eval_runtime": 9.9496,
"eval_samples_per_second": 518.413,
"eval_steps_per_second": 16.282,
"step": 5805
},
{
"epoch": 10.0,
"grad_norm": 1.0274789333343506,
"learning_rate": 0.0,
"loss": 0.2938,
"step": 6450
},
{
"epoch": 10.0,
"eval_accuracy": 0.8937572702597906,
"eval_f1": 0.24725274725274723,
"eval_loss": 0.2995382249355316,
"eval_mcc": 0.25943271866011525,
"eval_precision": 0.5660377358490566,
"eval_recall": 0.15817223198594024,
"eval_runtime": 9.9566,
"eval_samples_per_second": 518.046,
"eval_steps_per_second": 16.271,
"step": 6450
}
],
"logging_steps": 500,
"max_steps": 6450,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 15869425326000.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": {
"alpha": 0.976928179899354,
"learning_rate": 0.0005782469532714083,
"num_train_epochs": 10,
"temperature": 24
}
}