{
  "best_metric": 0.8414872798434442,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-960",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 960,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.5439341068267822,
      "learning_rate": 0.00025636357557273684,
      "loss": 0.5365,
      "step": 96
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7935420743639922,
      "eval_f1": 0.8087035358114234,
      "eval_loss": 0.4461754560470581,
      "eval_precision": 0.7533783783783784,
      "eval_recall": 0.87279843444227,
      "eval_runtime": 25.9122,
      "eval_samples_per_second": 39.441,
      "eval_steps_per_second": 1.235,
      "step": 96
    },
    {
      "epoch": 2.0,
      "grad_norm": 4.693763732910156,
      "learning_rate": 0.00022787873384243272,
      "loss": 0.4548,
      "step": 192
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8003913894324853,
      "eval_f1": 0.8216783216783217,
      "eval_loss": 0.43167614936828613,
      "eval_precision": 0.7424960505529226,
      "eval_recall": 0.9197651663405088,
      "eval_runtime": 25.6332,
      "eval_samples_per_second": 39.87,
      "eval_steps_per_second": 1.248,
      "step": 192
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.1225955486297607,
      "learning_rate": 0.00019939389211212863,
      "loss": 0.4276,
      "step": 288
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.812133072407045,
      "eval_f1": 0.8282647584973166,
      "eval_loss": 0.4346017837524414,
      "eval_precision": 0.7627677100494233,
      "eval_recall": 0.9060665362035225,
      "eval_runtime": 25.7026,
      "eval_samples_per_second": 39.762,
      "eval_steps_per_second": 1.245,
      "step": 288
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.9123752117156982,
      "learning_rate": 0.00017090905038182454,
      "loss": 0.4092,
      "step": 384
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8228962818003914,
      "eval_f1": 0.8297271872060207,
      "eval_loss": 0.3994919955730438,
      "eval_precision": 0.7989130434782609,
      "eval_recall": 0.863013698630137,
      "eval_runtime": 25.5493,
      "eval_samples_per_second": 40.001,
      "eval_steps_per_second": 1.252,
      "step": 384
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.3602194786071777,
      "learning_rate": 0.00014242420865152045,
      "loss": 0.3965,
      "step": 480
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.824853228962818,
      "eval_f1": 0.8306527909176915,
      "eval_loss": 0.407778263092041,
      "eval_precision": 0.8040293040293041,
      "eval_recall": 0.8590998043052838,
      "eval_runtime": 25.3461,
      "eval_samples_per_second": 40.322,
      "eval_steps_per_second": 1.263,
      "step": 480
    },
    {
      "epoch": 6.0,
      "grad_norm": 2.538515567779541,
      "learning_rate": 0.00011393936692121636,
      "loss": 0.3968,
      "step": 576
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8317025440313112,
      "eval_f1": 0.8477876106194692,
      "eval_loss": 0.4041251838207245,
      "eval_precision": 0.7738287560581584,
      "eval_recall": 0.9373776908023483,
      "eval_runtime": 25.09,
      "eval_samples_per_second": 40.733,
      "eval_steps_per_second": 1.275,
      "step": 576
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.204845428466797,
      "learning_rate": 8.545452519091227e-05,
      "loss": 0.3854,
      "step": 672
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8365949119373777,
      "eval_f1": 0.8499550763701708,
      "eval_loss": 0.39838555455207825,
      "eval_precision": 0.7857142857142857,
      "eval_recall": 0.9256360078277887,
      "eval_runtime": 24.9237,
      "eval_samples_per_second": 41.005,
      "eval_steps_per_second": 1.284,
      "step": 672
    },
    {
      "epoch": 8.0,
      "grad_norm": 2.975243091583252,
      "learning_rate": 5.696968346060818e-05,
      "loss": 0.3772,
      "step": 768
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.837573385518591,
      "eval_f1": 0.8520499108734402,
      "eval_loss": 0.39499443769454956,
      "eval_precision": 0.7823240589198036,
      "eval_recall": 0.9354207436399217,
      "eval_runtime": 24.9105,
      "eval_samples_per_second": 41.027,
      "eval_steps_per_second": 1.285,
      "step": 768
    },
    {
      "epoch": 9.0,
      "grad_norm": 3.0168142318725586,
      "learning_rate": 2.848484173030409e-05,
      "loss": 0.372,
      "step": 864
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.8405088062622309,
      "eval_f1": 0.8500459981600736,
      "eval_loss": 0.3911450505256653,
      "eval_precision": 0.8020833333333334,
      "eval_recall": 0.9041095890410958,
      "eval_runtime": 24.8944,
      "eval_samples_per_second": 41.053,
      "eval_steps_per_second": 1.285,
      "step": 864
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.654553174972534,
      "learning_rate": 0.0,
      "loss": 0.3739,
      "step": 960
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.8414872798434442,
      "eval_f1": 0.852994555353902,
      "eval_loss": 0.39094072580337524,
      "eval_precision": 0.7952622673434856,
      "eval_recall": 0.9197651663405088,
      "eval_runtime": 24.9774,
      "eval_samples_per_second": 40.917,
      "eval_steps_per_second": 1.281,
      "step": 960
    }
  ],
  "logging_steps": 500,
  "max_steps": 960,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 2356951972800.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.8320855687119669,
    "learning_rate": 0.0002848484173030409,
    "num_train_epochs": 10,
    "temperature": 8
  }
}