|
{ |
|
"best_metric": 0.7475538160469667, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-1344", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 1728, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0371571779251099, |
|
"learning_rate": 0.0001305282668790618, |
|
"loss": 0.3216, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5499021526418787, |
|
"eval_f1": 0.23841059602649006, |
|
"eval_loss": 0.2955350875854492, |
|
"eval_mcc": 0.17350853577425898, |
|
"eval_precision": 0.7741935483870968, |
|
"eval_recall": 0.14090019569471623, |
|
"eval_runtime": 67.6225, |
|
"eval_samples_per_second": 15.113, |
|
"eval_steps_per_second": 0.473, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.1835968494415283, |
|
"learning_rate": 0.00011421223351917907, |
|
"loss": 0.2979, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5587084148727984, |
|
"eval_f1": 0.2642740619902121, |
|
"eval_loss": 0.29196101427078247, |
|
"eval_mcc": 0.19586518668589045, |
|
"eval_precision": 0.7941176470588235, |
|
"eval_recall": 0.15851272015655576, |
|
"eval_runtime": 67.5618, |
|
"eval_samples_per_second": 15.127, |
|
"eval_steps_per_second": 0.474, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.4907313585281372, |
|
"learning_rate": 9.789620015929635e-05, |
|
"loss": 0.288, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6555772994129159, |
|
"eval_f1": 0.5243243243243243, |
|
"eval_loss": 0.28221988677978516, |
|
"eval_mcc": 0.37311506920915233, |
|
"eval_precision": 0.8471615720524017, |
|
"eval_recall": 0.3796477495107632, |
|
"eval_runtime": 67.1649, |
|
"eval_samples_per_second": 15.216, |
|
"eval_steps_per_second": 0.476, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.161053419113159, |
|
"learning_rate": 8.158016679941362e-05, |
|
"loss": 0.2856, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5968688845401174, |
|
"eval_f1": 0.3757575757575758, |
|
"eval_loss": 0.2846841514110565, |
|
"eval_mcc": 0.274495316321839, |
|
"eval_precision": 0.8322147651006712, |
|
"eval_recall": 0.24266144814090018, |
|
"eval_runtime": 67.4167, |
|
"eval_samples_per_second": 15.159, |
|
"eval_steps_per_second": 0.475, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.5259861946105957, |
|
"learning_rate": 6.52641334395309e-05, |
|
"loss": 0.2841, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6829745596868885, |
|
"eval_f1": 0.5759162303664921, |
|
"eval_loss": 0.28138452768325806, |
|
"eval_mcc": 0.42395345222624214, |
|
"eval_precision": 0.8695652173913043, |
|
"eval_recall": 0.43052837573385516, |
|
"eval_runtime": 67.041, |
|
"eval_samples_per_second": 15.244, |
|
"eval_steps_per_second": 0.477, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.6680471897125244, |
|
"learning_rate": 4.8948100079648175e-05, |
|
"loss": 0.2809, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7270058708414873, |
|
"eval_f1": 0.6674612634088201, |
|
"eval_loss": 0.282234251499176, |
|
"eval_mcc": 0.48626311495705427, |
|
"eval_precision": 0.8536585365853658, |
|
"eval_recall": 0.547945205479452, |
|
"eval_runtime": 67.379, |
|
"eval_samples_per_second": 15.168, |
|
"eval_steps_per_second": 0.475, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.0517845153808594, |
|
"learning_rate": 3.263206671976545e-05, |
|
"loss": 0.2793, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7475538160469667, |
|
"eval_f1": 0.7041284403669726, |
|
"eval_loss": 0.28309565782546997, |
|
"eval_mcc": 0.5179241840022546, |
|
"eval_precision": 0.850415512465374, |
|
"eval_recall": 0.6007827788649707, |
|
"eval_runtime": 67.566, |
|
"eval_samples_per_second": 15.126, |
|
"eval_steps_per_second": 0.474, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.6533994674682617, |
|
"learning_rate": 1.6316033359882725e-05, |
|
"loss": 0.2774, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7221135029354208, |
|
"eval_f1": 0.6594724220623502, |
|
"eval_loss": 0.282230943441391, |
|
"eval_mcc": 0.4777338430619903, |
|
"eval_precision": 0.8513931888544891, |
|
"eval_recall": 0.538160469667319, |
|
"eval_runtime": 67.39, |
|
"eval_samples_per_second": 15.165, |
|
"eval_steps_per_second": 0.475, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.621838390827179, |
|
"learning_rate": 0.0, |
|
"loss": 0.2774, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7142857142857143, |
|
"eval_f1": 0.642156862745098, |
|
"eval_loss": 0.2810159921646118, |
|
"eval_mcc": 0.46831127867565187, |
|
"eval_precision": 0.8590163934426229, |
|
"eval_recall": 0.512720156555773, |
|
"eval_runtime": 67.055, |
|
"eval_samples_per_second": 15.241, |
|
"eval_steps_per_second": 0.477, |
|
"step": 1728 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1728, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 2121256775520.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.4208671172864604, |
|
"learning_rate": 0.00014684430023894452, |
|
"num_train_epochs": 9, |
|
"per_device_train_batch_size": 16, |
|
"temperature": 48 |
|
} |
|
} |
|
|