|
{ |
|
"best_metric": 0.7521968365553603, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-38/checkpoint-642", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 856, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.823625087738037, |
|
"learning_rate": 0.00022026694526910956, |
|
"loss": 0.4997, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7129466900995899, |
|
"eval_f1": 0.40243902439024387, |
|
"eval_loss": 0.4626123011112213, |
|
"eval_mcc": 0.2854021761857005, |
|
"eval_precision": 0.6573705179282868, |
|
"eval_recall": 0.28998242530755713, |
|
"eval_runtime": 3.1456, |
|
"eval_samples_per_second": 542.661, |
|
"eval_steps_per_second": 17.167, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7993404865264893, |
|
"learning_rate": 0.0001888002388020939, |
|
"loss": 0.4542, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7398945518453427, |
|
"eval_f1": 0.47641509433962265, |
|
"eval_loss": 0.4441252648830414, |
|
"eval_mcc": 0.366324992569054, |
|
"eval_precision": 0.7240143369175627, |
|
"eval_recall": 0.35500878734622143, |
|
"eval_runtime": 3.7126, |
|
"eval_samples_per_second": 459.782, |
|
"eval_steps_per_second": 14.545, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.832775354385376, |
|
"learning_rate": 0.00015733353233507826, |
|
"loss": 0.4371, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7521968365553603, |
|
"eval_f1": 0.5052631578947367, |
|
"eval_loss": 0.44420263171195984, |
|
"eval_mcc": 0.4015258565021214, |
|
"eval_precision": 0.7552447552447552, |
|
"eval_recall": 0.37961335676625657, |
|
"eval_runtime": 3.1594, |
|
"eval_samples_per_second": 540.296, |
|
"eval_steps_per_second": 17.092, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.8890116214752197, |
|
"learning_rate": 0.0001258668258680626, |
|
"loss": 0.4321, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7398945518453427, |
|
"eval_f1": 0.43367346938775514, |
|
"eval_loss": 0.4393432140350342, |
|
"eval_mcc": 0.3683014058572899, |
|
"eval_precision": 0.7906976744186046, |
|
"eval_recall": 0.29876977152899825, |
|
"eval_runtime": 3.8868, |
|
"eval_samples_per_second": 439.178, |
|
"eval_steps_per_second": 13.893, |
|
"step": 856 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1712, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"total_flos": 2099102657760.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.806613569484766, |
|
"learning_rate": 0.0002517336517361252, |
|
"num_train_epochs": 8, |
|
"temperature": 7 |
|
} |
|
} |
|
|