{ "best_metric": 0.8307240704500979, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-7660", "epoch": 10.0, "eval_steps": 500, "global_step": 7660, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.275282621383667, "learning_rate": 0.00013397987335151483, "loss": 0.5034, "step": 766 }, { "epoch": 1.0, "eval_accuracy": 0.7886497064579256, "eval_f1": 0.8153846153846154, "eval_loss": 0.4397984445095062, "eval_precision": 0.723823975720789, "eval_recall": 0.9334637964774951, "eval_runtime": 133.4478, "eval_samples_per_second": 7.658, "eval_steps_per_second": 1.918, "step": 766 }, { "epoch": 2.0, "grad_norm": 2.074667453765869, "learning_rate": 0.00011909322075690206, "loss": 0.442, "step": 1532 }, { "epoch": 2.0, "eval_accuracy": 0.8101761252446184, "eval_f1": 0.831889081455806, "eval_loss": 0.43401414155960083, "eval_precision": 0.7465007776049767, "eval_recall": 0.9393346379647749, "eval_runtime": 133.9782, "eval_samples_per_second": 7.628, "eval_steps_per_second": 1.911, "step": 1532 }, { "epoch": 3.0, "grad_norm": 2.7122271060943604, "learning_rate": 0.0001042065681622893, "loss": 0.4346, "step": 2298 }, { "epoch": 3.0, "eval_accuracy": 0.8140900195694716, "eval_f1": 0.8250460405156538, "eval_loss": 0.4545851945877075, "eval_precision": 0.7791304347826087, "eval_recall": 0.8767123287671232, "eval_runtime": 131.1842, "eval_samples_per_second": 7.791, "eval_steps_per_second": 1.951, "step": 2298 }, { "epoch": 4.0, "grad_norm": 3.8108601570129395, "learning_rate": 8.931991556767654e-05, "loss": 0.4272, "step": 3064 }, { "epoch": 4.0, "eval_accuracy": 0.8258317025440313, "eval_f1": 0.8372943327239488, "eval_loss": 0.4149971902370453, "eval_precision": 0.7855917667238422, "eval_recall": 0.8962818003913894, "eval_runtime": 131.3897, "eval_samples_per_second": 7.778, "eval_steps_per_second": 1.948, "step": 3064 }, { "epoch": 5.0, "grad_norm": 29.14157485961914, "learning_rate": 7.443326297306379e-05, "loss": 0.4139, "step": 3830 }, { "epoch": 5.0, "eval_accuracy": 0.8228962818003914, "eval_f1": 0.8353048225659692, "eval_loss": 0.43950268626213074, "eval_precision": 0.7806122448979592, "eval_recall": 0.898238747553816, "eval_runtime": 131.4228, "eval_samples_per_second": 7.776, "eval_steps_per_second": 1.948, "step": 3830 }, { "epoch": 6.0, "grad_norm": 0.7704387903213501, "learning_rate": 5.954661037845103e-05, "loss": 0.4072, "step": 4596 }, { "epoch": 6.0, "eval_accuracy": 0.8180039138943248, "eval_f1": 0.8318264014466545, "eval_loss": 0.4495176076889038, "eval_precision": 0.773109243697479, "eval_recall": 0.9001956947162426, "eval_runtime": 132.0859, "eval_samples_per_second": 7.737, "eval_steps_per_second": 1.938, "step": 4596 }, { "epoch": 7.0, "grad_norm": 6.204219818115234, "learning_rate": 4.465995778383827e-05, "loss": 0.3994, "step": 5362 }, { "epoch": 7.0, "eval_accuracy": 0.8238747553816047, "eval_f1": 0.8372513562386981, "eval_loss": 0.4403352737426758, "eval_precision": 0.7781512605042017, "eval_recall": 0.9060665362035225, "eval_runtime": 133.4911, "eval_samples_per_second": 7.656, "eval_steps_per_second": 1.918, "step": 5362 }, { "epoch": 8.0, "grad_norm": 6.954471588134766, "learning_rate": 2.9773305189225516e-05, "loss": 0.392, "step": 6128 }, { "epoch": 8.0, "eval_accuracy": 0.8268101761252447, "eval_f1": 0.8395285584768812, "eval_loss": 0.43856292963027954, "eval_precision": 0.7820945945945946, "eval_recall": 0.9060665362035225, "eval_runtime": 131.9827, "eval_samples_per_second": 7.743, "eval_steps_per_second": 1.94, "step": 6128 }, { "epoch": 9.0, "grad_norm": 0.39264997839927673, "learning_rate": 1.4886652594612758e-05, "loss": 0.3917, "step": 6894 }, { "epoch": 9.0, "eval_accuracy": 0.8287671232876712, "eval_f1": 0.8353715898400752, "eval_loss": 0.4313138425350189, "eval_precision": 0.8043478260869565, "eval_recall": 0.8688845401174168, "eval_runtime": 132.2212, "eval_samples_per_second": 7.729, "eval_steps_per_second": 1.936, "step": 6894 }, { "epoch": 10.0, "grad_norm": 0.9275560975074768, "learning_rate": 0.0, "loss": 0.3787, "step": 7660 }, { "epoch": 10.0, "eval_accuracy": 0.8307240704500979, "eval_f1": 0.840258541089566, "eval_loss": 0.4347919523715973, "eval_precision": 0.7954545454545454, "eval_recall": 0.8904109589041096, "eval_runtime": 133.1343, "eval_samples_per_second": 7.676, "eval_steps_per_second": 1.923, "step": 7660 } ], "logging_steps": 500, "max_steps": 7660, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2356951972800.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "alpha": 0.8157328229875264, "learning_rate": 0.00014886652594612757, "num_train_epochs": 10, "temperature": 20 } }