{ "best_metric": 0.5450097847358122, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-4/checkpoint-1532", "epoch": 7.0, "eval_steps": 500, "global_step": 5362, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.27224254608154297, "learning_rate": 0.00018865680984213388, "loss": 0.0871, "step": 766 }, { "epoch": 1.0, "eval_accuracy": 0.5391389432485323, "eval_f1": 0.19761499148211242, "eval_loss": 0.0780063048005104, "eval_precision": 0.7631578947368421, "eval_recall": 0.11350293542074363, "eval_runtime": 132.2112, "eval_samples_per_second": 7.73, "eval_steps_per_second": 1.936, "step": 766 }, { "epoch": 2.0, "grad_norm": 0.6950057744979858, "learning_rate": 0.00015721400820177824, "loss": 0.0802, "step": 1532 }, { "epoch": 2.0, "eval_accuracy": 0.5450097847358122, "eval_f1": 0.22885572139303484, "eval_loss": 0.07739852368831635, "eval_precision": 0.75, "eval_recall": 0.1350293542074364, "eval_runtime": 132.9676, "eval_samples_per_second": 7.686, "eval_steps_per_second": 1.925, "step": 1532 }, { "epoch": 3.0, "grad_norm": 0.19945429265499115, "learning_rate": 0.0001257712065614226, "loss": 0.0788, "step": 2298 }, { "epoch": 3.0, "eval_accuracy": 0.5283757338551859, "eval_f1": 0.15734265734265732, "eval_loss": 0.0775846391916275, "eval_precision": 0.7377049180327869, "eval_recall": 0.08806262230919765, "eval_runtime": 133.3593, "eval_samples_per_second": 7.664, "eval_steps_per_second": 1.92, "step": 2298 }, { "epoch": 4.0, "grad_norm": 0.47727036476135254, "learning_rate": 9.432840492106694e-05, "loss": 0.0776, "step": 3064 }, { "epoch": 4.0, "eval_accuracy": 0.5322896281800391, "eval_f1": 0.16724738675958187, "eval_loss": 0.07602300494909286, "eval_precision": 0.7619047619047619, "eval_recall": 0.09393346379647749, "eval_runtime": 132.0039, "eval_samples_per_second": 7.742, "eval_steps_per_second": 1.939, "step": 3064 }, { "epoch": 5.0, "grad_norm": 1.0770089626312256, "learning_rate": 6.28856032807113e-05, "loss": 0.077, "step": 3830 }, { "epoch": 5.0, "eval_accuracy": 0.5401174168297456, "eval_f1": 0.20068027210884357, "eval_loss": 0.07548870891332626, "eval_precision": 0.7662337662337663, "eval_recall": 0.11545988258317025, "eval_runtime": 138.6281, "eval_samples_per_second": 7.372, "eval_steps_per_second": 1.847, "step": 3830 }, { "epoch": 6.0, "grad_norm": 0.5817792415618896, "learning_rate": 3.144280164035565e-05, "loss": 0.076, "step": 4596 }, { "epoch": 6.0, "eval_accuracy": 0.541095890410959, "eval_f1": 0.20373514431239387, "eval_loss": 0.07543697953224182, "eval_precision": 0.7692307692307693, "eval_recall": 0.11741682974559686, "eval_runtime": 132.1275, "eval_samples_per_second": 7.735, "eval_steps_per_second": 1.938, "step": 4596 }, { "epoch": 7.0, "grad_norm": 0.6458355784416199, "learning_rate": 0.0, "loss": 0.0754, "step": 5362 }, { "epoch": 7.0, "eval_accuracy": 0.5391389432485323, "eval_f1": 0.19761499148211242, "eval_loss": 0.07505200803279877, "eval_precision": 0.7631578947368421, "eval_recall": 0.11350293542074363, "eval_runtime": 132.5302, "eval_samples_per_second": 7.711, "eval_steps_per_second": 1.932, "step": 5362 } ], "logging_steps": 500, "max_steps": 5362, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1649866380960.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "alpha": 0.08859371316848064, "learning_rate": 0.00022009961148248953, "num_train_epochs": 7, "temperature": 26 } }