{
  "best_metric": 0.7615686274509804,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-12/checkpoint-1120",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 1120,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 1.5787363052368164,
      "learning_rate": 0.00024237605415726302,
      "loss": 0.4097,
      "step": 160
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6658823529411765,
      "eval_f1": 0.009302325581395347,
      "eval_loss": 0.38458171486854553,
      "eval_mcc": 0.008873565094161137,
      "eval_precision": 0.4,
      "eval_recall": 0.004705882352941176,
      "eval_runtime": 1.8643,
      "eval_samples_per_second": 683.908,
      "eval_steps_per_second": 21.456,
      "step": 160
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.8726533055305481,
      "learning_rate": 0.0002077509035633683,
      "loss": 0.3859,
      "step": 320
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7027450980392157,
      "eval_f1": 0.2612085769980507,
      "eval_loss": 0.37138044834136963,
      "eval_mcc": 0.24722748455315502,
      "eval_precision": 0.7613636363636364,
      "eval_recall": 0.15764705882352942,
      "eval_runtime": 1.863,
      "eval_samples_per_second": 684.383,
      "eval_steps_per_second": 21.471,
      "step": 320
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.1823008060455322,
      "learning_rate": 0.00017312575296947358,
      "loss": 0.3761,
      "step": 480
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7419607843137255,
      "eval_f1": 0.5168869309838473,
      "eval_loss": 0.3662872314453125,
      "eval_mcc": 0.3765709652306714,
      "eval_precision": 0.6875,
      "eval_recall": 0.41411764705882353,
      "eval_runtime": 1.8612,
      "eval_samples_per_second": 685.044,
      "eval_steps_per_second": 21.492,
      "step": 480
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.8506985306739807,
      "learning_rate": 0.00013850060237557887,
      "loss": 0.3709,
      "step": 640
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7231372549019608,
      "eval_f1": 0.3796133567662565,
      "eval_loss": 0.36449602246284485,
      "eval_mcc": 0.315387850148385,
      "eval_precision": 0.75,
      "eval_recall": 0.2541176470588235,
      "eval_runtime": 1.8672,
      "eval_samples_per_second": 682.835,
      "eval_steps_per_second": 21.422,
      "step": 640
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.953569233417511,
      "learning_rate": 0.00010387545178168416,
      "loss": 0.3673,
      "step": 800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7537254901960785,
      "eval_f1": 0.5552407932011331,
      "eval_loss": 0.3644358813762665,
      "eval_mcc": 0.41074948302085584,
      "eval_precision": 0.697508896797153,
      "eval_recall": 0.4611764705882353,
      "eval_runtime": 1.8615,
      "eval_samples_per_second": 684.938,
      "eval_steps_per_second": 21.488,
      "step": 800
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.1368662118911743,
      "learning_rate": 6.925030118778943e-05,
      "loss": 0.3635,
      "step": 960
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7482352941176471,
      "eval_f1": 0.5172932330827068,
      "eval_loss": 0.3630056381225586,
      "eval_mcc": 0.3915780041490244,
      "eval_precision": 0.7166666666666667,
      "eval_recall": 0.4047058823529412,
      "eval_runtime": 1.8707,
      "eval_samples_per_second": 681.562,
      "eval_steps_per_second": 21.382,
      "step": 960
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.311170220375061,
      "learning_rate": 3.4625150593894717e-05,
      "loss": 0.363,
      "step": 1120
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7615686274509804,
      "eval_f1": 0.5694050991501417,
      "eval_loss": 0.3646318018436432,
      "eval_mcc": 0.43081867600233087,
      "eval_precision": 0.7153024911032029,
      "eval_recall": 0.47294117647058825,
      "eval_runtime": 1.8727,
      "eval_samples_per_second": 680.818,
      "eval_steps_per_second": 21.359,
      "step": 1120
    }
  ],
  "logging_steps": 500,
  "max_steps": 1280,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "total_flos": 2042288065440.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.6289598397405775,
    "learning_rate": 0.00027700120475115773,
    "num_train_epochs": 8,
    "temperature": 22
  }
}