|
{ |
|
"best_metric": 0.8160469667318982, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-9/checkpoint-405", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 486, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.9513042569160461, |
|
"learning_rate": 0.0005954099159667608, |
|
"loss": 0.499, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6829745596868885, |
|
"eval_f1": 0.6067961165048544, |
|
"eval_loss": 0.4608267843723297, |
|
"eval_precision": 0.7987220447284346, |
|
"eval_recall": 0.4892367906066536, |
|
"eval_runtime": 28.3141, |
|
"eval_samples_per_second": 36.095, |
|
"eval_steps_per_second": 1.13, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.7321057319641113, |
|
"learning_rate": 0.0004961749299723006, |
|
"loss": 0.4578, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7827788649706457, |
|
"eval_f1": 0.798913043478261, |
|
"eval_loss": 0.42703160643577576, |
|
"eval_precision": 0.7436762225969646, |
|
"eval_recall": 0.863013698630137, |
|
"eval_runtime": 28.5611, |
|
"eval_samples_per_second": 35.783, |
|
"eval_steps_per_second": 1.12, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.0545783042907715, |
|
"learning_rate": 0.00039693994397784046, |
|
"loss": 0.4299, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7954990215264188, |
|
"eval_f1": 0.8168273444347064, |
|
"eval_loss": 0.4317159950733185, |
|
"eval_precision": 0.7396825396825397, |
|
"eval_recall": 0.9119373776908023, |
|
"eval_runtime": 28.0555, |
|
"eval_samples_per_second": 36.428, |
|
"eval_steps_per_second": 1.141, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.1239681243896484, |
|
"learning_rate": 0.0002977049579833804, |
|
"loss": 0.4141, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8052837573385518, |
|
"eval_f1": 0.7995971802618329, |
|
"eval_loss": 0.41590413451194763, |
|
"eval_precision": 0.8236514522821576, |
|
"eval_recall": 0.776908023483366, |
|
"eval_runtime": 28.4853, |
|
"eval_samples_per_second": 35.878, |
|
"eval_steps_per_second": 1.123, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.6590023040771484, |
|
"learning_rate": 0.00019846997198892023, |
|
"loss": 0.4038, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8160469667318982, |
|
"eval_f1": 0.8256029684601114, |
|
"eval_loss": 0.40745633840560913, |
|
"eval_precision": 0.7848324514991182, |
|
"eval_recall": 0.8708414872798435, |
|
"eval_runtime": 28.3469, |
|
"eval_samples_per_second": 36.053, |
|
"eval_steps_per_second": 1.129, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.6547752618789673, |
|
"learning_rate": 9.923498599446012e-05, |
|
"loss": 0.3966, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8111545988258317, |
|
"eval_f1": 0.8259693417493238, |
|
"eval_loss": 0.40955477952957153, |
|
"eval_precision": 0.7658862876254181, |
|
"eval_recall": 0.8962818003913894, |
|
"eval_runtime": 27.9254, |
|
"eval_samples_per_second": 36.598, |
|
"eval_steps_per_second": 1.146, |
|
"step": 486 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 567, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"total_flos": 1414171183680.0, |
|
"train_batch_size": 38, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.7591558093291033, |
|
"learning_rate": 0.0006946449019612209, |
|
"num_train_epochs": 7, |
|
"per_device_train_batch_size": 38, |
|
"temperature": 16 |
|
} |
|
} |
|
|