|
{ |
|
"best_metric": 0.6702544031311155, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-576", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 576, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.663398027420044, |
|
"learning_rate": 0.00027867701883546326, |
|
"loss": 0.2787, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5499021526418787, |
|
"eval_f1": 0.24092409240924093, |
|
"eval_loss": 0.25340619683265686, |
|
"eval_precision": 0.7684210526315789, |
|
"eval_recall": 0.14285714285714285, |
|
"eval_runtime": 28.5207, |
|
"eval_samples_per_second": 35.834, |
|
"eval_steps_per_second": 1.122, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.7603411078453064, |
|
"learning_rate": 0.00024384239148103037, |
|
"loss": 0.2582, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5675146771037182, |
|
"eval_f1": 0.300632911392405, |
|
"eval_loss": 0.2482573688030243, |
|
"eval_precision": 0.7851239669421488, |
|
"eval_recall": 0.18590998043052837, |
|
"eval_runtime": 28.5782, |
|
"eval_samples_per_second": 35.762, |
|
"eval_steps_per_second": 1.12, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.6440157294273376, |
|
"learning_rate": 0.00020900776412659743, |
|
"loss": 0.2508, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6027397260273972, |
|
"eval_f1": 0.40117994100294985, |
|
"eval_loss": 0.25017455220222473, |
|
"eval_precision": 0.8143712574850299, |
|
"eval_recall": 0.26614481409001955, |
|
"eval_runtime": 28.8169, |
|
"eval_samples_per_second": 35.465, |
|
"eval_steps_per_second": 1.11, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.8489454388618469, |
|
"learning_rate": 0.00017417313677216454, |
|
"loss": 0.2472, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.576320939334638, |
|
"eval_f1": 0.304975922953451, |
|
"eval_loss": 0.2455543428659439, |
|
"eval_precision": 0.8482142857142857, |
|
"eval_recall": 0.18590998043052837, |
|
"eval_runtime": 28.3606, |
|
"eval_samples_per_second": 36.036, |
|
"eval_steps_per_second": 1.128, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.6578030586242676, |
|
"learning_rate": 0.00013933850941773163, |
|
"loss": 0.2444, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5949119373776908, |
|
"eval_f1": 0.37082066869300917, |
|
"eval_loss": 0.24357673525810242, |
|
"eval_precision": 0.8299319727891157, |
|
"eval_recall": 0.23874755381604695, |
|
"eval_runtime": 28.5893, |
|
"eval_samples_per_second": 35.748, |
|
"eval_steps_per_second": 1.119, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.576878547668457, |
|
"learning_rate": 0.00010450388206329872, |
|
"loss": 0.244, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6702544031311155, |
|
"eval_f1": 0.5536423841059602, |
|
"eval_loss": 0.24780145287513733, |
|
"eval_precision": 0.8565573770491803, |
|
"eval_recall": 0.4090019569471624, |
|
"eval_runtime": 28.8228, |
|
"eval_samples_per_second": 35.458, |
|
"eval_steps_per_second": 1.11, |
|
"step": 576 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 864, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9, |
|
"save_steps": 500, |
|
"total_flos": 1414171183680.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.3483609408663828, |
|
"learning_rate": 0.0003135116461898962, |
|
"num_train_epochs": 9, |
|
"temperature": 7 |
|
} |
|
} |
|
|