|
{ |
|
"best_metric": 0.8277886497064579, |
|
"best_model_checkpoint": "tiny-bert-sst2-distilled/run-27/checkpoint-240", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 240, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.0442044734954834, |
|
"learning_rate": 0.00047720243978875736, |
|
"loss": 0.5494, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7270058708414873, |
|
"eval_f1": 0.7787470261697065, |
|
"eval_loss": 0.4914495050907135, |
|
"eval_precision": 0.6546666666666666, |
|
"eval_recall": 0.9608610567514677, |
|
"eval_runtime": 28.018, |
|
"eval_samples_per_second": 36.476, |
|
"eval_steps_per_second": 0.571, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.9368475675582886, |
|
"learning_rate": 0.000357901829841568, |
|
"loss": 0.4641, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7720156555772995, |
|
"eval_f1": 0.8059950041631974, |
|
"eval_loss": 0.4658460021018982, |
|
"eval_precision": 0.7014492753623188, |
|
"eval_recall": 0.9471624266144814, |
|
"eval_runtime": 27.3749, |
|
"eval_samples_per_second": 37.333, |
|
"eval_steps_per_second": 0.584, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.3749494552612305, |
|
"learning_rate": 0.00023860121989437868, |
|
"loss": 0.4456, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8189823874755382, |
|
"eval_f1": 0.8262910798122065, |
|
"eval_loss": 0.4206434190273285, |
|
"eval_precision": 0.7942238267148014, |
|
"eval_recall": 0.8610567514677103, |
|
"eval_runtime": 27.471, |
|
"eval_samples_per_second": 37.203, |
|
"eval_steps_per_second": 0.582, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.301919937133789, |
|
"learning_rate": 0.00011930060994718934, |
|
"loss": 0.4162, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8228962818003914, |
|
"eval_f1": 0.8281101614434948, |
|
"eval_loss": 0.4116476774215698, |
|
"eval_precision": 0.8044280442804428, |
|
"eval_recall": 0.8532289628180039, |
|
"eval_runtime": 27.5336, |
|
"eval_samples_per_second": 37.118, |
|
"eval_steps_per_second": 0.581, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.5170879364013672, |
|
"learning_rate": 0.0, |
|
"loss": 0.3983, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8277886497064579, |
|
"eval_f1": 0.8376383763837638, |
|
"eval_loss": 0.4083719253540039, |
|
"eval_precision": 0.7923211169284468, |
|
"eval_recall": 0.8884540117416829, |
|
"eval_runtime": 27.5311, |
|
"eval_samples_per_second": 37.122, |
|
"eval_steps_per_second": 0.581, |
|
"step": 240 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 240, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1178475986400.0, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.8647030881639047, |
|
"learning_rate": 0.0005965030497359467, |
|
"num_train_epochs": 5, |
|
"temperature": 29 |
|
} |
|
} |
|
|