|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.8, |
|
"eval_steps": 22, |
|
"global_step": 110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4888888888888889, |
|
"grad_norm": 0.9318796396255493, |
|
"learning_rate": 0.00018857142857142857, |
|
"loss": 2.4418, |
|
"mean_token_accuracy": 0.5003274670194231, |
|
"step": 11, |
|
"timestamp_in_seconds": 1740058570.7495685 |
|
}, |
|
{ |
|
"epoch": 0.9777777777777777, |
|
"grad_norm": 0.8860308527946472, |
|
"learning_rate": 0.00016761904761904763, |
|
"loss": 1.9864, |
|
"mean_token_accuracy": 0.5491961038627919, |
|
"step": 22, |
|
"timestamp_in_seconds": 1740058588.2654634 |
|
}, |
|
{ |
|
"contract_score": 0.5170776833233711, |
|
"epoch": 0.9777777777777777, |
|
"eval_loss": 2.119361162185669, |
|
"eval_mean_token_accuracy": 0.5128270433443488, |
|
"eval_runtime": 0.524, |
|
"eval_samples_per_second": 38.169, |
|
"eval_steps_per_second": 5.725, |
|
"step": 22, |
|
"timestamp_in_seconds": 1740058617.4410768 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.8875806927680969, |
|
"learning_rate": 0.00014666666666666666, |
|
"loss": 1.7433, |
|
"mean_token_accuracy": 0.5918375980378316, |
|
"step": 33, |
|
"timestamp_in_seconds": 1740058634.3158836 |
|
}, |
|
{ |
|
"epoch": 1.9333333333333333, |
|
"grad_norm": 1.0898572206497192, |
|
"learning_rate": 0.00012571428571428572, |
|
"loss": 1.8075, |
|
"mean_token_accuracy": 0.5785033192624862, |
|
"step": 44, |
|
"timestamp_in_seconds": 1740058651.833792 |
|
}, |
|
{ |
|
"contract_score": 0.5216960497800869, |
|
"epoch": 1.9333333333333333, |
|
"eval_loss": 2.1378540992736816, |
|
"eval_mean_token_accuracy": 0.4983097909781991, |
|
"eval_runtime": 0.5221, |
|
"eval_samples_per_second": 38.303, |
|
"eval_steps_per_second": 5.745, |
|
"step": 44, |
|
"timestamp_in_seconds": 1740058680.6121447 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 1.2661283016204834, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 1.4937, |
|
"mean_token_accuracy": 0.6320650446268925, |
|
"step": 55, |
|
"timestamp_in_seconds": 1740058697.4347959 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 1.691213607788086, |
|
"learning_rate": 8.380952380952382e-05, |
|
"loss": 1.409, |
|
"mean_token_accuracy": 0.6488769100266372, |
|
"step": 66, |
|
"timestamp_in_seconds": 1740058714.934673 |
|
}, |
|
{ |
|
"contract_score": 0.5374702803883722, |
|
"epoch": 2.888888888888889, |
|
"eval_loss": 2.2828011512756348, |
|
"eval_mean_token_accuracy": 0.4983097909781991, |
|
"eval_runtime": 0.522, |
|
"eval_samples_per_second": 38.312, |
|
"eval_steps_per_second": 5.747, |
|
"step": 66, |
|
"timestamp_in_seconds": 1740058746.603091 |
|
}, |
|
{ |
|
"epoch": 3.3555555555555556, |
|
"grad_norm": 2.212834119796753, |
|
"learning_rate": 6.285714285714286e-05, |
|
"loss": 1.0352, |
|
"mean_token_accuracy": 0.7221250653501003, |
|
"step": 77, |
|
"timestamp_in_seconds": 1740058763.4824302 |
|
}, |
|
{ |
|
"epoch": 3.8444444444444446, |
|
"grad_norm": 2.533773899078369, |
|
"learning_rate": 4.190476190476191e-05, |
|
"loss": 0.9292, |
|
"mean_token_accuracy": 0.7455536097074219, |
|
"step": 88, |
|
"timestamp_in_seconds": 1740058781.1095395 |
|
}, |
|
{ |
|
"contract_score": 0.5145041195676524, |
|
"epoch": 3.8444444444444446, |
|
"eval_loss": 2.5186023712158203, |
|
"eval_mean_token_accuracy": 0.4868165874886475, |
|
"eval_runtime": 0.5208, |
|
"eval_samples_per_second": 38.406, |
|
"eval_steps_per_second": 5.761, |
|
"step": 88, |
|
"timestamp_in_seconds": 1740058807.5439005 |
|
}, |
|
{ |
|
"epoch": 4.311111111111111, |
|
"grad_norm": 2.1104252338409424, |
|
"learning_rate": 2.0952380952380954e-05, |
|
"loss": 0.8056, |
|
"mean_token_accuracy": 0.7859161157789674, |
|
"step": 99, |
|
"timestamp_in_seconds": 1740058824.42298 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 2.379983901977539, |
|
"learning_rate": 0.0, |
|
"loss": 0.644, |
|
"mean_token_accuracy": 0.8199670519498358, |
|
"step": 110, |
|
"timestamp_in_seconds": 1740058842.0086482 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 2.7059178352355957, |
|
"eval_mean_token_accuracy": 0.4916755852569576, |
|
"eval_runtime": 0.5201, |
|
"eval_samples_per_second": 38.456, |
|
"eval_steps_per_second": 5.768, |
|
"step": 110, |
|
"timestamp_in_seconds": 1740058842.5604758 |
|
} |
|
], |
|
"logging_steps": 11, |
|
"max_steps": 110, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 22, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5949225070012416.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|