|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 10, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 1.7679167985916138, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3858, |
|
"mean_token_accuracy": 0.5069678752841977, |
|
"step": 5, |
|
"timestamp_in_seconds": 1739801896.2577183 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.8494528532028198, |
|
"learning_rate": 0.00019047619047619048, |
|
"loss": 1.8622, |
|
"mean_token_accuracy": 0.5660446086812065, |
|
"step": 10, |
|
"timestamp_in_seconds": 1739801910.776645 |
|
}, |
|
{ |
|
"contract_score": 0.5378601332314197, |
|
"epoch": 0.8888888888888888, |
|
"eval_loss": 1.835336446762085, |
|
"eval_mean_token_accuracy": 0.5587292609351433, |
|
"eval_runtime": 1.0359, |
|
"eval_samples_per_second": 9.653, |
|
"eval_steps_per_second": 1.931, |
|
"step": 10, |
|
"timestamp_in_seconds": 1739801926.8582673 |
|
}, |
|
{ |
|
"epoch": 1.2666666666666666, |
|
"grad_norm": 0.8751085996627808, |
|
"learning_rate": 0.00018095238095238095, |
|
"loss": 1.5911, |
|
"mean_token_accuracy": 0.5871365485092056, |
|
"step": 15, |
|
"timestamp_in_seconds": 1739801938.9171028 |
|
}, |
|
{ |
|
"epoch": 1.7111111111111112, |
|
"grad_norm": 0.9368126392364502, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 1.6128, |
|
"mean_token_accuracy": 0.5925216216537559, |
|
"step": 20, |
|
"timestamp_in_seconds": 1739801953.6685803 |
|
}, |
|
{ |
|
"contract_score": 0.5344367718449038, |
|
"epoch": 1.7111111111111112, |
|
"eval_loss": 1.8780924081802368, |
|
"eval_mean_token_accuracy": 0.5514234539969833, |
|
"eval_runtime": 1.04, |
|
"eval_samples_per_second": 9.616, |
|
"eval_steps_per_second": 1.923, |
|
"step": 20, |
|
"timestamp_in_seconds": 1739801968.0753272 |
|
}, |
|
{ |
|
"epoch": 2.088888888888889, |
|
"grad_norm": 1.0232841968536377, |
|
"learning_rate": 0.00016190476190476192, |
|
"loss": 1.4347, |
|
"mean_token_accuracy": 0.628910414833075, |
|
"step": 25, |
|
"timestamp_in_seconds": 1739801980.2135594 |
|
}, |
|
{ |
|
"epoch": 2.533333333333333, |
|
"grad_norm": 1.1748850345611572, |
|
"learning_rate": 0.00015238095238095237, |
|
"loss": 1.2063, |
|
"mean_token_accuracy": 0.6789763947731228, |
|
"step": 30, |
|
"timestamp_in_seconds": 1739801994.0725858 |
|
}, |
|
{ |
|
"contract_score": 0.5540201261867204, |
|
"epoch": 2.533333333333333, |
|
"eval_loss": 2.122205972671509, |
|
"eval_mean_token_accuracy": 0.5495380844645551, |
|
"eval_runtime": 1.0386, |
|
"eval_samples_per_second": 9.629, |
|
"eval_steps_per_second": 1.926, |
|
"step": 30, |
|
"timestamp_in_seconds": 1739802010.3303163 |
|
}, |
|
{ |
|
"epoch": 2.977777777777778, |
|
"grad_norm": 2.4268102645874023, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.946, |
|
"mean_token_accuracy": 0.7298750072916594, |
|
"step": 35, |
|
"timestamp_in_seconds": 1739802024.8007138 |
|
}, |
|
{ |
|
"epoch": 3.3555555555555556, |
|
"grad_norm": 1.7162376642227173, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.6421, |
|
"mean_token_accuracy": 0.8255746848231004, |
|
"step": 40, |
|
"timestamp_in_seconds": 1739802037.2875772 |
|
}, |
|
{ |
|
"contract_score": 0.5557923053647376, |
|
"epoch": 3.3555555555555556, |
|
"eval_loss": 2.638225555419922, |
|
"eval_mean_token_accuracy": 0.5211161387631975, |
|
"eval_runtime": 1.0404, |
|
"eval_samples_per_second": 9.612, |
|
"eval_steps_per_second": 1.922, |
|
"step": 40, |
|
"timestamp_in_seconds": 1739802054.9630625 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 1.785975456237793, |
|
"learning_rate": 0.0001238095238095238, |
|
"loss": 0.4697, |
|
"mean_token_accuracy": 0.8655947583935519, |
|
"step": 45, |
|
"timestamp_in_seconds": 1739802069.1940033 |
|
}, |
|
{ |
|
"epoch": 4.177777777777778, |
|
"grad_norm": 1.2647113800048828, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.3219, |
|
"mean_token_accuracy": 0.904521393501244, |
|
"step": 50, |
|
"timestamp_in_seconds": 1739802080.7175117 |
|
}, |
|
{ |
|
"contract_score": 0.5660714248941624, |
|
"epoch": 4.177777777777778, |
|
"eval_loss": 3.285521984100342, |
|
"eval_mean_token_accuracy": 0.5330411010558069, |
|
"eval_runtime": 1.0389, |
|
"eval_samples_per_second": 9.625, |
|
"eval_steps_per_second": 1.925, |
|
"step": 50, |
|
"timestamp_in_seconds": 1739802101.2406597 |
|
}, |
|
{ |
|
"epoch": 4.622222222222222, |
|
"grad_norm": 5.707488536834717, |
|
"learning_rate": 0.00010476190476190477, |
|
"loss": 0.1122, |
|
"mean_token_accuracy": 0.9662833272789735, |
|
"step": 55, |
|
"timestamp_in_seconds": 1739802115.3834107 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.8283370733261108, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 0.1454, |
|
"mean_token_accuracy": 0.9480247191005747, |
|
"step": 60, |
|
"timestamp_in_seconds": 1739802127.8953981 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 3.9155266284942627, |
|
"eval_mean_token_accuracy": 0.5329939668174962, |
|
"eval_runtime": 1.0401, |
|
"eval_samples_per_second": 9.615, |
|
"eval_steps_per_second": 1.923, |
|
"step": 60 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 110, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8303894012190720.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|