{
"best_metric": 0.5994039177894592,
"best_model_checkpoint": "outputs/checkpoint-1839",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1839,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 3.147441434337073e-05,
"loss": 2.727,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 3.705117131325856e-05,
"loss": 2.0215,
"step": 100
},
{
"epoch": 0.24,
"learning_rate": 4.031336501627827e-05,
"loss": 1.4483,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 4.262792828314637e-05,
"loss": 1.2309,
"step": 200
},
{
"epoch": 0.41,
"learning_rate": 4.442324303011218e-05,
"loss": 1.0732,
"step": 250
},
{
"epoch": 0.49,
"learning_rate": 4.589012198616609e-05,
"loss": 1.0297,
"step": 300
},
{
"epoch": 0.57,
"learning_rate": 4.713035047190436e-05,
"loss": 0.9676,
"step": 350
},
{
"epoch": 0.65,
"learning_rate": 4.82046852530342e-05,
"loss": 0.9311,
"step": 400
},
{
"epoch": 0.73,
"learning_rate": 4.915231568918581e-05,
"loss": 0.9144,
"step": 450
},
{
"epoch": 0.82,
"learning_rate": 5e-05,
"loss": 0.8811,
"step": 500
},
{
"epoch": 0.9,
"learning_rate": 4.971822886716504e-05,
"loss": 0.8284,
"step": 550
},
{
"epoch": 0.98,
"learning_rate": 4.943070730304773e-05,
"loss": 0.8349,
"step": 600
},
{
"epoch": 1.0,
"eval_loss": 0.7203035354614258,
"eval_runtime": 2.0613,
"eval_samples_per_second": 568.098,
"eval_steps_per_second": 17.95,
"step": 613
},
{
"epoch": 1.06,
"learning_rate": 4.914318573893042e-05,
"loss": 0.8284,
"step": 650
},
{
"epoch": 1.14,
"learning_rate": 4.8855664174813115e-05,
"loss": 0.8152,
"step": 700
},
{
"epoch": 1.22,
"learning_rate": 4.856814261069581e-05,
"loss": 0.8085,
"step": 750
},
{
"epoch": 1.31,
"learning_rate": 4.8280621046578495e-05,
"loss": 0.8156,
"step": 800
},
{
"epoch": 1.39,
"learning_rate": 4.799309948246118e-05,
"loss": 0.7773,
"step": 850
},
{
"epoch": 1.47,
"learning_rate": 4.770557791834388e-05,
"loss": 0.7913,
"step": 900
},
{
"epoch": 1.55,
"learning_rate": 4.741805635422657e-05,
"loss": 0.7787,
"step": 950
},
{
"epoch": 1.63,
"learning_rate": 4.713053479010926e-05,
"loss": 0.7589,
"step": 1000
},
{
"epoch": 1.71,
"learning_rate": 4.684301322599195e-05,
"loss": 0.7671,
"step": 1050
},
{
"epoch": 1.79,
"learning_rate": 4.655549166187464e-05,
"loss": 0.7461,
"step": 1100
},
{
"epoch": 1.88,
"learning_rate": 4.6267970097757336e-05,
"loss": 0.7469,
"step": 1150
},
{
"epoch": 1.96,
"learning_rate": 4.598044853364002e-05,
"loss": 0.743,
"step": 1200
},
{
"epoch": 2.0,
"eval_loss": 0.6314801573753357,
"eval_runtime": 2.0735,
"eval_samples_per_second": 564.759,
"eval_steps_per_second": 17.845,
"step": 1226
},
{
"epoch": 2.04,
"learning_rate": 4.5692926969522716e-05,
"loss": 0.7375,
"step": 1250
},
{
"epoch": 2.12,
"learning_rate": 4.540540540540541e-05,
"loss": 0.7342,
"step": 1300
},
{
"epoch": 2.2,
"learning_rate": 4.51178838412881e-05,
"loss": 0.7308,
"step": 1350
},
{
"epoch": 2.28,
"learning_rate": 4.483036227717079e-05,
"loss": 0.7157,
"step": 1400
},
{
"epoch": 2.37,
"learning_rate": 4.454284071305348e-05,
"loss": 0.7032,
"step": 1450
},
{
"epoch": 2.45,
"learning_rate": 4.425531914893617e-05,
"loss": 0.7114,
"step": 1500
},
{
"epoch": 2.53,
"learning_rate": 4.396779758481886e-05,
"loss": 0.7192,
"step": 1550
},
{
"epoch": 2.61,
"learning_rate": 4.3680276020701557e-05,
"loss": 0.7132,
"step": 1600
},
{
"epoch": 2.69,
"learning_rate": 4.339275445658424e-05,
"loss": 0.7106,
"step": 1650
},
{
"epoch": 2.77,
"learning_rate": 4.310523289246694e-05,
"loss": 0.6874,
"step": 1700
},
{
"epoch": 2.85,
"learning_rate": 4.281771132834963e-05,
"loss": 0.6983,
"step": 1750
},
{
"epoch": 2.94,
"learning_rate": 4.253018976423232e-05,
"loss": 0.6989,
"step": 1800
},
{
"epoch": 3.0,
"eval_loss": 0.5994039177894592,
"eval_runtime": 2.0675,
"eval_samples_per_second": 566.385,
"eval_steps_per_second": 17.896,
"step": 1839
}
],
"logging_steps": 50,
"max_steps": 9195,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 3058064064249856.0,
"trial_name": null,
"trial_params": null
}