westlake-finetuned3 / checkpoint-155 /trainer_state.json
shaswatamitra's picture
Upload folder using huggingface_hub
a203be0 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 155,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.35,
"grad_norm": 3.5674822330474854,
"learning_rate": 0.0001375,
"loss": 3.066,
"step": 11
},
{
"epoch": 0.71,
"grad_norm": 2.487783670425415,
"learning_rate": 0.00019136690647482017,
"loss": 2.2172,
"step": 22
},
{
"epoch": 1.06,
"grad_norm": 3.1076672077178955,
"learning_rate": 0.00017553956834532374,
"loss": 1.4606,
"step": 33
},
{
"epoch": 1.42,
"grad_norm": 2.9141297340393066,
"learning_rate": 0.00015971223021582736,
"loss": 1.1038,
"step": 44
},
{
"epoch": 1.77,
"grad_norm": 2.831645965576172,
"learning_rate": 0.00014388489208633093,
"loss": 1.0646,
"step": 55
},
{
"epoch": 2.13,
"grad_norm": 2.672914743423462,
"learning_rate": 0.00012805755395683453,
"loss": 1.0186,
"step": 66
},
{
"epoch": 2.48,
"grad_norm": 2.9274446964263916,
"learning_rate": 0.00011223021582733813,
"loss": 0.8428,
"step": 77
},
{
"epoch": 2.84,
"grad_norm": 3.164165496826172,
"learning_rate": 9.640287769784174e-05,
"loss": 0.7633,
"step": 88
},
{
"epoch": 3.19,
"grad_norm": 4.559382438659668,
"learning_rate": 8.057553956834533e-05,
"loss": 0.6851,
"step": 99
},
{
"epoch": 3.55,
"grad_norm": 2.335391044616699,
"learning_rate": 6.474820143884892e-05,
"loss": 0.6521,
"step": 110
},
{
"epoch": 3.9,
"grad_norm": 3.2239022254943848,
"learning_rate": 4.892086330935252e-05,
"loss": 0.706,
"step": 121
},
{
"epoch": 4.26,
"grad_norm": 2.8791332244873047,
"learning_rate": 3.3093525179856116e-05,
"loss": 0.6047,
"step": 132
},
{
"epoch": 4.61,
"grad_norm": 3.5159878730773926,
"learning_rate": 1.7266187050359716e-05,
"loss": 0.4889,
"step": 143
},
{
"epoch": 4.97,
"grad_norm": 3.642343044281006,
"learning_rate": 1.4388489208633094e-06,
"loss": 0.585,
"step": 154
}
],
"logging_steps": 11,
"max_steps": 155,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 6751637581824000.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}