16-clusters-balanced-7 / trainer_state.json
MHGanainy's picture
MHGanainy/16-clusters-balanced-7
5943cb1 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1673,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05977286312014345,
"grad_norm": 0.18261943757534027,
"learning_rate": 6.666666666666667e-06,
"loss": 2.3555,
"step": 100
},
{
"epoch": 0.1195457262402869,
"grad_norm": 0.2138003557920456,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.3285,
"step": 200
},
{
"epoch": 0.17931858936043035,
"grad_norm": 0.3002658188343048,
"learning_rate": 2e-05,
"loss": 2.2489,
"step": 300
},
{
"epoch": 0.2390914524805738,
"grad_norm": 0.4061976671218872,
"learning_rate": 1.9739364808281564e-05,
"loss": 2.1912,
"step": 400
},
{
"epoch": 0.2988643156007173,
"grad_norm": 0.5177231431007385,
"learning_rate": 1.8971045373758673e-05,
"loss": 2.1328,
"step": 500
},
{
"epoch": 0.3586371787208607,
"grad_norm": 0.6728057861328125,
"learning_rate": 1.7735091913054898e-05,
"loss": 2.062,
"step": 600
},
{
"epoch": 0.41841004184100417,
"grad_norm": 0.6207203269004822,
"learning_rate": 1.6095931019607367e-05,
"loss": 2.0053,
"step": 700
},
{
"epoch": 0.4781829049611476,
"grad_norm": 0.6931679248809814,
"learning_rate": 1.4139007296160285e-05,
"loss": 2.0062,
"step": 800
},
{
"epoch": 0.5379557680812911,
"grad_norm": 0.6681210994720459,
"learning_rate": 1.1966329380681454e-05,
"loss": 1.9427,
"step": 900
},
{
"epoch": 0.5977286312014346,
"grad_norm": 0.8678436279296875,
"learning_rate": 9.691152538179525e-06,
"loss": 1.9019,
"step": 1000
},
{
"epoch": 0.657501494321578,
"grad_norm": 1.0380738973617554,
"learning_rate": 7.432074999162258e-06,
"loss": 1.9017,
"step": 1100
},
{
"epoch": 0.7172743574417214,
"grad_norm": 0.8856862783432007,
"learning_rate": 5.306855785127376e-06,
"loss": 1.8821,
"step": 1200
},
{
"epoch": 0.7770472205618649,
"grad_norm": 1.1661518812179565,
"learning_rate": 3.426276279533615e-06,
"loss": 1.8826,
"step": 1300
},
{
"epoch": 0.8368200836820083,
"grad_norm": 0.8435269594192505,
"learning_rate": 1.8883655223774121e-06,
"loss": 1.8919,
"step": 1400
},
{
"epoch": 0.8965929468021518,
"grad_norm": 0.7680924534797668,
"learning_rate": 7.732902466662218e-07,
"loss": 1.9126,
"step": 1500
},
{
"epoch": 0.9563658099222953,
"grad_norm": 0.9946077466011047,
"learning_rate": 1.3917602405313812e-07,
"loss": 1.8853,
"step": 1600
},
{
"epoch": 1.0,
"step": 1673,
"total_flos": 3.0394718060544e+16,
"train_loss": 2.029054084226035,
"train_runtime": 519.4686,
"train_samples_per_second": 6.439,
"train_steps_per_second": 3.221
}
],
"logging_steps": 100,
"max_steps": 1673,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.0394718060544e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}