CFT-webinstruct-dpsk-ckpt-0114 / trainer_state.json
ubowang's picture
Upload folder using huggingface_hub
24b7145 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.0896,
"eval_steps": 500,
"global_step": 70,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00128,
"grad_norm": 1.9465926503876676,
"learning_rate": 2.0833333333333333e-07,
"loss": 0.6416,
"step": 1
},
{
"epoch": 0.00256,
"grad_norm": 2.0124156993303566,
"learning_rate": 4.1666666666666667e-07,
"loss": 0.6669,
"step": 2
},
{
"epoch": 0.00384,
"grad_norm": 2.087646283120165,
"learning_rate": 6.25e-07,
"loss": 0.6672,
"step": 3
},
{
"epoch": 0.00512,
"grad_norm": 2.0504712472935056,
"learning_rate": 8.333333333333333e-07,
"loss": 0.6653,
"step": 4
},
{
"epoch": 0.0064,
"grad_norm": 1.815879626276492,
"learning_rate": 1.0416666666666667e-06,
"loss": 0.6509,
"step": 5
},
{
"epoch": 0.00768,
"grad_norm": 1.8764565776467699,
"learning_rate": 1.25e-06,
"loss": 0.646,
"step": 6
},
{
"epoch": 0.00896,
"grad_norm": 1.8629002346447046,
"learning_rate": 1.4583333333333335e-06,
"loss": 0.6572,
"step": 7
},
{
"epoch": 0.01024,
"grad_norm": 1.8435249737816124,
"learning_rate": 1.6666666666666667e-06,
"loss": 0.66,
"step": 8
},
{
"epoch": 0.01152,
"grad_norm": 1.922292840935334,
"learning_rate": 1.8750000000000003e-06,
"loss": 0.6572,
"step": 9
},
{
"epoch": 0.0128,
"grad_norm": 1.8885131960372619,
"learning_rate": 2.0833333333333334e-06,
"loss": 0.6462,
"step": 10
},
{
"epoch": 0.01408,
"grad_norm": 1.6704165936125248,
"learning_rate": 2.2916666666666666e-06,
"loss": 0.6475,
"step": 11
},
{
"epoch": 0.01536,
"grad_norm": 1.725582114057618,
"learning_rate": 2.5e-06,
"loss": 0.6193,
"step": 12
},
{
"epoch": 0.01664,
"grad_norm": 1.6281351666821622,
"learning_rate": 2.7083333333333334e-06,
"loss": 0.6427,
"step": 13
},
{
"epoch": 0.01792,
"grad_norm": 0.9742610148915729,
"learning_rate": 2.916666666666667e-06,
"loss": 0.6003,
"step": 14
},
{
"epoch": 0.0192,
"grad_norm": 0.9408138329286213,
"learning_rate": 3.125e-06,
"loss": 0.5782,
"step": 15
},
{
"epoch": 0.02048,
"grad_norm": 0.8871297966814529,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.5642,
"step": 16
},
{
"epoch": 0.02176,
"grad_norm": 0.8677350156338169,
"learning_rate": 3.5416666666666673e-06,
"loss": 0.5752,
"step": 17
},
{
"epoch": 0.02304,
"grad_norm": 0.8296766137632919,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.5495,
"step": 18
},
{
"epoch": 0.02432,
"grad_norm": 0.6923202785291074,
"learning_rate": 3.958333333333333e-06,
"loss": 0.4964,
"step": 19
},
{
"epoch": 0.0256,
"grad_norm": 0.7854379694338149,
"learning_rate": 4.166666666666667e-06,
"loss": 0.479,
"step": 20
},
{
"epoch": 0.02688,
"grad_norm": 0.7829163717626061,
"learning_rate": 4.3750000000000005e-06,
"loss": 0.4801,
"step": 21
},
{
"epoch": 0.02816,
"grad_norm": 0.6533431708027024,
"learning_rate": 4.583333333333333e-06,
"loss": 0.4612,
"step": 22
},
{
"epoch": 0.02944,
"grad_norm": 0.5572023606265587,
"learning_rate": 4.791666666666668e-06,
"loss": 0.4605,
"step": 23
},
{
"epoch": 0.03072,
"grad_norm": 0.5293319538350173,
"learning_rate": 5e-06,
"loss": 0.4639,
"step": 24
},
{
"epoch": 0.032,
"grad_norm": 0.41911726053111276,
"learning_rate": 4.999978471321311e-06,
"loss": 0.4227,
"step": 25
},
{
"epoch": 0.03328,
"grad_norm": 0.7172502383894598,
"learning_rate": 4.999913885656027e-06,
"loss": 0.3964,
"step": 26
},
{
"epoch": 0.03456,
"grad_norm": 0.5913996068423693,
"learning_rate": 4.999806244116505e-06,
"loss": 0.4059,
"step": 27
},
{
"epoch": 0.03584,
"grad_norm": 0.48055748126084374,
"learning_rate": 4.999655548556651e-06,
"loss": 0.4107,
"step": 28
},
{
"epoch": 0.03712,
"grad_norm": 0.41925076244744985,
"learning_rate": 4.999461801571884e-06,
"loss": 0.3989,
"step": 29
},
{
"epoch": 0.0384,
"grad_norm": 0.3653215916567385,
"learning_rate": 4.999225006499096e-06,
"loss": 0.3955,
"step": 30
},
{
"epoch": 0.03968,
"grad_norm": 0.32516349299172237,
"learning_rate": 4.998945167416598e-06,
"loss": 0.3883,
"step": 31
},
{
"epoch": 0.04096,
"grad_norm": 0.27971440200729386,
"learning_rate": 4.998622289144039e-06,
"loss": 0.3957,
"step": 32
},
{
"epoch": 0.04224,
"grad_norm": 0.2766170998438116,
"learning_rate": 4.9982563772423375e-06,
"loss": 0.3848,
"step": 33
},
{
"epoch": 0.04352,
"grad_norm": 0.30360945437785425,
"learning_rate": 4.99784743801357e-06,
"loss": 0.3938,
"step": 34
},
{
"epoch": 0.0448,
"grad_norm": 0.28460957744347015,
"learning_rate": 4.997395478500874e-06,
"loss": 0.368,
"step": 35
},
{
"epoch": 0.04608,
"grad_norm": 0.2850953684320096,
"learning_rate": 4.996900506488323e-06,
"loss": 0.3724,
"step": 36
},
{
"epoch": 0.04736,
"grad_norm": 0.25012126257873374,
"learning_rate": 4.9963625305007925e-06,
"loss": 0.361,
"step": 37
},
{
"epoch": 0.04864,
"grad_norm": 0.24498070894576404,
"learning_rate": 4.995781559803811e-06,
"loss": 0.3806,
"step": 38
},
{
"epoch": 0.04992,
"grad_norm": 0.22591581090418575,
"learning_rate": 4.995157604403404e-06,
"loss": 0.363,
"step": 39
},
{
"epoch": 0.0512,
"grad_norm": 0.22056309851677902,
"learning_rate": 4.99449067504592e-06,
"loss": 0.3587,
"step": 40
},
{
"epoch": 0.05248,
"grad_norm": 0.20829370113607343,
"learning_rate": 4.993780783217844e-06,
"loss": 0.3727,
"step": 41
},
{
"epoch": 0.05376,
"grad_norm": 0.19999252558449981,
"learning_rate": 4.993027941145604e-06,
"loss": 0.3506,
"step": 42
},
{
"epoch": 0.05504,
"grad_norm": 0.2086064795316142,
"learning_rate": 4.992232161795356e-06,
"loss": 0.3485,
"step": 43
},
{
"epoch": 0.05632,
"grad_norm": 0.18277952791784172,
"learning_rate": 4.9913934588727615e-06,
"loss": 0.3334,
"step": 44
},
{
"epoch": 0.0576,
"grad_norm": 0.18383586373314206,
"learning_rate": 4.990511846822754e-06,
"loss": 0.3531,
"step": 45
},
{
"epoch": 0.05888,
"grad_norm": 0.18390959318122982,
"learning_rate": 4.9895873408292875e-06,
"loss": 0.3539,
"step": 46
},
{
"epoch": 0.06016,
"grad_norm": 0.18173318091193896,
"learning_rate": 4.988619956815074e-06,
"loss": 0.3452,
"step": 47
},
{
"epoch": 0.06144,
"grad_norm": 0.1729819420549717,
"learning_rate": 4.987609711441316e-06,
"loss": 0.3239,
"step": 48
},
{
"epoch": 0.06272,
"grad_norm": 0.17347984174169176,
"learning_rate": 4.98655662210741e-06,
"loss": 0.3475,
"step": 49
},
{
"epoch": 0.064,
"grad_norm": 0.1699972869063586,
"learning_rate": 4.985460706950655e-06,
"loss": 0.3513,
"step": 50
},
{
"epoch": 0.06528,
"grad_norm": 0.1606057290926495,
"learning_rate": 4.984321984845934e-06,
"loss": 0.3335,
"step": 51
},
{
"epoch": 0.06656,
"grad_norm": 0.16534392303574644,
"learning_rate": 4.9831404754053935e-06,
"loss": 0.3345,
"step": 52
},
{
"epoch": 0.06784,
"grad_norm": 0.23216304038143887,
"learning_rate": 4.981916198978103e-06,
"loss": 0.3487,
"step": 53
},
{
"epoch": 0.06912,
"grad_norm": 0.1516460550172589,
"learning_rate": 4.980649176649705e-06,
"loss": 0.3369,
"step": 54
},
{
"epoch": 0.0704,
"grad_norm": 0.1448627378722896,
"learning_rate": 4.979339430242053e-06,
"loss": 0.3339,
"step": 55
},
{
"epoch": 0.07168,
"grad_norm": 0.1470353182317087,
"learning_rate": 4.9779869823128356e-06,
"loss": 0.3356,
"step": 56
},
{
"epoch": 0.07296,
"grad_norm": 0.14370030555945446,
"learning_rate": 4.976591856155187e-06,
"loss": 0.3427,
"step": 57
},
{
"epoch": 0.07424,
"grad_norm": 0.14698377717239064,
"learning_rate": 4.975154075797281e-06,
"loss": 0.3364,
"step": 58
},
{
"epoch": 0.07552,
"grad_norm": 0.1452001418009019,
"learning_rate": 4.973673666001932e-06,
"loss": 0.3287,
"step": 59
},
{
"epoch": 0.0768,
"grad_norm": 0.1533193038026651,
"learning_rate": 4.972150652266151e-06,
"loss": 0.32,
"step": 60
},
{
"epoch": 0.07808,
"grad_norm": 0.14661431291205376,
"learning_rate": 4.970585060820717e-06,
"loss": 0.351,
"step": 61
},
{
"epoch": 0.07936,
"grad_norm": 0.16618365600060153,
"learning_rate": 4.968976918629722e-06,
"loss": 0.3545,
"step": 62
},
{
"epoch": 0.08064,
"grad_norm": 0.1423441821889856,
"learning_rate": 4.967326253390107e-06,
"loss": 0.3377,
"step": 63
},
{
"epoch": 0.08192,
"grad_norm": 0.14484043253247622,
"learning_rate": 4.965633093531186e-06,
"loss": 0.3314,
"step": 64
},
{
"epoch": 0.0832,
"grad_norm": 0.1465728819756019,
"learning_rate": 4.963897468214154e-06,
"loss": 0.3365,
"step": 65
},
{
"epoch": 0.08448,
"grad_norm": 0.14519445175592308,
"learning_rate": 4.962119407331587e-06,
"loss": 0.3266,
"step": 66
},
{
"epoch": 0.08576,
"grad_norm": 0.21106088971282316,
"learning_rate": 4.960298941506927e-06,
"loss": 0.3365,
"step": 67
},
{
"epoch": 0.08704,
"grad_norm": 0.14358267564891752,
"learning_rate": 4.958436102093951e-06,
"loss": 0.3355,
"step": 68
},
{
"epoch": 0.08832,
"grad_norm": 0.1392772159594871,
"learning_rate": 4.956530921176238e-06,
"loss": 0.326,
"step": 69
},
{
"epoch": 0.0896,
"grad_norm": 0.1403897051822436,
"learning_rate": 4.954583431566609e-06,
"loss": 0.3113,
"step": 70
}
],
"logging_steps": 1,
"max_steps": 781,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 89651498975232.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}