htlou's picture
Upload folder using huggingface_hub
c79d8a2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.979683972911964,
"eval_steps": 50,
"global_step": 330,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.045146726862302484,
"grad_norm": 18.09577241357873,
"learning_rate": 5e-07,
"loss": 1.7252,
"step": 5
},
{
"epoch": 0.09029345372460497,
"grad_norm": 12.427429423392192,
"learning_rate": 1e-06,
"loss": 1.6001,
"step": 10
},
{
"epoch": 0.13544018058690746,
"grad_norm": 7.447227160720521,
"learning_rate": 9.993977281025862e-07,
"loss": 1.2922,
"step": 15
},
{
"epoch": 0.18058690744920994,
"grad_norm": 4.316398393626138,
"learning_rate": 9.975923633360984e-07,
"loss": 1.1376,
"step": 20
},
{
"epoch": 0.22573363431151242,
"grad_norm": 3.9406940968187656,
"learning_rate": 9.945882549823904e-07,
"loss": 1.0491,
"step": 25
},
{
"epoch": 0.2708803611738149,
"grad_norm": 3.810723345906196,
"learning_rate": 9.90392640201615e-07,
"loss": 1.0089,
"step": 30
},
{
"epoch": 0.3160270880361174,
"grad_norm": 3.8464121948958576,
"learning_rate": 9.85015626597272e-07,
"loss": 0.9962,
"step": 35
},
{
"epoch": 0.3611738148984199,
"grad_norm": 3.2295854401750845,
"learning_rate": 9.784701678661044e-07,
"loss": 0.9752,
"step": 40
},
{
"epoch": 0.40632054176072235,
"grad_norm": 3.8385044412155596,
"learning_rate": 9.707720325915103e-07,
"loss": 0.9454,
"step": 45
},
{
"epoch": 0.45146726862302483,
"grad_norm": 3.5887788672901273,
"learning_rate": 9.619397662556433e-07,
"loss": 0.9344,
"step": 50
},
{
"epoch": 0.45146726862302483,
"eval_loss": 0.9311009049415588,
"eval_runtime": 55.6318,
"eval_samples_per_second": 56.622,
"eval_steps_per_second": 0.899,
"step": 50
},
{
"epoch": 0.4966139954853273,
"grad_norm": 3.587356821639005,
"learning_rate": 9.519946465617217e-07,
"loss": 0.9239,
"step": 55
},
{
"epoch": 0.5417607223476298,
"grad_norm": 3.32920886191508,
"learning_rate": 9.409606321741774e-07,
"loss": 0.9331,
"step": 60
},
{
"epoch": 0.5869074492099323,
"grad_norm": 3.588072493745418,
"learning_rate": 9.28864305000136e-07,
"loss": 0.9039,
"step": 65
},
{
"epoch": 0.6320541760722348,
"grad_norm": 3.503346081138309,
"learning_rate": 9.157348061512726e-07,
"loss": 0.8858,
"step": 70
},
{
"epoch": 0.6772009029345373,
"grad_norm": 3.493284507608022,
"learning_rate": 9.016037657403223e-07,
"loss": 0.9026,
"step": 75
},
{
"epoch": 0.7223476297968398,
"grad_norm": 3.3777773087899727,
"learning_rate": 8.865052266813685e-07,
"loss": 0.8951,
"step": 80
},
{
"epoch": 0.7674943566591422,
"grad_norm": 3.4055672963069994,
"learning_rate": 8.704755626774795e-07,
"loss": 0.8947,
"step": 85
},
{
"epoch": 0.8126410835214447,
"grad_norm": 3.415767069790649,
"learning_rate": 8.535533905932737e-07,
"loss": 0.8945,
"step": 90
},
{
"epoch": 0.8577878103837472,
"grad_norm": 3.625379846327293,
"learning_rate": 8.357794774235092e-07,
"loss": 0.8758,
"step": 95
},
{
"epoch": 0.9029345372460497,
"grad_norm": 3.589930432996723,
"learning_rate": 8.171966420818227e-07,
"loss": 0.8818,
"step": 100
},
{
"epoch": 0.9029345372460497,
"eval_loss": 0.8771370649337769,
"eval_runtime": 55.0752,
"eval_samples_per_second": 57.194,
"eval_steps_per_second": 0.908,
"step": 100
},
{
"epoch": 0.9480812641083521,
"grad_norm": 3.431382987024524,
"learning_rate": 7.978496522462167e-07,
"loss": 0.8825,
"step": 105
},
{
"epoch": 0.9932279909706546,
"grad_norm": 3.753090530415666,
"learning_rate": 7.777851165098011e-07,
"loss": 0.87,
"step": 110
},
{
"epoch": 1.0383747178329572,
"grad_norm": 3.8762413482705185,
"learning_rate": 7.570513720966107e-07,
"loss": 0.8215,
"step": 115
},
{
"epoch": 1.0835214446952597,
"grad_norm": 3.6411326851223107,
"learning_rate": 7.356983684129989e-07,
"loss": 0.8021,
"step": 120
},
{
"epoch": 1.1286681715575622,
"grad_norm": 3.8015150678190293,
"learning_rate": 7.13777546715141e-07,
"loss": 0.8122,
"step": 125
},
{
"epoch": 1.1738148984198646,
"grad_norm": 3.554114852631137,
"learning_rate": 6.913417161825449e-07,
"loss": 0.812,
"step": 130
},
{
"epoch": 1.2189616252821671,
"grad_norm": 3.512124531880888,
"learning_rate": 6.684449266961099e-07,
"loss": 0.7826,
"step": 135
},
{
"epoch": 1.2641083521444696,
"grad_norm": 3.6396827015577697,
"learning_rate": 6.451423386272311e-07,
"loss": 0.7936,
"step": 140
},
{
"epoch": 1.309255079006772,
"grad_norm": 3.6873566290287325,
"learning_rate": 6.21490089951632e-07,
"loss": 0.8014,
"step": 145
},
{
"epoch": 1.3544018058690745,
"grad_norm": 3.6757170742910867,
"learning_rate": 5.975451610080642e-07,
"loss": 0.7974,
"step": 150
},
{
"epoch": 1.3544018058690745,
"eval_loss": 0.8601916432380676,
"eval_runtime": 54.9398,
"eval_samples_per_second": 57.335,
"eval_steps_per_second": 0.91,
"step": 150
},
{
"epoch": 1.399548532731377,
"grad_norm": 3.4471162121206023,
"learning_rate": 5.733652372276809e-07,
"loss": 0.7972,
"step": 155
},
{
"epoch": 1.4446952595936795,
"grad_norm": 3.545405581828385,
"learning_rate": 5.490085701647804e-07,
"loss": 0.8086,
"step": 160
},
{
"epoch": 1.489841986455982,
"grad_norm": 3.500167727908525,
"learning_rate": 5.245338371637091e-07,
"loss": 0.7844,
"step": 165
},
{
"epoch": 1.5349887133182845,
"grad_norm": 3.683324105059045,
"learning_rate": 5e-07,
"loss": 0.814,
"step": 170
},
{
"epoch": 1.580135440180587,
"grad_norm": 3.5076105573904224,
"learning_rate": 4.75466162836291e-07,
"loss": 0.7606,
"step": 175
},
{
"epoch": 1.6252821670428894,
"grad_norm": 3.39732342028104,
"learning_rate": 4.5099142983521963e-07,
"loss": 0.7836,
"step": 180
},
{
"epoch": 1.670428893905192,
"grad_norm": 3.522550690645414,
"learning_rate": 4.2663476277231915e-07,
"loss": 0.7882,
"step": 185
},
{
"epoch": 1.7155756207674944,
"grad_norm": 3.896694774670664,
"learning_rate": 4.0245483899193586e-07,
"loss": 0.8049,
"step": 190
},
{
"epoch": 1.7607223476297968,
"grad_norm": 3.9732787046924556,
"learning_rate": 3.785099100483681e-07,
"loss": 0.7794,
"step": 195
},
{
"epoch": 1.8058690744920993,
"grad_norm": 3.8350374340781825,
"learning_rate": 3.548576613727689e-07,
"loss": 0.7942,
"step": 200
},
{
"epoch": 1.8058690744920993,
"eval_loss": 0.8489097356796265,
"eval_runtime": 55.1124,
"eval_samples_per_second": 57.156,
"eval_steps_per_second": 0.907,
"step": 200
},
{
"epoch": 1.8510158013544018,
"grad_norm": 3.647701362803135,
"learning_rate": 3.3155507330388996e-07,
"loss": 0.7827,
"step": 205
},
{
"epoch": 1.8961625282167043,
"grad_norm": 3.5156707572378187,
"learning_rate": 3.086582838174551e-07,
"loss": 0.7899,
"step": 210
},
{
"epoch": 1.9413092550790068,
"grad_norm": 3.56378125431085,
"learning_rate": 2.8622245328485907e-07,
"loss": 0.8011,
"step": 215
},
{
"epoch": 1.9864559819413092,
"grad_norm": 3.465941533238677,
"learning_rate": 2.6430163158700113e-07,
"loss": 0.7722,
"step": 220
},
{
"epoch": 2.0316027088036117,
"grad_norm": 3.780773444000903,
"learning_rate": 2.4294862790338916e-07,
"loss": 0.7563,
"step": 225
},
{
"epoch": 2.0767494356659144,
"grad_norm": 3.58554801379656,
"learning_rate": 2.2221488349019902e-07,
"loss": 0.7356,
"step": 230
},
{
"epoch": 2.1218961625282167,
"grad_norm": 3.7489661478667906,
"learning_rate": 2.021503477537833e-07,
"loss": 0.7422,
"step": 235
},
{
"epoch": 2.1670428893905194,
"grad_norm": 3.6327819309767717,
"learning_rate": 1.828033579181773e-07,
"loss": 0.7331,
"step": 240
},
{
"epoch": 2.2121896162528216,
"grad_norm": 3.862265193047436,
"learning_rate": 1.6422052257649077e-07,
"loss": 0.7378,
"step": 245
},
{
"epoch": 2.2573363431151243,
"grad_norm": 3.7313891351245787,
"learning_rate": 1.4644660940672627e-07,
"loss": 0.7405,
"step": 250
},
{
"epoch": 2.2573363431151243,
"eval_loss": 0.8493446111679077,
"eval_runtime": 54.9903,
"eval_samples_per_second": 57.283,
"eval_steps_per_second": 0.909,
"step": 250
},
{
"epoch": 2.3024830699774266,
"grad_norm": 3.6025441770788005,
"learning_rate": 1.2952443732252054e-07,
"loss": 0.7311,
"step": 255
},
{
"epoch": 2.3476297968397293,
"grad_norm": 3.7220315278082143,
"learning_rate": 1.134947733186315e-07,
"loss": 0.7183,
"step": 260
},
{
"epoch": 2.3927765237020315,
"grad_norm": 4.011429432621815,
"learning_rate": 9.839623425967758e-08,
"loss": 0.7381,
"step": 265
},
{
"epoch": 2.4379232505643342,
"grad_norm": 3.5985031594527155,
"learning_rate": 8.426519384872732e-08,
"loss": 0.7359,
"step": 270
},
{
"epoch": 2.4830699774266365,
"grad_norm": 3.8934477742892417,
"learning_rate": 7.1135694999864e-08,
"loss": 0.7423,
"step": 275
},
{
"epoch": 2.528216704288939,
"grad_norm": 4.140166060538144,
"learning_rate": 5.9039367825822526e-08,
"loss": 0.7336,
"step": 280
},
{
"epoch": 2.5733634311512414,
"grad_norm": 3.685670529512933,
"learning_rate": 4.800535343827833e-08,
"loss": 0.7231,
"step": 285
},
{
"epoch": 2.618510158013544,
"grad_norm": 3.764984332523118,
"learning_rate": 3.806023374435663e-08,
"loss": 0.731,
"step": 290
},
{
"epoch": 2.6636568848758464,
"grad_norm": 3.695489561322323,
"learning_rate": 2.922796740848965e-08,
"loss": 0.7243,
"step": 295
},
{
"epoch": 2.708803611738149,
"grad_norm": 3.785952827138228,
"learning_rate": 2.1529832133895588e-08,
"loss": 0.7321,
"step": 300
},
{
"epoch": 2.708803611738149,
"eval_loss": 0.8476512432098389,
"eval_runtime": 55.0992,
"eval_samples_per_second": 57.17,
"eval_steps_per_second": 0.907,
"step": 300
},
{
"epoch": 2.7539503386004514,
"grad_norm": 3.719298487431947,
"learning_rate": 1.4984373402728012e-08,
"loss": 0.7248,
"step": 305
},
{
"epoch": 2.799097065462754,
"grad_norm": 3.570380389588657,
"learning_rate": 9.607359798384784e-09,
"loss": 0.7527,
"step": 310
},
{
"epoch": 2.8442437923250563,
"grad_norm": 3.547493528277377,
"learning_rate": 5.411745017609493e-09,
"loss": 0.738,
"step": 315
},
{
"epoch": 2.889390519187359,
"grad_norm": 3.7382434901187755,
"learning_rate": 2.407636663901591e-09,
"loss": 0.7268,
"step": 320
},
{
"epoch": 2.9345372460496613,
"grad_norm": 3.5368152248539033,
"learning_rate": 6.022718974137975e-10,
"loss": 0.7383,
"step": 325
},
{
"epoch": 2.979683972911964,
"grad_norm": 3.5267604120974374,
"learning_rate": 0.0,
"loss": 0.7248,
"step": 330
},
{
"epoch": 2.979683972911964,
"step": 330,
"total_flos": 1945595623243776.0,
"train_loss": 0.8492823528520989,
"train_runtime": 4699.828,
"train_samples_per_second": 18.096,
"train_steps_per_second": 0.07
}
],
"logging_steps": 5,
"max_steps": 330,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1945595623243776.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}