{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9850746268656714, "eval_steps": 500, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03980099502487562, "grad_norm": 0.49672797322273254, "learning_rate": 1.25e-05, "loss": 1.634, "step": 1 }, { "epoch": 0.07960199004975124, "grad_norm": 0.4738655984401703, "learning_rate": 2.5e-05, "loss": 1.6666, "step": 2 }, { "epoch": 0.11940298507462686, "grad_norm": 0.4305928945541382, "learning_rate": 3.7500000000000003e-05, "loss": 1.6673, "step": 3 }, { "epoch": 0.15920398009950248, "grad_norm": 0.2917488217353821, "learning_rate": 5e-05, "loss": 1.5859, "step": 4 }, { "epoch": 0.19900497512437812, "grad_norm": 0.3740411102771759, "learning_rate": 6.25e-05, "loss": 1.641, "step": 5 }, { "epoch": 0.23880597014925373, "grad_norm": 0.3605235815048218, "learning_rate": 7.500000000000001e-05, "loss": 1.5957, "step": 6 }, { "epoch": 0.27860696517412936, "grad_norm": 0.2877049744129181, "learning_rate": 8.75e-05, "loss": 1.5118, "step": 7 }, { "epoch": 0.31840796019900497, "grad_norm": 0.2710678279399872, "learning_rate": 0.0001, "loss": 1.5322, "step": 8 }, { "epoch": 0.3582089552238806, "grad_norm": 0.26977717876434326, "learning_rate": 9.994504457428558e-05, "loss": 1.5521, "step": 9 }, { "epoch": 0.39800995024875624, "grad_norm": 0.2279333472251892, "learning_rate": 9.978029910109491e-05, "loss": 1.5501, "step": 10 }, { "epoch": 0.43781094527363185, "grad_norm": 0.22376185655593872, "learning_rate": 9.950612572673255e-05, "loss": 1.5476, "step": 11 }, { "epoch": 0.47761194029850745, "grad_norm": 0.22192299365997314, "learning_rate": 9.91231271437788e-05, "loss": 1.5191, "step": 12 }, { "epoch": 0.5174129353233831, "grad_norm": 0.20543566346168518, "learning_rate": 9.863214526624065e-05, "loss": 1.5528, "step": 13 }, { "epoch": 0.5572139303482587, "grad_norm": 0.20152153074741364, "learning_rate": 9.8034259378842e-05, "loss": 1.4816, "step": 14 }, { "epoch": 0.5970149253731343, "grad_norm": 0.204048290848732, "learning_rate": 9.733078376452171e-05, "loss": 1.5519, "step": 15 }, { "epoch": 0.6368159203980099, "grad_norm": 0.19486752152442932, "learning_rate": 9.652326481535435e-05, "loss": 1.5183, "step": 16 }, { "epoch": 0.6766169154228856, "grad_norm": 0.1839868277311325, "learning_rate": 9.561347763324484e-05, "loss": 1.5272, "step": 17 }, { "epoch": 0.7164179104477612, "grad_norm": 0.1841403841972351, "learning_rate": 9.460342212786932e-05, "loss": 1.523, "step": 18 }, { "epoch": 0.7562189054726368, "grad_norm": 0.17461948096752167, "learning_rate": 9.349531862043952e-05, "loss": 1.5316, "step": 19 }, { "epoch": 0.7960199004975125, "grad_norm": 0.1808539479970932, "learning_rate": 9.229160296295488e-05, "loss": 1.4991, "step": 20 }, { "epoch": 0.835820895522388, "grad_norm": 0.1901710480451584, "learning_rate": 9.099492118367123e-05, "loss": 1.5086, "step": 21 }, { "epoch": 0.8756218905472637, "grad_norm": 0.183896005153656, "learning_rate": 8.960812367055646e-05, "loss": 1.5435, "step": 22 }, { "epoch": 0.9154228855721394, "grad_norm": 0.17992867529392242, "learning_rate": 8.81342589055191e-05, "loss": 1.5201, "step": 23 }, { "epoch": 0.9552238805970149, "grad_norm": 0.17739859223365784, "learning_rate": 8.657656676318346e-05, "loss": 1.5059, "step": 24 }, { "epoch": 0.9950248756218906, "grad_norm": 0.17343272268772125, "learning_rate": 8.493847138894209e-05, "loss": 1.5023, "step": 25 }, { "epoch": 1.0348258706467661, "grad_norm": 0.181735098361969, "learning_rate": 8.322357367194109e-05, "loss": 1.4237, "step": 26 }, { "epoch": 1.0746268656716418, "grad_norm": 0.1831899732351303, "learning_rate": 8.143564332954425e-05, "loss": 1.4127, "step": 27 }, { "epoch": 1.1144278606965174, "grad_norm": 0.17571744322776794, "learning_rate": 7.957861062067614e-05, "loss": 1.415, "step": 28 }, { "epoch": 1.154228855721393, "grad_norm": 0.17378666996955872, "learning_rate": 7.765655770625997e-05, "loss": 1.3883, "step": 29 }, { "epoch": 1.1940298507462686, "grad_norm": 0.18887346982955933, "learning_rate": 7.56737096757421e-05, "loss": 1.3688, "step": 30 }, { "epoch": 1.2338308457711442, "grad_norm": 0.1842719465494156, "learning_rate": 7.363442525942826e-05, "loss": 1.3973, "step": 31 }, { "epoch": 1.2736318407960199, "grad_norm": 0.180495485663414, "learning_rate": 7.154318724704853e-05, "loss": 1.4196, "step": 32 }, { "epoch": 1.3134328358208955, "grad_norm": 0.18591800332069397, "learning_rate": 6.940459263361249e-05, "loss": 1.3993, "step": 33 }, { "epoch": 1.3532338308457712, "grad_norm": 0.1974848359823227, "learning_rate": 6.722334251421665e-05, "loss": 1.4004, "step": 34 }, { "epoch": 1.3930348258706466, "grad_norm": 0.18464985489845276, "learning_rate": 6.500423175001705e-05, "loss": 1.4223, "step": 35 }, { "epoch": 1.4328358208955223, "grad_norm": 0.19280724227428436, "learning_rate": 6.275213842808383e-05, "loss": 1.4227, "step": 36 }, { "epoch": 1.472636815920398, "grad_norm": 0.2069428563117981, "learning_rate": 6.0472013138307235e-05, "loss": 1.4064, "step": 37 }, { "epoch": 1.5124378109452736, "grad_norm": 0.19798892736434937, "learning_rate": 5.816886809092651e-05, "loss": 1.4589, "step": 38 }, { "epoch": 1.5522388059701493, "grad_norm": 0.19987910985946655, "learning_rate": 5.584776609860414e-05, "loss": 1.3616, "step": 39 }, { "epoch": 1.5920398009950247, "grad_norm": 0.19689033925533295, "learning_rate": 5.351380944726465e-05, "loss": 1.402, "step": 40 }, { "epoch": 1.6318407960199006, "grad_norm": 0.20285791158676147, "learning_rate": 5.117212868016303e-05, "loss": 1.4233, "step": 41 }, { "epoch": 1.671641791044776, "grad_norm": 0.20456896722316742, "learning_rate": 4.882787131983698e-05, "loss": 1.3909, "step": 42 }, { "epoch": 1.7114427860696517, "grad_norm": 0.1989758014678955, "learning_rate": 4.648619055273537e-05, "loss": 1.4099, "step": 43 }, { "epoch": 1.7512437810945274, "grad_norm": 0.20214244723320007, "learning_rate": 4.415223390139588e-05, "loss": 1.4281, "step": 44 }, { "epoch": 1.7910447761194028, "grad_norm": 0.20304188132286072, "learning_rate": 4.183113190907349e-05, "loss": 1.3892, "step": 45 }, { "epoch": 1.8308457711442787, "grad_norm": 0.20430393517017365, "learning_rate": 3.952798686169279e-05, "loss": 1.3681, "step": 46 }, { "epoch": 1.8706467661691542, "grad_norm": 0.216043621301651, "learning_rate": 3.7247861571916185e-05, "loss": 1.4124, "step": 47 }, { "epoch": 1.9104477611940298, "grad_norm": 0.20856983959674835, "learning_rate": 3.499576824998298e-05, "loss": 1.3772, "step": 48 }, { "epoch": 1.9502487562189055, "grad_norm": 0.21441003680229187, "learning_rate": 3.277665748578336e-05, "loss": 1.3912, "step": 49 }, { "epoch": 1.9900497512437811, "grad_norm": 0.2081148624420166, "learning_rate": 3.0595407366387504e-05, "loss": 1.4066, "step": 50 }, { "epoch": 2.029850746268657, "grad_norm": 0.21571974456310272, "learning_rate": 2.8456812752951485e-05, "loss": 1.2869, "step": 51 }, { "epoch": 2.0696517412935322, "grad_norm": 0.22047464549541473, "learning_rate": 2.636557474057173e-05, "loss": 1.3308, "step": 52 }, { "epoch": 2.109452736318408, "grad_norm": 0.21658696234226227, "learning_rate": 2.4326290324257894e-05, "loss": 1.3483, "step": 53 }, { "epoch": 2.1492537313432836, "grad_norm": 0.22115129232406616, "learning_rate": 2.234344229374003e-05, "loss": 1.3131, "step": 54 }, { "epoch": 2.189054726368159, "grad_norm": 0.21616297960281372, "learning_rate": 2.042138937932388e-05, "loss": 1.32, "step": 55 }, { "epoch": 2.228855721393035, "grad_norm": 0.2277287244796753, "learning_rate": 1.8564356670455767e-05, "loss": 1.3285, "step": 56 }, { "epoch": 2.2686567164179103, "grad_norm": 0.21879006922245026, "learning_rate": 1.677642632805892e-05, "loss": 1.3478, "step": 57 }, { "epoch": 2.308457711442786, "grad_norm": 0.2215149849653244, "learning_rate": 1.5061528611057918e-05, "loss": 1.3001, "step": 58 }, { "epoch": 2.3482587064676617, "grad_norm": 0.2294432371854782, "learning_rate": 1.3423433236816563e-05, "loss": 1.288, "step": 59 }, { "epoch": 2.388059701492537, "grad_norm": 0.22794978320598602, "learning_rate": 1.1865741094480909e-05, "loss": 1.2953, "step": 60 }, { "epoch": 2.427860696517413, "grad_norm": 0.22905172407627106, "learning_rate": 1.0391876329443533e-05, "loss": 1.3366, "step": 61 }, { "epoch": 2.4676616915422884, "grad_norm": 0.2197260707616806, "learning_rate": 9.005078816328771e-06, "loss": 1.3045, "step": 62 }, { "epoch": 2.5074626865671643, "grad_norm": 0.23517443239688873, "learning_rate": 7.708397037045129e-06, "loss": 1.2732, "step": 63 }, { "epoch": 2.5472636815920398, "grad_norm": 0.22419849038124084, "learning_rate": 6.50468137956049e-06, "loss": 1.3236, "step": 64 }, { "epoch": 2.587064676616915, "grad_norm": 0.22183862328529358, "learning_rate": 5.3965778721306755e-06, "loss": 1.3171, "step": 65 }, { "epoch": 2.626865671641791, "grad_norm": 0.2313942015171051, "learning_rate": 4.386522366755169e-06, "loss": 1.3622, "step": 66 }, { "epoch": 2.6666666666666665, "grad_norm": 0.22842709720134735, "learning_rate": 3.476735184645674e-06, "loss": 1.3202, "step": 67 }, { "epoch": 2.7064676616915424, "grad_norm": 0.21870732307434082, "learning_rate": 2.6692162354782944e-06, "loss": 1.3243, "step": 68 }, { "epoch": 2.746268656716418, "grad_norm": 0.2380901575088501, "learning_rate": 1.9657406211579966e-06, "loss": 1.2638, "step": 69 }, { "epoch": 2.7860696517412933, "grad_norm": 0.2310485988855362, "learning_rate": 1.3678547337593494e-06, "loss": 1.3266, "step": 70 }, { "epoch": 2.825870646766169, "grad_norm": 0.22601790726184845, "learning_rate": 8.768728562211947e-07, "loss": 1.3183, "step": 71 }, { "epoch": 2.8656716417910446, "grad_norm": 0.22665373980998993, "learning_rate": 4.938742732674529e-07, "loss": 1.3247, "step": 72 }, { "epoch": 2.9054726368159205, "grad_norm": 0.22332097589969635, "learning_rate": 2.1970089890509527e-07, "loss": 1.3518, "step": 73 }, { "epoch": 2.945273631840796, "grad_norm": 0.2267157882452011, "learning_rate": 5.4955425714431353e-08, "loss": 1.2951, "step": 74 }, { "epoch": 2.9850746268656714, "grad_norm": 0.23764650523662567, "learning_rate": 0.0, "loss": 1.3156, "step": 75 }, { "epoch": 2.9850746268656714, "step": 75, "total_flos": 1.7993258830774927e+18, "train_loss": 1.423751606941223, "train_runtime": 7978.1931, "train_samples_per_second": 0.604, "train_steps_per_second": 0.009 } ], "logging_steps": 1, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7993258830774927e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }