{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500.0,
  "global_step": 6237,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02405002405002405,
      "grad_norm": 0.4139963388442993,
      "learning_rate": 0.00019996828714700116,
      "loss": 1.5971,
      "step": 50
    },
    {
      "epoch": 0.0481000481000481,
      "grad_norm": 0.3423018157482147,
      "learning_rate": 0.00019987316870210547,
      "loss": 1.274,
      "step": 100
    },
    {
      "epoch": 0.07215007215007214,
      "grad_norm": 0.3551710247993469,
      "learning_rate": 0.0001997147049948582,
      "loss": 1.2519,
      "step": 150
    },
    {
      "epoch": 0.0962000962000962,
      "grad_norm": 0.32329073548316956,
      "learning_rate": 0.0001994929965319844,
      "loss": 1.2382,
      "step": 200
    },
    {
      "epoch": 0.12025012025012025,
      "grad_norm": 0.48585018515586853,
      "learning_rate": 0.0001992081839336419,
      "loss": 1.2293,
      "step": 250
    },
    {
      "epoch": 0.1443001443001443,
      "grad_norm": 0.40136224031448364,
      "learning_rate": 0.00019886044784423197,
      "loss": 1.2214,
      "step": 300
    },
    {
      "epoch": 0.16835016835016836,
      "grad_norm": 0.574002206325531,
      "learning_rate": 0.00019845000881782432,
      "loss": 1.2184,
      "step": 350
    },
    {
      "epoch": 0.1924001924001924,
      "grad_norm": 0.4179827570915222,
      "learning_rate": 0.00019797712717826914,
      "loss": 1.2064,
      "step": 400
    },
    {
      "epoch": 0.21645021645021645,
      "grad_norm": 0.33033809065818787,
      "learning_rate": 0.00019744210285408488,
      "loss": 1.2055,
      "step": 450
    },
    {
      "epoch": 0.2405002405002405,
      "grad_norm": 0.2719138562679291,
      "learning_rate": 0.0001968452751882264,
      "loss": 1.2077,
      "step": 500
    },
    {
      "epoch": 0.26455026455026454,
      "grad_norm": 0.29797521233558655,
      "learning_rate": 0.00019618702272285434,
      "loss": 1.2096,
      "step": 550
    },
    {
      "epoch": 0.2886002886002886,
      "grad_norm": 0.3336372673511505,
      "learning_rate": 0.00019546776295924212,
      "loss": 1.2072,
      "step": 600
    },
    {
      "epoch": 0.3126503126503126,
      "grad_norm": 0.26755037903785706,
      "learning_rate": 0.0001946879520929728,
      "loss": 1.1974,
      "step": 650
    },
    {
      "epoch": 0.3367003367003367,
      "grad_norm": 0.36268576979637146,
      "learning_rate": 0.00019384808472459368,
      "loss": 1.2045,
      "step": 700
    },
    {
      "epoch": 0.36075036075036077,
      "grad_norm": 0.3121575713157654,
      "learning_rate": 0.0001929486935459127,
      "loss": 1.1889,
      "step": 750
    },
    {
      "epoch": 0.3848003848003848,
      "grad_norm": 0.3159404993057251,
      "learning_rate": 0.00019199034900213452,
      "loss": 1.1921,
      "step": 800
    },
    {
      "epoch": 0.40885040885040885,
      "grad_norm": 0.7236579060554504,
      "learning_rate": 0.000190973658930052,
      "loss": 1.194,
      "step": 850
    },
    {
      "epoch": 0.4329004329004329,
      "grad_norm": 0.24907168745994568,
      "learning_rate": 0.00018989926817252113,
      "loss": 1.191,
      "step": 900
    },
    {
      "epoch": 0.45695045695045694,
      "grad_norm": 0.24481187760829926,
      "learning_rate": 0.00018876785816946505,
      "loss": 1.1857,
      "step": 950
    },
    {
      "epoch": 0.481000481000481,
      "grad_norm": 0.2668200731277466,
      "learning_rate": 0.00018758014652566597,
      "loss": 1.1957,
      "step": 1000
    },
    {
      "epoch": 0.5050505050505051,
      "grad_norm": 0.2687171399593353,
      "learning_rate": 0.0001863368865556191,
      "loss": 1.1864,
      "step": 1050
    },
    {
      "epoch": 0.5291005291005291,
      "grad_norm": 0.23915782570838928,
      "learning_rate": 0.0001850388668057379,
      "loss": 1.184,
      "step": 1100
    },
    {
      "epoch": 0.5531505531505532,
      "grad_norm": 0.37159469723701477,
      "learning_rate": 0.0001836869105542127,
      "loss": 1.1849,
      "step": 1150
    },
    {
      "epoch": 0.5772005772005772,
      "grad_norm": 0.2752649784088135,
      "learning_rate": 0.0001822818752888408,
      "loss": 1.1843,
      "step": 1200
    },
    {
      "epoch": 0.6012506012506013,
      "grad_norm": 0.19733025133609772,
      "learning_rate": 0.00018082465216315882,
      "loss": 1.1766,
      "step": 1250
    },
    {
      "epoch": 0.6253006253006252,
      "grad_norm": 0.2180165797472,
      "learning_rate": 0.00017931616543122214,
      "loss": 1.1865,
      "step": 1300
    },
    {
      "epoch": 0.6493506493506493,
      "grad_norm": 0.25025510787963867,
      "learning_rate": 0.00017775737186139038,
      "loss": 1.1723,
      "step": 1350
    },
    {
      "epoch": 0.6734006734006734,
      "grad_norm": 0.2865007817745209,
      "learning_rate": 0.00017614926012949028,
      "loss": 1.172,
      "step": 1400
    },
    {
      "epoch": 0.6974506974506974,
      "grad_norm": 0.3406023681163788,
      "learning_rate": 0.00017449285019174098,
      "loss": 1.1795,
      "step": 1450
    },
    {
      "epoch": 0.7215007215007215,
      "grad_norm": 0.19766800105571747,
      "learning_rate": 0.00017278919263783978,
      "loss": 1.1784,
      "step": 1500
    },
    {
      "epoch": 0.7455507455507455,
      "grad_norm": 0.1965962052345276,
      "learning_rate": 0.00017103936802461797,
      "loss": 1.1754,
      "step": 1550
    },
    {
      "epoch": 0.7696007696007696,
      "grad_norm": 0.2381555736064911,
      "learning_rate": 0.00016924448619069023,
      "loss": 1.1671,
      "step": 1600
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 0.20156389474868774,
      "learning_rate": 0.00016740568555253155,
      "loss": 1.1738,
      "step": 1650
    },
    {
      "epoch": 0.8177008177008177,
      "grad_norm": 0.18294361233711243,
      "learning_rate": 0.00016552413238242857,
      "loss": 1.1727,
      "step": 1700
    },
    {
      "epoch": 0.8417508417508418,
      "grad_norm": 0.2975623309612274,
      "learning_rate": 0.00016360102006876317,
      "loss": 1.1677,
      "step": 1750
    },
    {
      "epoch": 0.8658008658008658,
      "grad_norm": 0.1871371865272522,
      "learning_rate": 0.0001616375683590974,
      "loss": 1.1689,
      "step": 1800
    },
    {
      "epoch": 0.8898508898508899,
      "grad_norm": 0.21457934379577637,
      "learning_rate": 0.00015963502258654005,
      "loss": 1.1605,
      "step": 1850
    },
    {
      "epoch": 0.9139009139009139,
      "grad_norm": 0.20261706411838531,
      "learning_rate": 0.0001575946528798853,
      "loss": 1.1627,
      "step": 1900
    },
    {
      "epoch": 0.937950937950938,
      "grad_norm": 0.17685186862945557,
      "learning_rate": 0.0001555177533580245,
      "loss": 1.1627,
      "step": 1950
    },
    {
      "epoch": 0.962000962000962,
      "grad_norm": 0.212468221783638,
      "learning_rate": 0.00015340564130914233,
      "loss": 1.161,
      "step": 2000
    },
    {
      "epoch": 0.9860509860509861,
      "grad_norm": 0.175174742937088,
      "learning_rate": 0.00015125965635521724,
      "loss": 1.1688,
      "step": 2050
    },
    {
      "epoch": 1.0101010101010102,
      "grad_norm": 0.19970253109931946,
      "learning_rate": 0.00014908115960235682,
      "loss": 1.142,
      "step": 2100
    },
    {
      "epoch": 1.034151034151034,
      "grad_norm": 0.21254608035087585,
      "learning_rate": 0.00014687153277750676,
      "loss": 1.1271,
      "step": 2150
    },
    {
      "epoch": 1.0582010582010581,
      "grad_norm": 0.1651500016450882,
      "learning_rate": 0.00014463217735208062,
      "loss": 1.121,
      "step": 2200
    },
    {
      "epoch": 1.0822510822510822,
      "grad_norm": 0.2405405044555664,
      "learning_rate": 0.00014236451365306674,
      "loss": 1.1313,
      "step": 2250
    },
    {
      "epoch": 1.1063011063011063,
      "grad_norm": 0.17223596572875977,
      "learning_rate": 0.00014006997996217593,
      "loss": 1.1344,
      "step": 2300
    },
    {
      "epoch": 1.1303511303511304,
      "grad_norm": 0.1969347894191742,
      "learning_rate": 0.00013775003160360096,
      "loss": 1.1176,
      "step": 2350
    },
    {
      "epoch": 1.1544011544011543,
      "grad_norm": 0.187143936753273,
      "learning_rate": 0.00013540614002096701,
      "loss": 1.1322,
      "step": 2400
    },
    {
      "epoch": 1.1784511784511784,
      "grad_norm": 0.1838238537311554,
      "learning_rate": 0.00013303979184405826,
      "loss": 1.1293,
      "step": 2450
    },
    {
      "epoch": 1.2025012025012025,
      "grad_norm": 0.17928341031074524,
      "learning_rate": 0.00013065248794591223,
      "loss": 1.1268,
      "step": 2500
    },
    {
      "epoch": 1.2265512265512266,
      "grad_norm": 0.2683047950267792,
      "learning_rate": 0.00012824574249088063,
      "loss": 1.1234,
      "step": 2550
    },
    {
      "epoch": 1.2506012506012505,
      "grad_norm": 0.18034860491752625,
      "learning_rate": 0.0001258210819742599,
      "loss": 1.125,
      "step": 2600
    },
    {
      "epoch": 1.2746512746512746,
      "grad_norm": 0.26357391476631165,
      "learning_rate": 0.00012338004425410074,
      "loss": 1.1217,
      "step": 2650
    },
    {
      "epoch": 1.2987012987012987,
      "grad_norm": 0.17828579246997833,
      "learning_rate": 0.00012092417757581085,
      "loss": 1.1262,
      "step": 2700
    },
    {
      "epoch": 1.3227513227513228,
      "grad_norm": 0.20247310400009155,
      "learning_rate": 0.00011845503959016928,
      "loss": 1.1246,
      "step": 2750
    },
    {
      "epoch": 1.3468013468013469,
      "grad_norm": 0.17381271719932556,
      "learning_rate": 0.0001159741963653755,
      "loss": 1.1181,
      "step": 2800
    },
    {
      "epoch": 1.370851370851371,
      "grad_norm": 0.19958114624023438,
      "learning_rate": 0.00011348322139375948,
      "loss": 1.1307,
      "step": 2850
    },
    {
      "epoch": 1.3949013949013949,
      "grad_norm": 0.21912401914596558,
      "learning_rate": 0.00011098369459378328,
      "loss": 1.1264,
      "step": 2900
    },
    {
      "epoch": 1.418951418951419,
      "grad_norm": 0.1694297194480896,
      "learning_rate": 0.00010847720130796631,
      "loss": 1.1256,
      "step": 2950
    },
    {
      "epoch": 1.443001443001443,
      "grad_norm": 0.13446395099163055,
      "learning_rate": 0.00010596533129737092,
      "loss": 1.1258,
      "step": 3000
    },
    {
      "epoch": 1.467051467051467,
      "grad_norm": 0.140371173620224,
      "learning_rate": 0.00010344967773328507,
      "loss": 1.1191,
      "step": 3050
    },
    {
      "epoch": 1.491101491101491,
      "grad_norm": 0.18016813695430756,
      "learning_rate": 0.00010093183618674224,
      "loss": 1.114,
      "step": 3100
    },
    {
      "epoch": 1.5151515151515151,
      "grad_norm": 0.17306862771511078,
      "learning_rate": 9.84134036165192e-05,
      "loss": 1.1149,
      "step": 3150
    },
    {
      "epoch": 1.5392015392015392,
      "grad_norm": 0.14116255939006805,
      "learning_rate": 9.589597735625377e-05,
      "loss": 1.123,
      "step": 3200
    },
    {
      "epoch": 1.5632515632515633,
      "grad_norm": 0.16819800436496735,
      "learning_rate": 9.338115410132441e-05,
      "loss": 1.1203,
      "step": 3250
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 0.21958529949188232,
      "learning_rate": 9.087052889613518e-05,
      "loss": 1.1226,
      "step": 3300
    },
    {
      "epoch": 1.6113516113516113,
      "grad_norm": 0.15786272287368774,
      "learning_rate": 8.836569412244745e-05,
      "loss": 1.1212,
      "step": 3350
    },
    {
      "epoch": 1.6354016354016354,
      "grad_norm": 0.17366796731948853,
      "learning_rate": 8.586823848940047e-05,
      "loss": 1.1129,
      "step": 3400
    },
    {
      "epoch": 1.6594516594516593,
      "grad_norm": 0.21448016166687012,
      "learning_rate": 8.337974602586152e-05,
      "loss": 1.1216,
      "step": 3450
    },
    {
      "epoch": 1.6835016835016834,
      "grad_norm": 0.17243099212646484,
      "learning_rate": 8.090179507574427e-05,
      "loss": 1.1096,
      "step": 3500
    },
    {
      "epoch": 1.7075517075517075,
      "grad_norm": 0.1429734081029892,
      "learning_rate": 7.843595729693316e-05,
      "loss": 1.1071,
      "step": 3550
    },
    {
      "epoch": 1.7316017316017316,
      "grad_norm": 0.15200386941432953,
      "learning_rate": 7.598379666444808e-05,
      "loss": 1.1158,
      "step": 3600
    },
    {
      "epoch": 1.7556517556517557,
      "grad_norm": 0.1442406326532364,
      "learning_rate": 7.354686847848242e-05,
      "loss": 1.112,
      "step": 3650
    },
    {
      "epoch": 1.7797017797017798,
      "grad_norm": 0.17678239941596985,
      "learning_rate": 7.11267183779428e-05,
      "loss": 1.1118,
      "step": 3700
    },
    {
      "epoch": 1.8037518037518039,
      "grad_norm": 0.147593155503273,
      "learning_rate": 6.872488136011667e-05,
      "loss": 1.1165,
      "step": 3750
    },
    {
      "epoch": 1.8278018278018278,
      "grad_norm": 0.1334652155637741,
      "learning_rate": 6.634288080708952e-05,
      "loss": 1.1135,
      "step": 3800
    },
    {
      "epoch": 1.8518518518518519,
      "grad_norm": 0.14890378713607788,
      "learning_rate": 6.398222751952899e-05,
      "loss": 1.1086,
      "step": 3850
    },
    {
      "epoch": 1.8759018759018757,
      "grad_norm": 0.1334807574748993,
      "learning_rate": 6.164441875844882e-05,
      "loss": 1.1144,
      "step": 3900
    },
    {
      "epoch": 1.8999518999518998,
      "grad_norm": 0.12897680699825287,
      "learning_rate": 5.933093729556062e-05,
      "loss": 1.1116,
      "step": 3950
    },
    {
      "epoch": 1.924001924001924,
      "grad_norm": 0.17530564963817596,
      "learning_rate": 5.7043250472815356e-05,
      "loss": 1.1039,
      "step": 4000
    },
    {
      "epoch": 1.948051948051948,
      "grad_norm": 0.15966495871543884,
      "learning_rate": 5.478280927173145e-05,
      "loss": 1.101,
      "step": 4050
    },
    {
      "epoch": 1.9721019721019721,
      "grad_norm": 0.18890446424484253,
      "learning_rate": 5.255104739309924e-05,
      "loss": 1.1077,
      "step": 4100
    },
    {
      "epoch": 1.9961519961519962,
      "grad_norm": 0.1547369807958603,
      "learning_rate": 5.0349380347646494e-05,
      "loss": 1.103,
      "step": 4150
    },
    {
      "epoch": 2.0202020202020203,
      "grad_norm": 0.13888758420944214,
      "learning_rate": 4.8179204558240444e-05,
      "loss": 1.0826,
      "step": 4200
    },
    {
      "epoch": 2.0442520442520444,
      "grad_norm": 0.11266086250543594,
      "learning_rate": 4.6041896474197e-05,
      "loss": 1.071,
      "step": 4250
    },
    {
      "epoch": 2.068302068302068,
      "grad_norm": 0.14245671033859253,
      "learning_rate": 4.393881169825779e-05,
      "loss": 1.0759,
      "step": 4300
    },
    {
      "epoch": 2.092352092352092,
      "grad_norm": 0.1226249411702156,
      "learning_rate": 4.187128412678969e-05,
      "loss": 1.0742,
      "step": 4350
    },
    {
      "epoch": 2.1164021164021163,
      "grad_norm": 0.12307476997375488,
      "learning_rate": 3.984062510375155e-05,
      "loss": 1.0721,
      "step": 4400
    },
    {
      "epoch": 2.1404521404521404,
      "grad_norm": 0.12813834846019745,
      "learning_rate": 3.7848122588965144e-05,
      "loss": 1.0726,
      "step": 4450
    },
    {
      "epoch": 2.1645021645021645,
      "grad_norm": 0.13432885706424713,
      "learning_rate": 3.5895040341217543e-05,
      "loss": 1.0745,
      "step": 4500
    },
    {
      "epoch": 2.1885521885521886,
      "grad_norm": 0.11649097502231598,
      "learning_rate": 3.398261711671309e-05,
      "loss": 1.079,
      "step": 4550
    },
    {
      "epoch": 2.2126022126022127,
      "grad_norm": 0.11140163242816925,
      "learning_rate": 3.211206588338358e-05,
      "loss": 1.0748,
      "step": 4600
    },
    {
      "epoch": 2.236652236652237,
      "grad_norm": 0.10978424549102783,
      "learning_rate": 3.028457305155483e-05,
      "loss": 1.0726,
      "step": 4650
    },
    {
      "epoch": 2.260702260702261,
      "grad_norm": 0.11395589262247086,
      "learning_rate": 2.8501297721457422e-05,
      "loss": 1.0656,
      "step": 4700
    },
    {
      "epoch": 2.284752284752285,
      "grad_norm": 0.10599405318498611,
      "learning_rate": 2.6763370948059353e-05,
      "loss": 1.0765,
      "step": 4750
    },
    {
      "epoch": 2.3088023088023086,
      "grad_norm": 0.11157254874706268,
      "learning_rate": 2.5071895023686442e-05,
      "loss": 1.0726,
      "step": 4800
    },
    {
      "epoch": 2.3328523328523327,
      "grad_norm": 0.1390163153409958,
      "learning_rate": 2.342794277888547e-05,
      "loss": 1.0731,
      "step": 4850
    },
    {
      "epoch": 2.356902356902357,
      "grad_norm": 0.1519329994916916,
      "learning_rate": 2.1832556901973965e-05,
      "loss": 1.0704,
      "step": 4900
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 0.1278182566165924,
      "learning_rate": 2.0286749277707782e-05,
      "loss": 1.0661,
      "step": 4950
    },
    {
      "epoch": 2.405002405002405,
      "grad_norm": 0.10508263111114502,
      "learning_rate": 1.879150034548588e-05,
      "loss": 1.0758,
      "step": 5000
    },
    {
      "epoch": 2.429052429052429,
      "grad_norm": 0.09690719097852707,
      "learning_rate": 1.7347758477500044e-05,
      "loss": 1.0644,
      "step": 5050
    },
    {
      "epoch": 2.4531024531024532,
      "grad_norm": 0.10174595564603806,
      "learning_rate": 1.5956439377222798e-05,
      "loss": 1.0726,
      "step": 5100
    },
    {
      "epoch": 2.4771524771524773,
      "grad_norm": 0.10294167697429657,
      "learning_rate": 1.4618425498616162e-05,
      "loss": 1.0655,
      "step": 5150
    },
    {
      "epoch": 2.501202501202501,
      "grad_norm": 0.11103129386901855,
      "learning_rate": 1.3334565486428996e-05,
      "loss": 1.0651,
      "step": 5200
    },
    {
      "epoch": 2.525252525252525,
      "grad_norm": 0.10614852607250214,
      "learning_rate": 1.2105673637938053e-05,
      "loss": 1.0701,
      "step": 5250
    },
    {
      "epoch": 2.549302549302549,
      "grad_norm": 0.09437720477581024,
      "learning_rate": 1.0932529386474188e-05,
      "loss": 1.0673,
      "step": 5300
    },
    {
      "epoch": 2.5733525733525733,
      "grad_norm": 0.0965106412768364,
      "learning_rate": 9.815876807061264e-06,
      "loss": 1.0769,
      "step": 5350
    },
    {
      "epoch": 2.5974025974025974,
      "grad_norm": 0.09335634112358093,
      "learning_rate": 8.756424144481312e-06,
      "loss": 1.0646,
      "step": 5400
    },
    {
      "epoch": 2.6214526214526215,
      "grad_norm": 0.09890544414520264,
      "learning_rate": 7.75484336406529e-06,
      "loss": 1.0757,
      "step": 5450
    },
    {
      "epoch": 2.6455026455026456,
      "grad_norm": 0.09670912474393845,
      "learning_rate": 6.8117697254943106e-06,
      "loss": 1.0668,
      "step": 5500
    },
    {
      "epoch": 2.6695526695526697,
      "grad_norm": 0.09898468106985092,
      "learning_rate": 5.927801379881714e-06,
      "loss": 1.0745,
      "step": 5550
    },
    {
      "epoch": 2.6936026936026938,
      "grad_norm": 0.08697386831045151,
      "learning_rate": 5.103498990391509e-06,
      "loss": 1.0653,
      "step": 5600
    },
    {
      "epoch": 2.717652717652718,
      "grad_norm": 0.09457134455442429,
      "learning_rate": 4.339385376633775e-06,
      "loss": 1.0678,
      "step": 5650
    },
    {
      "epoch": 2.741702741702742,
      "grad_norm": 0.09092475473880768,
      "learning_rate": 3.6359451830626723e-06,
      "loss": 1.0635,
      "step": 5700
    },
    {
      "epoch": 2.7657527657527656,
      "grad_norm": 0.08736653625965118,
      "learning_rate": 2.993624571587239e-06,
      "loss": 1.0639,
      "step": 5750
    },
    {
      "epoch": 2.7898027898027897,
      "grad_norm": 0.09138292819261551,
      "learning_rate": 2.4128309385900717e-06,
      "loss": 1.065,
      "step": 5800
    },
    {
      "epoch": 2.813852813852814,
      "grad_norm": 0.08842656016349792,
      "learning_rate": 1.8939326565333037e-06,
      "loss": 1.0636,
      "step": 5850
    },
    {
      "epoch": 2.837902837902838,
      "grad_norm": 0.08870802819728851,
      "learning_rate": 1.437258840315714e-06,
      "loss": 1.0706,
      "step": 5900
    },
    {
      "epoch": 2.861952861952862,
      "grad_norm": 0.08659425377845764,
      "learning_rate": 1.0430991385293575e-06,
      "loss": 1.0673,
      "step": 5950
    },
    {
      "epoch": 2.886002886002886,
      "grad_norm": 0.08142086863517761,
      "learning_rate": 7.117035497478553e-07,
      "loss": 1.0697,
      "step": 6000
    },
    {
      "epoch": 2.91005291005291,
      "grad_norm": 0.080448217689991,
      "learning_rate": 4.432822639630407e-07,
      "loss": 1.0655,
      "step": 6050
    },
    {
      "epoch": 2.934102934102934,
      "grad_norm": 0.08980288356542587,
      "learning_rate": 2.380055292704575e-07,
      "loss": 1.0701,
      "step": 6100
    },
    {
      "epoch": 2.958152958152958,
      "grad_norm": 0.08309097588062286,
      "learning_rate": 9.600354388833443e-08,
      "loss": 1.0684,
      "step": 6150
    },
    {
      "epoch": 2.982202982202982,
      "grad_norm": 0.08456841111183167,
      "learning_rate": 1.7366373578442397e-08,
      "loss": 1.0684,
      "step": 6200
    }
  ],
  "logging_steps": 50,
  "max_steps": 6237,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.056700790948663e+20,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}