phospho-app
/

lego-pickup-mono-setup-8d8zmkubs9

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.5,
+  "eval_steps": 500,
+  "global_step": 1455,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01718213058419244,
+      "grad_norm": 4.919781684875488,
+      "learning_rate": 2.7397260273972603e-05,
+      "loss": 0.9081,
+      "step": 10
+    },
+    {
+      "epoch": 0.03436426116838488,
+      "grad_norm": 5.393226623535156,
+      "learning_rate": 5.479452054794521e-05,
+      "loss": 0.4511,
+      "step": 20
+    },
+    {
+      "epoch": 0.05154639175257732,
+      "grad_norm": 2.135725259780884,
+      "learning_rate": 8.219178082191781e-05,
+      "loss": 0.289,
+      "step": 30
+    },
+    {
+      "epoch": 0.06872852233676977,
+      "grad_norm": 2.005138397216797,
+      "learning_rate": 0.00010958904109589041,
+      "loss": 0.2236,
+      "step": 40
+    },
+    {
+      "epoch": 0.0859106529209622,
+      "grad_norm": 2.274747371673584,
+      "learning_rate": 0.000136986301369863,
+      "loss": 0.2057,
+      "step": 50
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 2.1100614070892334,
+      "learning_rate": 0.00016438356164383562,
+      "loss": 0.1831,
+      "step": 60
+    },
+    {
+      "epoch": 0.12027491408934708,
+      "grad_norm": 0.9432219862937927,
+      "learning_rate": 0.0001917808219178082,
+      "loss": 0.1452,
+      "step": 70
+    },
+    {
+      "epoch": 0.13745704467353953,
+      "grad_norm": 1.4754092693328857,
+      "learning_rate": 0.00019998733979961563,
+      "loss": 0.1496,
+      "step": 80
+    },
+    {
+      "epoch": 0.15463917525773196,
+      "grad_norm": 1.504097580909729,
+      "learning_rate": 0.0001999253383717226,
+      "loss": 0.1542,
+      "step": 90
+    },
+    {
+      "epoch": 0.1718213058419244,
+      "grad_norm": 1.2584880590438843,
+      "learning_rate": 0.00019981170237143067,
+      "loss": 0.1341,
+      "step": 100
+    },
+    {
+      "epoch": 0.18900343642611683,
+      "grad_norm": 1.030260443687439,
+      "learning_rate": 0.00019964649051804355,
+      "loss": 0.1153,
+      "step": 110
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 0.6816794276237488,
+      "learning_rate": 0.000199429788181734,
+      "loss": 0.1458,
+      "step": 120
+    },
+    {
+      "epoch": 0.22336769759450173,
+      "grad_norm": 1.3301407098770142,
+      "learning_rate": 0.0001991617073394306,
+      "loss": 0.0968,
+      "step": 130
+    },
+    {
+      "epoch": 0.24054982817869416,
+      "grad_norm": 1.0439300537109375,
+      "learning_rate": 0.00019884238651695556,
+      "loss": 0.1089,
+      "step": 140
+    },
+    {
+      "epoch": 0.25773195876288657,
+      "grad_norm": 1.2270495891571045,
+      "learning_rate": 0.00019847199071744415,
+      "loss": 0.1115,
+      "step": 150
+    },
+    {
+      "epoch": 0.27491408934707906,
+      "grad_norm": 0.5990088582038879,
+      "learning_rate": 0.00019805071133608242,
+      "loss": 0.101,
+      "step": 160
+    },
+    {
+      "epoch": 0.2920962199312715,
+      "grad_norm": 0.9263975024223328,
+      "learning_rate": 0.0001975787660612072,
+      "loss": 0.0987,
+      "step": 170
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 0.6488857865333557,
+      "learning_rate": 0.00019705639876181969,
+      "loss": 0.0761,
+      "step": 180
+    },
+    {
+      "epoch": 0.32646048109965636,
+      "grad_norm": 1.0950772762298584,
+      "learning_rate": 0.00019648387936157068,
+      "loss": 0.1128,
+      "step": 190
+    },
+    {
+      "epoch": 0.3436426116838488,
+      "grad_norm": 1.5179388523101807,
+      "learning_rate": 0.00019586150369928245,
+      "loss": 0.1069,
+      "step": 200
+    },
+    {
+      "epoch": 0.36082474226804123,
+      "grad_norm": 0.7192593216896057,
+      "learning_rate": 0.00019518959337607957,
+      "loss": 0.0902,
+      "step": 210
+    },
+    {
+      "epoch": 0.37800687285223367,
+      "grad_norm": 0.8463940620422363,
+      "learning_rate": 0.0001944684955892075,
+      "loss": 0.0883,
+      "step": 220
+    },
+    {
+      "epoch": 0.3951890034364261,
+      "grad_norm": 0.8777188658714294,
+      "learning_rate": 0.0001936985829526247,
+      "loss": 0.0783,
+      "step": 230
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 0.8422673940658569,
+      "learning_rate": 0.00019288025330446126,
+      "loss": 0.0842,
+      "step": 240
+    },
+    {
+      "epoch": 0.42955326460481097,
+      "grad_norm": 0.7569606900215149,
+      "learning_rate": 0.00019201392950144363,
+      "loss": 0.0765,
+      "step": 250
+    },
+    {
+      "epoch": 0.44673539518900346,
+      "grad_norm": 0.7297337055206299,
+      "learning_rate": 0.0001911000592003909,
+      "loss": 0.0763,
+      "step": 260
+    },
+    {
+      "epoch": 0.4639175257731959,
+      "grad_norm": 0.570925772190094,
+      "learning_rate": 0.00019013911462689668,
+      "loss": 0.0687,
+      "step": 270
+    },
+    {
+      "epoch": 0.48109965635738833,
+      "grad_norm": 0.7235879302024841,
+      "learning_rate": 0.000189131592331315,
+      "loss": 0.0884,
+      "step": 280
+    },
+    {
+      "epoch": 0.49828178694158076,
+      "grad_norm": 0.3659440875053406,
+      "learning_rate": 0.00018807801293217735,
+      "loss": 0.0613,
+      "step": 290
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 0.5860876441001892,
+      "learning_rate": 0.00018697892084717238,
+      "loss": 0.0739,
+      "step": 300
+    },
+    {
+      "epoch": 0.5326460481099656,
+      "grad_norm": 0.7031033039093018,
+      "learning_rate": 0.00018583488401182843,
+      "loss": 0.0637,
+      "step": 310
+    },
+    {
+      "epoch": 0.5498281786941581,
+      "grad_norm": 0.6644700765609741,
+      "learning_rate": 0.0001846464935860431,
+      "loss": 0.0682,
+      "step": 320
+    },
+    {
+      "epoch": 0.5670103092783505,
+      "grad_norm": 0.7531244158744812,
+      "learning_rate": 0.0001834143636486124,
+      "loss": 0.0723,
+      "step": 330
+    },
+    {
+      "epoch": 0.584192439862543,
+      "grad_norm": 0.7708925008773804,
+      "learning_rate": 0.00018213913087991685,
+      "loss": 0.0619,
+      "step": 340
+    },
+    {
+      "epoch": 0.6013745704467354,
+      "grad_norm": 0.716632604598999,
+      "learning_rate": 0.00018082145423292868,
+      "loss": 0.0613,
+      "step": 350
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 0.4382629096508026,
+      "learning_rate": 0.0001794620145927101,
+      "loss": 0.0614,
+      "step": 360
+    },
+    {
+      "epoch": 0.6357388316151202,
+      "grad_norm": 0.6322966814041138,
+      "learning_rate": 0.00017806151442457827,
+      "loss": 0.0581,
+      "step": 370
+    },
+    {
+      "epoch": 0.6529209621993127,
+      "grad_norm": 0.5152290463447571,
+      "learning_rate": 0.00017662067741111974,
+      "loss": 0.07,
+      "step": 380
+    },
+    {
+      "epoch": 0.6701030927835051,
+      "grad_norm": 0.6691680550575256,
+      "learning_rate": 0.00017514024807824055,
+      "loss": 0.0839,
+      "step": 390
+    },
+    {
+      "epoch": 0.6872852233676976,
+      "grad_norm": 0.5262071490287781,
+      "learning_rate": 0.00017362099141044626,
+      "loss": 0.052,
+      "step": 400
+    },
+    {
+      "epoch": 0.7044673539518901,
+      "grad_norm": 0.6715118288993835,
+      "learning_rate": 0.00017206369245555036,
+      "loss": 0.0682,
+      "step": 410
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 0.6439153552055359,
+      "learning_rate": 0.0001704691559190155,
+      "loss": 0.0733,
+      "step": 420
+    },
+    {
+      "epoch": 0.738831615120275,
+      "grad_norm": 0.37341853976249695,
+      "learning_rate": 0.0001688382057481364,
+      "loss": 0.0653,
+      "step": 430
+    },
+    {
+      "epoch": 0.7560137457044673,
+      "grad_norm": 1.1194217205047607,
+      "learning_rate": 0.00016717168470628077,
+      "loss": 0.0735,
+      "step": 440
+    },
+    {
+      "epoch": 0.7731958762886598,
+      "grad_norm": 0.6904529333114624,
+      "learning_rate": 0.0001654704539374066,
+      "loss": 0.0652,
+      "step": 450
+    },
+    {
+      "epoch": 0.7903780068728522,
+      "grad_norm": 0.8207924962043762,
+      "learning_rate": 0.00016373539252108202,
+      "loss": 0.0479,
+      "step": 460
+    },
+    {
+      "epoch": 0.8075601374570447,
+      "grad_norm": 0.5141025185585022,
+      "learning_rate": 0.00016196739701823716,
+      "loss": 0.0626,
+      "step": 470
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 0.46161457896232605,
+      "learning_rate": 0.00016016738100788297,
+      "loss": 0.0469,
+      "step": 480
+    },
+    {
+      "epoch": 0.8419243986254296,
+      "grad_norm": 0.3261314034461975,
+      "learning_rate": 0.00015833627461503595,
+      "loss": 0.0489,
+      "step": 490
+    },
+    {
+      "epoch": 0.8591065292096219,
+      "grad_norm": 0.564329981803894,
+      "learning_rate": 0.0001564750240300934,
+      "loss": 0.0445,
+      "step": 500
+    },
+    {
+      "epoch": 0.8762886597938144,
+      "grad_norm": 0.5079665184020996,
+      "learning_rate": 0.00015458459101990693,
+      "loss": 0.0557,
+      "step": 510
+    },
+    {
+      "epoch": 0.8934707903780069,
+      "grad_norm": 0.41103053092956543,
+      "learning_rate": 0.00015266595243080714,
+      "loss": 0.042,
+      "step": 520
+    },
+    {
+      "epoch": 0.9106529209621993,
+      "grad_norm": 0.5678098201751709,
+      "learning_rate": 0.00015072009968383656,
+      "loss": 0.0526,
+      "step": 530
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 0.6878070831298828,
+      "learning_rate": 0.00014874803826245089,
+      "loss": 0.0605,
+      "step": 540
+    },
+    {
+      "epoch": 0.9450171821305842,
+      "grad_norm": 0.586892306804657,
+      "learning_rate": 0.00014675078719295415,
+      "loss": 0.0494,
+      "step": 550
+    },
+    {
+      "epoch": 0.9621993127147767,
+      "grad_norm": 0.4587668776512146,
+      "learning_rate": 0.00014472937851793557,
+      "loss": 0.0515,
+      "step": 560
+    },
+    {
+      "epoch": 0.979381443298969,
+      "grad_norm": 0.5171328186988831,
+      "learning_rate": 0.00014268485676298078,
+      "loss": 0.0576,
+      "step": 570
+    },
+    {
+      "epoch": 0.9965635738831615,
+      "grad_norm": 0.6215698719024658,
+      "learning_rate": 0.0001406182783969324,
+      "loss": 0.0537,
+      "step": 580
+    },
+    {
+      "epoch": 1.013745704467354,
+      "grad_norm": 0.40490490198135376,
+      "learning_rate": 0.00013853071128597924,
+      "loss": 0.0467,
+      "step": 590
+    },
+    {
+      "epoch": 1.0309278350515463,
+      "grad_norm": 0.7031866312026978,
+      "learning_rate": 0.0001364232341418564,
+      "loss": 0.049,
+      "step": 600
+    },
+    {
+      "epoch": 1.0481099656357389,
+      "grad_norm": 0.3372012972831726,
+      "learning_rate": 0.00013429693596444067,
+      "loss": 0.0598,
+      "step": 610
+    },
+    {
+      "epoch": 1.0652920962199313,
+      "grad_norm": 0.4208463728427887,
+      "learning_rate": 0.00013215291547903006,
+      "loss": 0.0549,
+      "step": 620
+    },
+    {
+      "epoch": 1.0824742268041236,
+      "grad_norm": 0.5574418902397156,
+      "learning_rate": 0.00012999228056859784,
+      "loss": 0.0469,
+      "step": 630
+    },
+    {
+      "epoch": 1.0996563573883162,
+      "grad_norm": 0.5417460203170776,
+      "learning_rate": 0.00012781614770131442,
+      "loss": 0.0397,
+      "step": 640
+    },
+    {
+      "epoch": 1.1168384879725086,
+      "grad_norm": 0.38392218947410583,
+      "learning_rate": 0.00012562564135363313,
+      "loss": 0.0409,
+      "step": 650
+    },
+    {
+      "epoch": 1.134020618556701,
+      "grad_norm": 0.3199153244495392,
+      "learning_rate": 0.0001234218934292376,
+      "loss": 0.0402,
+      "step": 660
+    },
+    {
+      "epoch": 1.1512027491408934,
+      "grad_norm": 0.33709654211997986,
+      "learning_rate": 0.00012120604267415172,
+      "loss": 0.0338,
+      "step": 670
+    },
+    {
+      "epoch": 1.168384879725086,
+      "grad_norm": 0.685562789440155,
+      "learning_rate": 0.00011897923408831346,
+      "loss": 0.0514,
+      "step": 680
+    },
+    {
+      "epoch": 1.1855670103092784,
+      "grad_norm": 0.6378128528594971,
+      "learning_rate": 0.0001167426183339174,
+      "loss": 0.0385,
+      "step": 690
+    },
+    {
+      "epoch": 1.2027491408934707,
+      "grad_norm": 0.39844685792922974,
+      "learning_rate": 0.00011449735114083127,
+      "loss": 0.0355,
+      "step": 700
+    },
+    {
+      "epoch": 1.2199312714776633,
+      "grad_norm": 0.5079712271690369,
+      "learning_rate": 0.00011224459270939384,
+      "loss": 0.0433,
+      "step": 710
+    },
+    {
+      "epoch": 1.2371134020618557,
+      "grad_norm": 0.3737178444862366,
+      "learning_rate": 0.000109985507110903,
+      "loss": 0.0418,
+      "step": 720
+    },
+    {
+      "epoch": 1.254295532646048,
+      "grad_norm": 0.5172721147537231,
+      "learning_rate": 0.00010772126168610325,
+      "loss": 0.0484,
+      "step": 730
+    },
+    {
+      "epoch": 1.2714776632302405,
+      "grad_norm": 0.3860677480697632,
+      "learning_rate": 0.00010545302644198405,
+      "loss": 0.0421,
+      "step": 740
+    },
+    {
+      "epoch": 1.2886597938144329,
+      "grad_norm": 0.37599268555641174,
+      "learning_rate": 0.00010318197344720018,
+      "loss": 0.0428,
+      "step": 750
+    },
+    {
+      "epoch": 1.3058419243986255,
+      "grad_norm": 0.26397764682769775,
+      "learning_rate": 0.0001009092762264271,
+      "loss": 0.0402,
+      "step": 760
+    },
+    {
+      "epoch": 1.3230240549828178,
+      "grad_norm": 0.35941213369369507,
+      "learning_rate": 9.863610915396365e-05,
+      "loss": 0.0404,
+      "step": 770
+    },
+    {
+      "epoch": 1.3402061855670104,
+      "grad_norm": 0.47325387597084045,
+      "learning_rate": 9.63636468468959e-05,
+      "loss": 0.0446,
+      "step": 780
+    },
+    {
+      "epoch": 1.3573883161512028,
+      "grad_norm": 0.4716935455799103,
+      "learning_rate": 9.409306355813529e-05,
+      "loss": 0.0342,
+      "step": 790
+    },
+    {
+      "epoch": 1.3745704467353952,
+      "grad_norm": 0.2812814712524414,
+      "learning_rate": 9.18255325696454e-05,
+      "loss": 0.0343,
+      "step": 800
+    },
+    {
+      "epoch": 1.3917525773195876,
+      "grad_norm": 0.29519158601760864,
+      "learning_rate": 8.956222558616998e-05,
+      "loss": 0.0372,
+      "step": 810
+    },
+    {
+      "epoch": 1.40893470790378,
+      "grad_norm": 0.41123560070991516,
+      "learning_rate": 8.730431212977625e-05,
+      "loss": 0.0567,
+      "step": 820
+    },
+    {
+      "epoch": 1.4261168384879725,
+      "grad_norm": 0.3533785343170166,
+      "learning_rate": 8.505295893552594e-05,
+      "loss": 0.0385,
+      "step": 830
+    },
+    {
+      "epoch": 1.443298969072165,
+      "grad_norm": 0.4819997251033783,
+      "learning_rate": 8.280932934858652e-05,
+      "loss": 0.0392,
+      "step": 840
+    },
+    {
+      "epoch": 1.4604810996563573,
+      "grad_norm": 0.285748690366745,
+      "learning_rate": 8.05745827230941e-05,
+      "loss": 0.0362,
+      "step": 850
+    },
+    {
+      "epoch": 1.47766323024055,
+      "grad_norm": 0.39609745144844055,
+      "learning_rate": 7.834987382307861e-05,
+      "loss": 0.0432,
+      "step": 860
+    },
+    {
+      "epoch": 1.4948453608247423,
+      "grad_norm": 0.33522850275039673,
+      "learning_rate": 7.613635222576072e-05,
+      "loss": 0.04,
+      "step": 870
+    },
+    {
+      "epoch": 1.5120274914089347,
+      "grad_norm": 0.47676974534988403,
+      "learning_rate": 7.393516172752919e-05,
+      "loss": 0.0347,
+      "step": 880
+    },
+    {
+      "epoch": 1.529209621993127,
+      "grad_norm": 0.5136341452598572,
+      "learning_rate": 7.174743975290513e-05,
+      "loss": 0.0374,
+      "step": 890
+    },
+    {
+      "epoch": 1.5463917525773194,
+      "grad_norm": 0.361741840839386,
+      "learning_rate": 6.957431676679896e-05,
+      "loss": 0.0358,
+      "step": 900
+    },
+    {
+      "epoch": 1.563573883161512,
+      "grad_norm": 0.49855756759643555,
+      "learning_rate": 6.741691569036338e-05,
+      "loss": 0.0352,
+      "step": 910
+    },
+    {
+      "epoch": 1.5807560137457046,
+      "grad_norm": 0.26860520243644714,
+      "learning_rate": 6.527635132074493e-05,
+      "loss": 0.0397,
+      "step": 920
+    },
+    {
+      "epoch": 1.597938144329897,
+      "grad_norm": 0.4026907682418823,
+      "learning_rate": 6.315372975503285e-05,
+      "loss": 0.0495,
+      "step": 930
+    },
+    {
+      "epoch": 1.6151202749140894,
+      "grad_norm": 0.377718448638916,
+      "learning_rate": 6.1050147818704e-05,
+      "loss": 0.0328,
+      "step": 940
+    },
+    {
+      "epoch": 1.6323024054982818,
+      "grad_norm": 0.41204559803009033,
+      "learning_rate": 5.896669249885851e-05,
+      "loss": 0.0336,
+      "step": 950
+    },
+    {
+      "epoch": 1.6494845360824741,
+      "grad_norm": 0.41056016087532043,
+      "learning_rate": 5.690444038253935e-05,
+      "loss": 0.0406,
+      "step": 960
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.4092923402786255,
+      "learning_rate": 5.4864457100425783e-05,
+      "loss": 0.033,
+      "step": 970
+    },
+    {
+      "epoch": 1.6838487972508591,
+      "grad_norm": 0.35451173782348633,
+      "learning_rate": 5.284779677618841e-05,
+      "loss": 0.0351,
+      "step": 980
+    },
+    {
+      "epoch": 1.7010309278350515,
+      "grad_norm": 0.5240267515182495,
+      "learning_rate": 5.0855501481790305e-05,
+      "loss": 0.0326,
+      "step": 990
+    },
+    {
+      "epoch": 1.718213058419244,
+      "grad_norm": 0.6117306351661682,
+      "learning_rate": 4.8888600699015496e-05,
+      "loss": 0.0327,
+      "step": 1000
+    },
+    {
+      "epoch": 1.7353951890034365,
+      "grad_norm": 0.27679693698883057,
+      "learning_rate": 4.694811078750338e-05,
+      "loss": 0.0421,
+      "step": 1010
+    },
+    {
+      "epoch": 1.7525773195876289,
+      "grad_norm": 0.44769006967544556,
+      "learning_rate": 4.50350344595635e-05,
+      "loss": 0.0313,
+      "step": 1020
+    },
+    {
+      "epoch": 1.7697594501718212,
+      "grad_norm": 0.35016706585884094,
+      "learning_rate": 4.315036026204262e-05,
+      "loss": 0.0388,
+      "step": 1030
+    },
+    {
+      "epoch": 1.7869415807560136,
+      "grad_norm": 0.2692789137363434,
+      "learning_rate": 4.129506206551138e-05,
+      "loss": 0.0295,
+      "step": 1040
+    },
+    {
+      "epoch": 1.8041237113402062,
+      "grad_norm": 0.45466527342796326,
+      "learning_rate": 3.947009856103465e-05,
+      "loss": 0.0367,
+      "step": 1050
+    },
+    {
+      "epoch": 1.8213058419243986,
+      "grad_norm": 0.2106892317533493,
+      "learning_rate": 3.767641276478563e-05,
+      "loss": 0.0369,
+      "step": 1060
+    },
+    {
+      "epoch": 1.8384879725085912,
+      "grad_norm": 0.2619366943836212,
+      "learning_rate": 3.591493153075966e-05,
+      "loss": 0.0278,
+      "step": 1070
+    },
+    {
+      "epoch": 1.8556701030927836,
+      "grad_norm": 0.35229456424713135,
+      "learning_rate": 3.41865650718396e-05,
+      "loss": 0.034,
+      "step": 1080
+    },
+    {
+      "epoch": 1.872852233676976,
+      "grad_norm": 0.325093537569046,
+      "learning_rate": 3.24922064894601e-05,
+      "loss": 0.034,
+      "step": 1090
+    },
+    {
+      "epoch": 1.8900343642611683,
+      "grad_norm": 0.13483747839927673,
+      "learning_rate": 3.083273131211382e-05,
+      "loss": 0.0254,
+      "step": 1100
+    },
+    {
+      "epoch": 1.9072164948453607,
+      "grad_norm": 0.37040096521377563,
+      "learning_rate": 2.920899704293849e-05,
+      "loss": 0.027,
+      "step": 1110
+    },
+    {
+      "epoch": 1.9243986254295533,
+      "grad_norm": 0.2273675948381424,
+      "learning_rate": 2.762184271661785e-05,
+      "loss": 0.0265,
+      "step": 1120
+    },
+    {
+      "epoch": 1.9415807560137457,
+      "grad_norm": 0.12444432824850082,
+      "learning_rate": 2.6072088465826038e-05,
+      "loss": 0.0248,
+      "step": 1130
+    },
+    {
+      "epoch": 1.9587628865979383,
+      "grad_norm": 0.3245929479598999,
+      "learning_rate": 2.4560535097439108e-05,
+      "loss": 0.0296,
+      "step": 1140
+    },
+    {
+      "epoch": 1.9759450171821307,
+      "grad_norm": 0.28723689913749695,
+      "learning_rate": 2.308796367873296e-05,
+      "loss": 0.03,
+      "step": 1150
+    },
+    {
+      "epoch": 1.993127147766323,
+      "grad_norm": 0.2640382647514343,
+      "learning_rate": 2.165513513378121e-05,
+      "loss": 0.0237,
+      "step": 1160
+    },
+    {
+      "epoch": 2.0103092783505154,
+      "grad_norm": 0.2659141421318054,
+      "learning_rate": 2.0262789850261798e-05,
+      "loss": 0.034,
+      "step": 1170
+    },
+    {
+      "epoch": 2.027491408934708,
+      "grad_norm": 0.41219818592071533,
+      "learning_rate": 1.8911647296875147e-05,
+      "loss": 0.0282,
+      "step": 1180
+    },
+    {
+      "epoch": 2.0446735395189,
+      "grad_norm": 0.23828770220279694,
+      "learning_rate": 1.7602405651572275e-05,
+      "loss": 0.0288,
+      "step": 1190
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 0.18391673266887665,
+      "learning_rate": 1.6335741440784035e-05,
+      "loss": 0.0314,
+      "step": 1200
+    },
+    {
+      "epoch": 2.0790378006872854,
+      "grad_norm": 0.27285298705101013,
+      "learning_rate": 1.511230918983867e-05,
+      "loss": 0.0367,
+      "step": 1210
+    },
+    {
+      "epoch": 2.0962199312714778,
+      "grad_norm": 0.3976253271102905,
+      "learning_rate": 1.3932741084747913e-05,
+      "loss": 0.0322,
+      "step": 1220
+    },
+    {
+      "epoch": 2.11340206185567,
+      "grad_norm": 0.17294034361839294,
+      "learning_rate": 1.2797646645536566e-05,
+      "loss": 0.0221,
+      "step": 1230
+    },
+    {
+      "epoch": 2.1305841924398625,
+      "grad_norm": 0.39969944953918457,
+      "learning_rate": 1.1707612411284253e-05,
+      "loss": 0.0318,
+      "step": 1240
+    },
+    {
+      "epoch": 2.147766323024055,
+      "grad_norm": 0.2953219711780548,
+      "learning_rate": 1.0663201637042252e-05,
+      "loss": 0.0267,
+      "step": 1250
+    },
+    {
+      "epoch": 2.1649484536082473,
+      "grad_norm": 0.24471993744373322,
+      "learning_rate": 9.664954002781745e-06,
+      "loss": 0.0219,
+      "step": 1260
+    },
+    {
+      "epoch": 2.1821305841924397,
+      "grad_norm": 0.26958975195884705,
+      "learning_rate": 8.713385334524283e-06,
+      "loss": 0.0227,
+      "step": 1270
+    },
+    {
+      "epoch": 2.1993127147766325,
+      "grad_norm": 0.3202515244483948,
+      "learning_rate": 7.808987337798158e-06,
+      "loss": 0.0258,
+      "step": 1280
+    },
+    {
+      "epoch": 2.216494845360825,
+      "grad_norm": 0.23253372311592102,
+      "learning_rate": 6.952227343558671e-06,
+      "loss": 0.0254,
+      "step": 1290
+    },
+    {
+      "epoch": 2.2336769759450172,
+      "grad_norm": 0.2511325776576996,
+      "learning_rate": 6.143548066703475e-06,
+      "loss": 0.0282,
+      "step": 1300
+    },
+    {
+      "epoch": 2.2508591065292096,
+      "grad_norm": 0.21489648520946503,
+      "learning_rate": 5.383367377307857e-06,
+      "loss": 0.02,
+      "step": 1310
+    },
+    {
+      "epoch": 2.268041237113402,
+      "grad_norm": 0.3450476825237274,
+      "learning_rate": 4.672078084698095e-06,
+      "loss": 0.0279,
+      "step": 1320
+    },
+    {
+      "epoch": 2.2852233676975944,
+      "grad_norm": 0.36600273847579956,
+      "learning_rate": 4.010047734474454e-06,
+      "loss": 0.034,
+      "step": 1330
+    },
+    {
+      "epoch": 2.3024054982817868,
+      "grad_norm": 0.48743385076522827,
+      "learning_rate": 3.397618418588877e-06,
+      "loss": 0.0384,
+      "step": 1340
+    },
+    {
+      "epoch": 2.319587628865979,
+      "grad_norm": 0.30427098274230957,
+      "learning_rate": 2.8351065985751766e-06,
+      "loss": 0.0226,
+      "step": 1350
+    },
+    {
+      "epoch": 2.336769759450172,
+      "grad_norm": 0.36907103657722473,
+      "learning_rate": 2.322802942023461e-06,
+      "loss": 0.0334,
+      "step": 1360
+    },
+    {
+      "epoch": 2.3539518900343643,
+      "grad_norm": 0.184714213013649,
+      "learning_rate": 1.8609721723830132e-06,
+      "loss": 0.0223,
+      "step": 1370
+    },
+    {
+      "epoch": 2.3711340206185567,
+      "grad_norm": 0.43203970789909363,
+      "learning_rate": 1.4498529321713584e-06,
+      "loss": 0.0273,
+      "step": 1380
+    },
+    {
+      "epoch": 2.388316151202749,
+      "grad_norm": 0.26931440830230713,
+      "learning_rate": 1.0896576596600705e-06,
+      "loss": 0.0265,
+      "step": 1390
+    },
+    {
+      "epoch": 2.4054982817869415,
+      "grad_norm": 0.19834183156490326,
+      "learning_rate": 7.80572479101327e-07,
+      "loss": 0.0253,
+      "step": 1400
+    },
+    {
+      "epoch": 2.422680412371134,
+      "grad_norm": 0.22906753420829773,
+      "learning_rate": 5.227571045515633e-07,
+      "loss": 0.0331,
+      "step": 1410
+    },
+    {
+      "epoch": 2.4398625429553267,
+      "grad_norm": 0.307211697101593,
+      "learning_rate": 3.163447573422351e-07,
+      "loss": 0.03,
+      "step": 1420
+    },
+    {
+      "epoch": 2.457044673539519,
+      "grad_norm": 0.31240877509117126,
+      "learning_rate": 1.614420972401165e-07,
+      "loss": 0.0307,
+      "step": 1430
+    },
+    {
+      "epoch": 2.4742268041237114,
+      "grad_norm": 0.28886109590530396,
+      "learning_rate": 5.812916733284324e-08,
+      "loss": 0.0214,
+      "step": 1440
+    },
+    {
+      "epoch": 2.491408934707904,
+      "grad_norm": 0.29304051399230957,
+      "learning_rate": 6.459352668164442e-09,
+      "loss": 0.0285,
+      "step": 1450
+    },
+    {
+      "epoch": 2.5,
+      "step": 1455,
+      "total_flos": 4.98510260177783e+16,
+      "train_loss": 0.0646941511594143,
+      "train_runtime": 645.0485,
+      "train_samples_per_second": 36.09,
+      "train_steps_per_second": 2.256
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1455,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.98510260177783e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}