{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 699, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001430615164520744, "grad_norm": 0.7870709777607977, "learning_rate": 2.8571428571428573e-06, "loss": 0.9791, "step": 1 }, { "epoch": 0.00715307582260372, "grad_norm": 0.673048592872478, "learning_rate": 1.4285714285714285e-05, "loss": 0.9655, "step": 5 }, { "epoch": 0.01430615164520744, "grad_norm": 0.6751613698851058, "learning_rate": 2.857142857142857e-05, "loss": 1.0427, "step": 10 }, { "epoch": 0.02145922746781116, "grad_norm": 0.37361113910805865, "learning_rate": 4.2857142857142856e-05, "loss": 1.0165, "step": 15 }, { "epoch": 0.02861230329041488, "grad_norm": 0.25425037309567433, "learning_rate": 5.714285714285714e-05, "loss": 0.7928, "step": 20 }, { "epoch": 0.0357653791130186, "grad_norm": 0.47123658085709424, "learning_rate": 7.142857142857143e-05, "loss": 0.9567, "step": 25 }, { "epoch": 0.04291845493562232, "grad_norm": 0.19886770818499985, "learning_rate": 8.571428571428571e-05, "loss": 0.7555, "step": 30 }, { "epoch": 0.05007153075822604, "grad_norm": 0.18492536060018994, "learning_rate": 0.0001, "loss": 0.8203, "step": 35 }, { "epoch": 0.05722460658082976, "grad_norm": 0.11579635774128288, "learning_rate": 0.00011428571428571428, "loss": 0.8403, "step": 40 }, { "epoch": 0.06437768240343347, "grad_norm": 0.14426080380091869, "learning_rate": 0.00012857142857142858, "loss": 0.765, "step": 45 }, { "epoch": 0.0715307582260372, "grad_norm": 0.3962935654200976, "learning_rate": 0.00014285714285714287, "loss": 0.9031, "step": 50 }, { "epoch": 0.07868383404864092, "grad_norm": 0.1122336069942178, "learning_rate": 0.00015714285714285716, "loss": 0.7221, "step": 55 }, { "epoch": 0.08583690987124463, "grad_norm": 0.14487919195639437, "learning_rate": 0.00017142857142857143, "loss": 0.8207, "step": 60 }, { "epoch": 0.09298998569384835, "grad_norm": 0.10991881963153113, "learning_rate": 0.00018571428571428572, "loss": 0.7837, "step": 65 }, { "epoch": 0.10014306151645208, "grad_norm": 0.12479037045363885, "learning_rate": 0.0002, "loss": 0.6984, "step": 70 }, { "epoch": 0.1072961373390558, "grad_norm": 0.3825157581902254, "learning_rate": 0.00019996881929740885, "loss": 0.8457, "step": 75 }, { "epoch": 0.11444921316165951, "grad_norm": 0.10204843755668831, "learning_rate": 0.00019987529663435965, "loss": 0.6723, "step": 80 }, { "epoch": 0.12160228898426323, "grad_norm": 0.1556108526142336, "learning_rate": 0.00019971949033289923, "loss": 0.787, "step": 85 }, { "epoch": 0.12875536480686695, "grad_norm": 0.1089156658349774, "learning_rate": 0.00019950149755602656, "loss": 0.7182, "step": 90 }, { "epoch": 0.13590844062947066, "grad_norm": 0.11809573830330126, "learning_rate": 0.00019922145424710051, "loss": 0.7088, "step": 95 }, { "epoch": 0.1430615164520744, "grad_norm": 0.4570754502103615, "learning_rate": 0.00019887953504506362, "loss": 0.8189, "step": 100 }, { "epoch": 0.15021459227467812, "grad_norm": 0.10840048416131945, "learning_rate": 0.00019847595317553488, "loss": 0.6759, "step": 105 }, { "epoch": 0.15736766809728184, "grad_norm": 0.1381325645315335, "learning_rate": 0.0001980109603178392, "loss": 0.768, "step": 110 }, { "epoch": 0.16452074391988555, "grad_norm": 0.1017001439638063, "learning_rate": 0.0001974848464480566, "loss": 0.7698, "step": 115 }, { "epoch": 0.17167381974248927, "grad_norm": 0.13205919198629731, "learning_rate": 0.00019689793965818917, "loss": 0.6944, "step": 120 }, { "epoch": 0.17882689556509299, "grad_norm": 0.48394521253305983, "learning_rate": 0.00019625060595155816, "loss": 0.8363, "step": 125 }, { "epoch": 0.1859799713876967, "grad_norm": 0.14521632085748434, "learning_rate": 0.00019554324901455928, "loss": 0.662, "step": 130 }, { "epoch": 0.19313304721030042, "grad_norm": 0.1689121199898348, "learning_rate": 0.00019477630996491804, "loss": 0.7749, "step": 135 }, { "epoch": 0.20028612303290416, "grad_norm": 0.10646210944295648, "learning_rate": 0.00019395026707660275, "loss": 0.7846, "step": 140 }, { "epoch": 0.20743919885550788, "grad_norm": 0.11099677119841628, "learning_rate": 0.00019306563548156594, "loss": 0.6496, "step": 145 }, { "epoch": 0.2145922746781116, "grad_norm": 0.4940715196481721, "learning_rate": 0.00019212296684850095, "loss": 0.7797, "step": 150 }, { "epoch": 0.2217453505007153, "grad_norm": 0.10857780176447043, "learning_rate": 0.0001911228490388136, "loss": 0.6568, "step": 155 }, { "epoch": 0.22889842632331903, "grad_norm": 0.1558194607792288, "learning_rate": 0.00019006590574002346, "loss": 0.7589, "step": 160 }, { "epoch": 0.23605150214592274, "grad_norm": 0.11205216199084236, "learning_rate": 0.00018895279607682365, "loss": 0.7432, "step": 165 }, { "epoch": 0.24320457796852646, "grad_norm": 0.10598348111600908, "learning_rate": 0.00018778421420004136, "loss": 0.6713, "step": 170 }, { "epoch": 0.2503576537911302, "grad_norm": 0.37721971351878564, "learning_rate": 0.00018656088885375568, "loss": 0.7928, "step": 175 }, { "epoch": 0.2575107296137339, "grad_norm": 0.11245600928935082, "learning_rate": 0.00018528358292084246, "loss": 0.6346, "step": 180 }, { "epoch": 0.2646638054363376, "grad_norm": 0.1620137549427017, "learning_rate": 0.00018395309294723, "loss": 0.757, "step": 185 }, { "epoch": 0.2718168812589413, "grad_norm": 0.09949111843144275, "learning_rate": 0.00018257024864516162, "loss": 0.7145, "step": 190 }, { "epoch": 0.27896995708154504, "grad_norm": 0.12215913633341108, "learning_rate": 0.00018113591237577556, "loss": 0.6667, "step": 195 }, { "epoch": 0.2861230329041488, "grad_norm": 0.5598361803307672, "learning_rate": 0.00017965097861132447, "loss": 0.8266, "step": 200 }, { "epoch": 0.2861230329041488, "eval_loss": 0.68742835521698, "eval_runtime": 974.2638, "eval_samples_per_second": 2.053, "eval_steps_per_second": 0.128, "step": 200 }, { "epoch": 0.2932761087267525, "grad_norm": 0.1103365072886984, "learning_rate": 0.00017811637337736988, "loss": 0.6201, "step": 205 }, { "epoch": 0.30042918454935624, "grad_norm": 0.18081388953274952, "learning_rate": 0.00017653305367529968, "loss": 0.7519, "step": 210 }, { "epoch": 0.30758226037195996, "grad_norm": 0.0979269774081774, "learning_rate": 0.00017490200688552866, "loss": 0.7811, "step": 215 }, { "epoch": 0.3147353361945637, "grad_norm": 0.1178423488937459, "learning_rate": 0.00017322425015175403, "loss": 0.6599, "step": 220 }, { "epoch": 0.3218884120171674, "grad_norm": 0.38638772763542717, "learning_rate": 0.00017150082974665053, "loss": 0.7658, "step": 225 }, { "epoch": 0.3290414878397711, "grad_norm": 0.10912001826583005, "learning_rate": 0.00016973282041940005, "loss": 0.6353, "step": 230 }, { "epoch": 0.3361945636623748, "grad_norm": 0.13882513170563118, "learning_rate": 0.00016792132472546274, "loss": 0.778, "step": 235 }, { "epoch": 0.34334763948497854, "grad_norm": 0.10083866344440855, "learning_rate": 0.00016606747233900815, "loss": 0.7229, "step": 240 }, { "epoch": 0.35050071530758226, "grad_norm": 0.1254352004833624, "learning_rate": 0.00016417241934843456, "loss": 0.7063, "step": 245 }, { "epoch": 0.35765379113018597, "grad_norm": 0.46499091728878567, "learning_rate": 0.00016223734753541575, "loss": 0.7838, "step": 250 }, { "epoch": 0.3648068669527897, "grad_norm": 0.1078358562936522, "learning_rate": 0.00016026346363792567, "loss": 0.6502, "step": 255 }, { "epoch": 0.3719599427753934, "grad_norm": 0.14916467905079567, "learning_rate": 0.0001582519985976994, "loss": 0.7599, "step": 260 }, { "epoch": 0.3791130185979971, "grad_norm": 0.10034285034077563, "learning_rate": 0.00015620420679260085, "loss": 0.7457, "step": 265 }, { "epoch": 0.38626609442060084, "grad_norm": 0.1178480592114291, "learning_rate": 0.00015412136525437485, "loss": 0.6755, "step": 270 }, { "epoch": 0.39341917024320455, "grad_norm": 0.4121462451204322, "learning_rate": 0.00015200477287227236, "loss": 0.8288, "step": 275 }, { "epoch": 0.4005722460658083, "grad_norm": 0.10328930758167154, "learning_rate": 0.00014985574958304484, "loss": 0.624, "step": 280 }, { "epoch": 0.40772532188841204, "grad_norm": 0.13717935978977466, "learning_rate": 0.00014767563554781316, "loss": 0.732, "step": 285 }, { "epoch": 0.41487839771101576, "grad_norm": 0.09128697749820436, "learning_rate": 0.00014546579031632427, "loss": 0.7197, "step": 290 }, { "epoch": 0.4220314735336195, "grad_norm": 0.12086812939243298, "learning_rate": 0.00014322759197911685, "loss": 0.6709, "step": 295 }, { "epoch": 0.4291845493562232, "grad_norm": 0.40143408299485855, "learning_rate": 0.00014096243630812478, "loss": 0.815, "step": 300 }, { "epoch": 0.4363376251788269, "grad_norm": 0.09924403899475973, "learning_rate": 0.00013867173588625405, "loss": 0.6408, "step": 305 }, { "epoch": 0.4434907010014306, "grad_norm": 0.13959887072890406, "learning_rate": 0.0001363569192264762, "loss": 0.731, "step": 310 }, { "epoch": 0.45064377682403434, "grad_norm": 0.09921104913462633, "learning_rate": 0.0001340194298809878, "loss": 0.7096, "step": 315 }, { "epoch": 0.45779685264663805, "grad_norm": 0.11089054056325505, "learning_rate": 0.00013166072554099056, "loss": 0.6672, "step": 320 }, { "epoch": 0.46494992846924177, "grad_norm": 0.4230116831435091, "learning_rate": 0.00012928227712765504, "loss": 0.7871, "step": 325 }, { "epoch": 0.4721030042918455, "grad_norm": 0.09737599815254978, "learning_rate": 0.00012688556787483334, "loss": 0.6405, "step": 330 }, { "epoch": 0.4792560801144492, "grad_norm": 0.14484542924430074, "learning_rate": 0.00012447209240409364, "loss": 0.7468, "step": 335 }, { "epoch": 0.4864091559370529, "grad_norm": 0.08987261375789386, "learning_rate": 0.00012204335579265323, "loss": 0.7169, "step": 340 }, { "epoch": 0.49356223175965663, "grad_norm": 0.10881204819795895, "learning_rate": 0.00011960087263479119, "loss": 0.6587, "step": 345 }, { "epoch": 0.5007153075822603, "grad_norm": 0.49675617394166816, "learning_rate": 0.0001171461660973261, "loss": 0.779, "step": 350 }, { "epoch": 0.5078683834048641, "grad_norm": 0.10435791237431437, "learning_rate": 0.00011468076696974778, "loss": 0.6392, "step": 355 }, { "epoch": 0.5150214592274678, "grad_norm": 0.14338822382120334, "learning_rate": 0.00011220621270959564, "loss": 0.746, "step": 360 }, { "epoch": 0.5221745350500715, "grad_norm": 0.09020136891885296, "learning_rate": 0.00010972404648367822, "loss": 0.7134, "step": 365 }, { "epoch": 0.5293276108726752, "grad_norm": 0.11624451016773857, "learning_rate": 0.00010723581620573299, "loss": 0.6715, "step": 370 }, { "epoch": 0.5364806866952789, "grad_norm": 0.5023243515754996, "learning_rate": 0.00010474307357112547, "loss": 0.7996, "step": 375 }, { "epoch": 0.5436337625178826, "grad_norm": 0.10482971216811227, "learning_rate": 0.00010224737308919014, "loss": 0.6509, "step": 380 }, { "epoch": 0.5507868383404864, "grad_norm": 0.15040978772920802, "learning_rate": 9.975027111381675e-05, "loss": 0.7435, "step": 385 }, { "epoch": 0.5579399141630901, "grad_norm": 0.09272976728170695, "learning_rate": 9.72533248728859e-05, "loss": 0.6922, "step": 390 }, { "epoch": 0.5650929899856938, "grad_norm": 0.1221214527065867, "learning_rate": 9.475809149716011e-05, "loss": 0.6814, "step": 395 }, { "epoch": 0.5722460658082976, "grad_norm": 0.5036832457140492, "learning_rate": 9.226612704923507e-05, "loss": 0.7478, "step": 400 }, { "epoch": 0.5722460658082976, "eval_loss": 0.6692562103271484, "eval_runtime": 972.3875, "eval_samples_per_second": 2.057, "eval_steps_per_second": 0.129, "step": 400 }, { "epoch": 0.5793991416309013, "grad_norm": 0.09390195648925902, "learning_rate": 8.977898555315713e-05, "loss": 0.6167, "step": 405 }, { "epoch": 0.586552217453505, "grad_norm": 0.14219781785034696, "learning_rate": 8.729821802531212e-05, "loss": 0.73, "step": 410 }, { "epoch": 0.5937052932761088, "grad_norm": 0.09671061747598036, "learning_rate": 8.482537150718975e-05, "loss": 0.7244, "step": 415 }, { "epoch": 0.6008583690987125, "grad_norm": 0.11955392967058884, "learning_rate": 8.23619881006267e-05, "loss": 0.6769, "step": 420 }, { "epoch": 0.6080114449213162, "grad_norm": 0.4189202491522242, "learning_rate": 7.99096040061303e-05, "loss": 0.7751, "step": 425 }, { "epoch": 0.6151645207439199, "grad_norm": 0.09721911498914373, "learning_rate": 7.746974856488238e-05, "loss": 0.6233, "step": 430 }, { "epoch": 0.6223175965665236, "grad_norm": 0.13846978267168025, "learning_rate": 7.50439433050205e-05, "loss": 0.7407, "step": 435 }, { "epoch": 0.6294706723891274, "grad_norm": 0.08838254367219467, "learning_rate": 7.263370099279172e-05, "loss": 0.7088, "step": 440 }, { "epoch": 0.6366237482117311, "grad_norm": 0.11873577831994696, "learning_rate": 7.024052468917022e-05, "loss": 0.6736, "step": 445 }, { "epoch": 0.6437768240343348, "grad_norm": 0.33813588083054347, "learning_rate": 6.786590681252744e-05, "loss": 0.7469, "step": 450 }, { "epoch": 0.6509298998569385, "grad_norm": 0.1035926098875485, "learning_rate": 6.551132820793902e-05, "loss": 0.6177, "step": 455 }, { "epoch": 0.6580829756795422, "grad_norm": 0.13996128874715752, "learning_rate": 6.31782572237088e-05, "loss": 0.7217, "step": 460 }, { "epoch": 0.6652360515021459, "grad_norm": 0.09586075004988552, "learning_rate": 6.08681487956865e-05, "loss": 0.6931, "step": 465 }, { "epoch": 0.6723891273247496, "grad_norm": 0.11431458069436656, "learning_rate": 5.858244353994906e-05, "loss": 0.6904, "step": 470 }, { "epoch": 0.6795422031473534, "grad_norm": 0.36826661059200655, "learning_rate": 5.632256685441229e-05, "loss": 0.7614, "step": 475 }, { "epoch": 0.6866952789699571, "grad_norm": 0.09520510613886908, "learning_rate": 5.408992802993269e-05, "loss": 0.6571, "step": 480 }, { "epoch": 0.6938483547925608, "grad_norm": 0.15384325014915562, "learning_rate": 5.188591937145386e-05, "loss": 0.7585, "step": 485 }, { "epoch": 0.7010014306151645, "grad_norm": 0.0899989739652617, "learning_rate": 4.971191532974552e-05, "loss": 0.7155, "step": 490 }, { "epoch": 0.7081545064377682, "grad_norm": 0.10977217448571064, "learning_rate": 4.756927164427685e-05, "loss": 0.6359, "step": 495 }, { "epoch": 0.7153075822603719, "grad_norm": 0.4092589446417867, "learning_rate": 4.5459324497758104e-05, "loss": 0.7788, "step": 500 }, { "epoch": 0.7224606580829757, "grad_norm": 0.09127215016969027, "learning_rate": 4.338338968287853e-05, "loss": 0.6295, "step": 505 }, { "epoch": 0.7296137339055794, "grad_norm": 0.15013590282846936, "learning_rate": 4.134276178175927e-05, "loss": 0.7025, "step": 510 }, { "epoch": 0.7367668097281831, "grad_norm": 0.0899908161380569, "learning_rate": 3.933871335863408e-05, "loss": 0.67, "step": 515 }, { "epoch": 0.7439198855507868, "grad_norm": 0.11465342107800292, "learning_rate": 3.737249416626014e-05, "loss": 0.6392, "step": 520 }, { "epoch": 0.7510729613733905, "grad_norm": 0.4280343702289959, "learning_rate": 3.544533036655476e-05, "loss": 0.786, "step": 525 }, { "epoch": 0.7582260371959942, "grad_norm": 0.10266949219770882, "learning_rate": 3.355842376594357e-05, "loss": 0.6068, "step": 530 }, { "epoch": 0.765379113018598, "grad_norm": 0.14253463950155285, "learning_rate": 3.171295106589726e-05, "loss": 0.7457, "step": 535 }, { "epoch": 0.7725321888412017, "grad_norm": 0.09202889667099431, "learning_rate": 2.9910063129123755e-05, "loss": 0.682, "step": 540 }, { "epoch": 0.7796852646638054, "grad_norm": 0.11296926525170213, "learning_rate": 2.8150884261874366e-05, "loss": 0.6397, "step": 545 }, { "epoch": 0.7868383404864091, "grad_norm": 0.509027438656149, "learning_rate": 2.6436511512810325e-05, "loss": 0.7777, "step": 550 }, { "epoch": 0.7939914163090128, "grad_norm": 0.09297223864460286, "learning_rate": 2.4768013988868065e-05, "loss": 0.6147, "step": 555 }, { "epoch": 0.8011444921316166, "grad_norm": 0.1399057626649613, "learning_rate": 2.3146432188548917e-05, "loss": 0.7012, "step": 560 }, { "epoch": 0.8082975679542204, "grad_norm": 0.09185298523651206, "learning_rate": 2.157277735304971e-05, "loss": 0.7267, "step": 565 }, { "epoch": 0.8154506437768241, "grad_norm": 0.1123786553918693, "learning_rate": 2.0048030835638644e-05, "loss": 0.6257, "step": 570 }, { "epoch": 0.8226037195994278, "grad_norm": 0.37210838744162594, "learning_rate": 1.8573143489669385e-05, "loss": 0.7216, "step": 575 }, { "epoch": 0.8297567954220315, "grad_norm": 0.10101729128186683, "learning_rate": 1.7149035075615794e-05, "loss": 0.6183, "step": 580 }, { "epoch": 0.8369098712446352, "grad_norm": 0.15231579242566323, "learning_rate": 1.5776593687496167e-05, "loss": 0.7182, "step": 585 }, { "epoch": 0.844062947067239, "grad_norm": 0.08888363881120907, "learning_rate": 1.44566751990454e-05, "loss": 0.6994, "step": 590 }, { "epoch": 0.8512160228898427, "grad_norm": 0.11401883397158272, "learning_rate": 1.3190102729980226e-05, "loss": 0.6824, "step": 595 }, { "epoch": 0.8583690987124464, "grad_norm": 0.4299251120038569, "learning_rate": 1.1977666132689947e-05, "loss": 0.7963, "step": 600 }, { "epoch": 0.8583690987124464, "eval_loss": 0.660947322845459, "eval_runtime": 974.2409, "eval_samples_per_second": 2.053, "eval_steps_per_second": 0.128, "step": 600 }, { "epoch": 0.8655221745350501, "grad_norm": 0.09507686854622492, "learning_rate": 1.0820121499673618e-05, "loss": 0.6198, "step": 605 }, { "epoch": 0.8726752503576538, "grad_norm": 0.1446777341342134, "learning_rate": 9.71819069202996e-06, "loss": 0.6841, "step": 610 }, { "epoch": 0.8798283261802575, "grad_norm": 0.09274602760519131, "learning_rate": 8.672560889294812e-06, "loss": 0.6759, "step": 615 }, { "epoch": 0.8869814020028612, "grad_norm": 0.13384317589987976, "learning_rate": 7.683884160906118e-06, "loss": 0.6853, "step": 620 }, { "epoch": 0.894134477825465, "grad_norm": 0.5618493715100463, "learning_rate": 6.75277705956443e-06, "loss": 0.76, "step": 625 }, { "epoch": 0.9012875536480687, "grad_norm": 0.08834479759548475, "learning_rate": 5.879820236741884e-06, "loss": 0.6058, "step": 630 }, { "epoch": 0.9084406294706724, "grad_norm": 0.13992620759849517, "learning_rate": 5.065558080579835e-06, "loss": 0.7586, "step": 635 }, { "epoch": 0.9155937052932761, "grad_norm": 0.09567272654127822, "learning_rate": 4.310498376400751e-06, "loss": 0.7173, "step": 640 }, { "epoch": 0.9227467811158798, "grad_norm": 0.112867269041588, "learning_rate": 3.615111990046094e-06, "loss": 0.6469, "step": 645 }, { "epoch": 0.9298998569384835, "grad_norm": 0.4489160430511751, "learning_rate": 2.979832574237884e-06, "loss": 0.7839, "step": 650 }, { "epoch": 0.9370529327610873, "grad_norm": 0.09860092960999309, "learning_rate": 2.4050562981465906e-06, "loss": 0.6202, "step": 655 }, { "epoch": 0.944206008583691, "grad_norm": 0.14952199122077578, "learning_rate": 1.8911416003346893e-06, "loss": 0.7517, "step": 660 }, { "epoch": 0.9513590844062947, "grad_norm": 0.09034770607257726, "learning_rate": 1.4384089652291543e-06, "loss": 0.722, "step": 665 }, { "epoch": 0.9585121602288984, "grad_norm": 0.12043580567803806, "learning_rate": 1.0471407232629626e-06, "loss": 0.6474, "step": 670 }, { "epoch": 0.9656652360515021, "grad_norm": 0.3910952273479509, "learning_rate": 7.175808748098311e-07, "loss": 0.7645, "step": 675 }, { "epoch": 0.9728183118741058, "grad_norm": 0.09625514486008946, "learning_rate": 4.4993493802217847e-07, "loss": 0.6296, "step": 680 }, { "epoch": 0.9799713876967096, "grad_norm": 0.14269654608023083, "learning_rate": 2.443698206670897e-07, "loss": 0.7063, "step": 685 }, { "epoch": 0.9871244635193133, "grad_norm": 0.09242040569454475, "learning_rate": 1.0101371604032128e-07, "loss": 0.7074, "step": 690 }, { "epoch": 0.994277539341917, "grad_norm": 0.12417081551389351, "learning_rate": 1.99560230231266e-08, "loss": 0.6753, "step": 695 }, { "epoch": 1.0, "step": 699, "total_flos": 1.493876632190976e+16, "train_loss": 0.7242236417932743, "train_runtime": 29465.3177, "train_samples_per_second": 0.759, "train_steps_per_second": 0.024 } ], "logging_steps": 5, "max_steps": 699, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.493876632190976e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }