{ "best_metric": 0.9264618754386902, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 3.0, "eval_steps": 50, "global_step": 141, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02127659574468085, "grad_norm": 14.213656425476074, "learning_rate": 1.1200000000000001e-05, "loss": 4.4616, "step": 1 }, { "epoch": 0.02127659574468085, "eval_loss": 3.8836147785186768, "eval_runtime": 3.7874, "eval_samples_per_second": 167.135, "eval_steps_per_second": 5.281, "step": 1 }, { "epoch": 0.0425531914893617, "grad_norm": 7.082957744598389, "learning_rate": 2.2400000000000002e-05, "loss": 3.8433, "step": 2 }, { "epoch": 0.06382978723404255, "grad_norm": 5.922336578369141, "learning_rate": 3.36e-05, "loss": 3.5573, "step": 3 }, { "epoch": 0.0851063829787234, "grad_norm": 5.772343158721924, "learning_rate": 4.4800000000000005e-05, "loss": 3.5562, "step": 4 }, { "epoch": 0.10638297872340426, "grad_norm": 6.365505695343018, "learning_rate": 5.6e-05, "loss": 3.1005, "step": 5 }, { "epoch": 0.1276595744680851, "grad_norm": 7.662172794342041, "learning_rate": 6.72e-05, "loss": 2.6307, "step": 6 }, { "epoch": 0.14893617021276595, "grad_norm": 7.855373382568359, "learning_rate": 7.84e-05, "loss": 2.1008, "step": 7 }, { "epoch": 0.1702127659574468, "grad_norm": 5.307789325714111, "learning_rate": 8.960000000000001e-05, "loss": 1.8141, "step": 8 }, { "epoch": 0.19148936170212766, "grad_norm": 2.865940570831299, "learning_rate": 0.0001008, "loss": 1.8787, "step": 9 }, { "epoch": 0.2127659574468085, "grad_norm": 1.4380123615264893, "learning_rate": 0.000112, "loss": 1.4918, "step": 10 }, { "epoch": 0.23404255319148937, "grad_norm": 1.5105291604995728, "learning_rate": 0.00011198389746057678, "loss": 1.4308, "step": 11 }, { "epoch": 0.2553191489361702, "grad_norm": 1.0955153703689575, "learning_rate": 0.0001119355991027277, "loss": 1.1133, "step": 12 }, { "epoch": 0.2765957446808511, "grad_norm": 8.190502166748047, "learning_rate": 0.00011185513270238886, "loss": 0.7666, "step": 13 }, { "epoch": 0.2978723404255319, "grad_norm": 12.864703178405762, "learning_rate": 0.00011174254453503828, "loss": 0.6613, "step": 14 }, { "epoch": 0.3191489361702128, "grad_norm": 2.111825466156006, "learning_rate": 0.00011159789934908318, "loss": 1.6219, "step": 15 }, { "epoch": 0.3404255319148936, "grad_norm": 1.2697428464889526, "learning_rate": 0.00011142128032862395, "loss": 1.4151, "step": 16 }, { "epoch": 0.3617021276595745, "grad_norm": 0.5486758351325989, "learning_rate": 0.00011121278904561553, "loss": 1.2228, "step": 17 }, { "epoch": 0.3829787234042553, "grad_norm": 0.6482474207878113, "learning_rate": 0.0001109725454014545, "loss": 1.1537, "step": 18 }, { "epoch": 0.40425531914893614, "grad_norm": 1.133864402770996, "learning_rate": 0.00011070068755802486, "loss": 0.7857, "step": 19 }, { "epoch": 0.425531914893617, "grad_norm": 0.4991963803768158, "learning_rate": 0.00011039737185824234, "loss": 0.1432, "step": 20 }, { "epoch": 0.44680851063829785, "grad_norm": 0.7302205562591553, "learning_rate": 0.00011006277273614316, "loss": 1.3804, "step": 21 }, { "epoch": 0.46808510638297873, "grad_norm": 0.6448777914047241, "learning_rate": 0.00010969708261656854, "loss": 1.4363, "step": 22 }, { "epoch": 0.48936170212765956, "grad_norm": 0.6198523044586182, "learning_rate": 0.00010930051180450303, "loss": 1.2917, "step": 23 }, { "epoch": 0.5106382978723404, "grad_norm": 0.40335163474082947, "learning_rate": 0.00010887328836413005, "loss": 1.2051, "step": 24 }, { "epoch": 0.5319148936170213, "grad_norm": 0.5545335412025452, "learning_rate": 0.00010841565798767423, "loss": 0.9898, "step": 25 }, { "epoch": 0.5531914893617021, "grad_norm": 1.7060941457748413, "learning_rate": 0.00010792788385410628, "loss": 0.0614, "step": 26 }, { "epoch": 0.574468085106383, "grad_norm": 0.7902534008026123, "learning_rate": 0.00010741024647779101, "loss": 1.128, "step": 27 }, { "epoch": 0.5957446808510638, "grad_norm": 0.44602009654045105, "learning_rate": 0.00010686304354716622, "loss": 1.4464, "step": 28 }, { "epoch": 0.6170212765957447, "grad_norm": 0.3824542164802551, "learning_rate": 0.00010628658975354491, "loss": 1.2856, "step": 29 }, { "epoch": 0.6382978723404256, "grad_norm": 0.42575424909591675, "learning_rate": 0.00010568121661013911, "loss": 1.134, "step": 30 }, { "epoch": 0.6595744680851063, "grad_norm": 0.506963312625885, "learning_rate": 0.00010504727226140981, "loss": 0.9792, "step": 31 }, { "epoch": 0.6808510638297872, "grad_norm": 0.46332627534866333, "learning_rate": 0.00010438512128285228, "loss": 0.3159, "step": 32 }, { "epoch": 0.7021276595744681, "grad_norm": 0.4355514943599701, "learning_rate": 0.00010369514447133208, "loss": 0.8945, "step": 33 }, { "epoch": 0.723404255319149, "grad_norm": 0.4772055447101593, "learning_rate": 0.0001029777386260924, "loss": 1.4357, "step": 34 }, { "epoch": 0.7446808510638298, "grad_norm": 0.3987302780151367, "learning_rate": 0.00010223331632055843, "loss": 1.3066, "step": 35 }, { "epoch": 0.7659574468085106, "grad_norm": 0.37436139583587646, "learning_rate": 0.00010146230566507025, "loss": 1.1248, "step": 36 }, { "epoch": 0.7872340425531915, "grad_norm": 0.4493124783039093, "learning_rate": 0.00010066515006068056, "loss": 1.0106, "step": 37 }, { "epoch": 0.8085106382978723, "grad_norm": 0.8829596042633057, "learning_rate": 9.984230794415887e-05, "loss": 0.4913, "step": 38 }, { "epoch": 0.8297872340425532, "grad_norm": 0.5549487471580505, "learning_rate": 9.899425252434878e-05, "loss": 0.6699, "step": 39 }, { "epoch": 0.851063829787234, "grad_norm": 0.4452535808086395, "learning_rate": 9.812147151002993e-05, "loss": 1.4244, "step": 40 }, { "epoch": 0.8723404255319149, "grad_norm": 0.35962700843811035, "learning_rate": 9.722446682944128e-05, "loss": 1.2756, "step": 41 }, { "epoch": 0.8936170212765957, "grad_norm": 0.3556835949420929, "learning_rate": 9.630375434162683e-05, "loss": 1.1866, "step": 42 }, { "epoch": 0.9148936170212766, "grad_norm": 0.34359875321388245, "learning_rate": 9.53598635397699e-05, "loss": 1.0799, "step": 43 }, { "epoch": 0.9361702127659575, "grad_norm": 0.5090391039848328, "learning_rate": 9.43933372466865e-05, "loss": 0.7165, "step": 44 }, { "epoch": 0.9574468085106383, "grad_norm": 0.42997509241104126, "learning_rate": 9.340473130265294e-05, "loss": 1.0119, "step": 45 }, { "epoch": 0.9787234042553191, "grad_norm": 0.3640575408935547, "learning_rate": 9.239461424574742e-05, "loss": 1.2632, "step": 46 }, { "epoch": 1.0, "grad_norm": 0.3492790460586548, "learning_rate": 9.136356698488885e-05, "loss": 0.9953, "step": 47 }, { "epoch": 1.0212765957446808, "grad_norm": 0.3192073702812195, "learning_rate": 9.031218246576172e-05, "loss": 0.21, "step": 48 }, { "epoch": 1.0425531914893618, "grad_norm": 0.434736043214798, "learning_rate": 8.924106532981847e-05, "loss": 1.1353, "step": 49 }, { "epoch": 1.0638297872340425, "grad_norm": 0.36542627215385437, "learning_rate": 8.815083156655581e-05, "loss": 1.3234, "step": 50 }, { "epoch": 1.0638297872340425, "eval_loss": 0.9605554938316345, "eval_runtime": 3.8841, "eval_samples_per_second": 162.971, "eval_steps_per_second": 5.149, "step": 50 }, { "epoch": 1.0851063829787233, "grad_norm": 0.3345516622066498, "learning_rate": 8.704210815926495e-05, "loss": 1.1487, "step": 51 }, { "epoch": 1.1063829787234043, "grad_norm": 0.39309221506118774, "learning_rate": 8.59155327244593e-05, "loss": 1.0582, "step": 52 }, { "epoch": 1.127659574468085, "grad_norm": 0.5331593751907349, "learning_rate": 8.477175314518714e-05, "loss": 0.8832, "step": 53 }, { "epoch": 1.148936170212766, "grad_norm": 0.8910200595855713, "learning_rate": 8.361142719844015e-05, "loss": 0.2747, "step": 54 }, { "epoch": 1.1702127659574468, "grad_norm": 0.35534268617630005, "learning_rate": 8.243522217687193e-05, "loss": 0.7217, "step": 55 }, { "epoch": 1.1914893617021276, "grad_norm": 0.44460806250572205, "learning_rate": 8.124381450504426e-05, "loss": 1.3437, "step": 56 }, { "epoch": 1.2127659574468086, "grad_norm": 0.37915581464767456, "learning_rate": 8.00378893504216e-05, "loss": 1.2378, "step": 57 }, { "epoch": 1.2340425531914894, "grad_norm": 0.3455619513988495, "learning_rate": 7.881814022933765e-05, "loss": 1.1035, "step": 58 }, { "epoch": 1.2553191489361701, "grad_norm": 0.39131176471710205, "learning_rate": 7.758526860816059e-05, "loss": 0.9421, "step": 59 }, { "epoch": 1.2765957446808511, "grad_norm": 0.3872637450695038, "learning_rate": 7.633998349988623e-05, "loss": 0.4359, "step": 60 }, { "epoch": 1.297872340425532, "grad_norm": 0.3637562692165375, "learning_rate": 7.508300105639138e-05, "loss": 0.498, "step": 61 }, { "epoch": 1.3191489361702127, "grad_norm": 0.4627838730812073, "learning_rate": 7.381504415658137e-05, "loss": 1.3362, "step": 62 }, { "epoch": 1.3404255319148937, "grad_norm": 0.40288859605789185, "learning_rate": 7.253684199066931e-05, "loss": 1.2646, "step": 63 }, { "epoch": 1.3617021276595744, "grad_norm": 0.3752979636192322, "learning_rate": 7.124912964082547e-05, "loss": 1.0772, "step": 64 }, { "epoch": 1.3829787234042552, "grad_norm": 0.4062426686286926, "learning_rate": 6.995264765843836e-05, "loss": 1.0008, "step": 65 }, { "epoch": 1.4042553191489362, "grad_norm": 0.4707888662815094, "learning_rate": 6.86481416382306e-05, "loss": 0.6325, "step": 66 }, { "epoch": 1.425531914893617, "grad_norm": 0.28356093168258667, "learning_rate": 6.733636178947425e-05, "loss": 0.2474, "step": 67 }, { "epoch": 1.4468085106382977, "grad_norm": 0.44394785165786743, "learning_rate": 6.601806250455254e-05, "loss": 1.2399, "step": 68 }, { "epoch": 1.4680851063829787, "grad_norm": 0.3906940519809723, "learning_rate": 6.469400192511568e-05, "loss": 1.2571, "step": 69 }, { "epoch": 1.4893617021276595, "grad_norm": 0.40481507778167725, "learning_rate": 6.33649415060808e-05, "loss": 1.1472, "step": 70 }, { "epoch": 1.5106382978723403, "grad_norm": 0.3902008533477783, "learning_rate": 6.203164557772622e-05, "loss": 1.0084, "step": 71 }, { "epoch": 1.5319148936170213, "grad_norm": 0.47971320152282715, "learning_rate": 6.069488090613228e-05, "loss": 0.8643, "step": 72 }, { "epoch": 1.5531914893617023, "grad_norm": 0.35613465309143066, "learning_rate": 5.935541625222126e-05, "loss": 0.072, "step": 73 }, { "epoch": 1.574468085106383, "grad_norm": 0.4089168310165405, "learning_rate": 5.801402192965016e-05, "loss": 1.0347, "step": 74 }, { "epoch": 1.5957446808510638, "grad_norm": 0.4405708909034729, "learning_rate": 5.667146936181042e-05, "loss": 1.2854, "step": 75 }, { "epoch": 1.6170212765957448, "grad_norm": 0.38711073994636536, "learning_rate": 5.53285306381896e-05, "loss": 1.2008, "step": 76 }, { "epoch": 1.6382978723404256, "grad_norm": 0.3995616137981415, "learning_rate": 5.398597807034986e-05, "loss": 1.0411, "step": 77 }, { "epoch": 1.6595744680851063, "grad_norm": 0.4823427200317383, "learning_rate": 5.2644583747778746e-05, "loss": 0.8773, "step": 78 }, { "epoch": 1.6808510638297873, "grad_norm": 0.27052074670791626, "learning_rate": 5.130511909386772e-05, "loss": 0.2128, "step": 79 }, { "epoch": 1.702127659574468, "grad_norm": 0.34951841831207275, "learning_rate": 4.996835442227378e-05, "loss": 0.6826, "step": 80 }, { "epoch": 1.7234042553191489, "grad_norm": 0.44672834873199463, "learning_rate": 4.863505849391921e-05, "loss": 1.3443, "step": 81 }, { "epoch": 1.7446808510638299, "grad_norm": 0.37677329778671265, "learning_rate": 4.7305998074884325e-05, "loss": 1.1532, "step": 82 }, { "epoch": 1.7659574468085106, "grad_norm": 0.3660067319869995, "learning_rate": 4.598193749544746e-05, "loss": 1.0569, "step": 83 }, { "epoch": 1.7872340425531914, "grad_norm": 0.4302046597003937, "learning_rate": 4.466363821052573e-05, "loss": 0.9091, "step": 84 }, { "epoch": 1.8085106382978724, "grad_norm": 0.3698153793811798, "learning_rate": 4.335185836176942e-05, "loss": 0.4112, "step": 85 }, { "epoch": 1.8297872340425532, "grad_norm": 0.27242642641067505, "learning_rate": 4.2047352341561654e-05, "loss": 0.4239, "step": 86 }, { "epoch": 1.851063829787234, "grad_norm": 0.463748574256897, "learning_rate": 4.0750870359174544e-05, "loss": 1.3113, "step": 87 }, { "epoch": 1.872340425531915, "grad_norm": 0.3822973966598511, "learning_rate": 3.946315800933069e-05, "loss": 1.2033, "step": 88 }, { "epoch": 1.8936170212765957, "grad_norm": 0.3759160041809082, "learning_rate": 3.8184955843418635e-05, "loss": 1.1545, "step": 89 }, { "epoch": 1.9148936170212765, "grad_norm": 0.3939042091369629, "learning_rate": 3.691699894360862e-05, "loss": 0.9759, "step": 90 }, { "epoch": 1.9361702127659575, "grad_norm": 0.463613361120224, "learning_rate": 3.5660016500113756e-05, "loss": 0.6793, "step": 91 }, { "epoch": 1.9574468085106385, "grad_norm": 0.39532244205474854, "learning_rate": 3.441473139183941e-05, "loss": 1.0509, "step": 92 }, { "epoch": 1.978723404255319, "grad_norm": 0.37965285778045654, "learning_rate": 3.3181859770662366e-05, "loss": 1.1558, "step": 93 }, { "epoch": 2.0, "grad_norm": 0.4093703627586365, "learning_rate": 3.196211064957841e-05, "loss": 0.9204, "step": 94 }, { "epoch": 2.021276595744681, "grad_norm": 0.11672822386026382, "learning_rate": 3.075618549495574e-05, "loss": 0.0652, "step": 95 }, { "epoch": 2.0425531914893615, "grad_norm": 0.4452710747718811, "learning_rate": 2.9564777823128087e-05, "loss": 1.0477, "step": 96 }, { "epoch": 2.0638297872340425, "grad_norm": 0.4406222999095917, "learning_rate": 2.8388572801559853e-05, "loss": 1.2642, "step": 97 }, { "epoch": 2.0851063829787235, "grad_norm": 0.3580534756183624, "learning_rate": 2.7228246854812867e-05, "loss": 1.0798, "step": 98 }, { "epoch": 2.106382978723404, "grad_norm": 0.3822851777076721, "learning_rate": 2.60844672755407e-05, "loss": 1.0079, "step": 99 }, { "epoch": 2.127659574468085, "grad_norm": 0.4569602608680725, "learning_rate": 2.4957891840735056e-05, "loss": 0.8454, "step": 100 }, { "epoch": 2.127659574468085, "eval_loss": 0.9264618754386902, "eval_runtime": 3.7503, "eval_samples_per_second": 168.787, "eval_steps_per_second": 5.333, "step": 100 }, { "epoch": 2.148936170212766, "grad_norm": 0.26321274042129517, "learning_rate": 2.384916843344419e-05, "loss": 0.2123, "step": 101 }, { "epoch": 2.1702127659574466, "grad_norm": 0.36982113122940063, "learning_rate": 2.275893467018154e-05, "loss": 0.7511, "step": 102 }, { "epoch": 2.1914893617021276, "grad_norm": 0.4143376648426056, "learning_rate": 2.1687817534238292e-05, "loss": 1.2891, "step": 103 }, { "epoch": 2.2127659574468086, "grad_norm": 0.3768226206302643, "learning_rate": 2.0636433015111154e-05, "loss": 1.127, "step": 104 }, { "epoch": 2.2340425531914896, "grad_norm": 0.3875614106655121, "learning_rate": 1.9605385754252593e-05, "loss": 0.9696, "step": 105 }, { "epoch": 2.25531914893617, "grad_norm": 0.4567450284957886, "learning_rate": 1.8595268697347047e-05, "loss": 0.8626, "step": 106 }, { "epoch": 2.276595744680851, "grad_norm": 0.43263593316078186, "learning_rate": 1.76066627533135e-05, "loss": 0.417, "step": 107 }, { "epoch": 2.297872340425532, "grad_norm": 0.2629643380641937, "learning_rate": 1.664013646023009e-05, "loss": 0.3379, "step": 108 }, { "epoch": 2.3191489361702127, "grad_norm": 0.47793322801589966, "learning_rate": 1.5696245658373157e-05, "loss": 1.1889, "step": 109 }, { "epoch": 2.3404255319148937, "grad_norm": 0.4138708710670471, "learning_rate": 1.4775533170558723e-05, "loss": 1.1401, "step": 110 }, { "epoch": 2.3617021276595747, "grad_norm": 0.4172796905040741, "learning_rate": 1.3878528489970085e-05, "loss": 1.0396, "step": 111 }, { "epoch": 2.382978723404255, "grad_norm": 0.4409525692462921, "learning_rate": 1.3005747475651238e-05, "loss": 0.9267, "step": 112 }, { "epoch": 2.404255319148936, "grad_norm": 0.49562060832977295, "learning_rate": 1.2157692055841128e-05, "loss": 0.6103, "step": 113 }, { "epoch": 2.425531914893617, "grad_norm": 0.16902895271778107, "learning_rate": 1.1334849939319436e-05, "loss": 0.1601, "step": 114 }, { "epoch": 2.4468085106382977, "grad_norm": 0.5262559652328491, "learning_rate": 1.0537694334929756e-05, "loss": 1.2164, "step": 115 }, { "epoch": 2.4680851063829787, "grad_norm": 0.45780274271965027, "learning_rate": 9.766683679441566e-06, "loss": 1.2017, "step": 116 }, { "epoch": 2.4893617021276597, "grad_norm": 0.4124818444252014, "learning_rate": 9.022261373907599e-06, "loss": 1.0688, "step": 117 }, { "epoch": 2.5106382978723403, "grad_norm": 0.41770249605178833, "learning_rate": 8.304855528667915e-06, "loss": 0.893, "step": 118 }, { "epoch": 2.5319148936170213, "grad_norm": 0.5359745025634766, "learning_rate": 7.614878717147731e-06, "loss": 0.734, "step": 119 }, { "epoch": 2.5531914893617023, "grad_norm": 0.12150562554597855, "learning_rate": 6.952727738590198e-06, "loss": 0.0908, "step": 120 }, { "epoch": 2.574468085106383, "grad_norm": 0.45060330629348755, "learning_rate": 6.318783389860888e-06, "loss": 0.9969, "step": 121 }, { "epoch": 2.595744680851064, "grad_norm": 0.4552464485168457, "learning_rate": 5.7134102464550925e-06, "loss": 1.2133, "step": 122 }, { "epoch": 2.617021276595745, "grad_norm": 0.41424861550331116, "learning_rate": 5.136956452833776e-06, "loss": 1.0531, "step": 123 }, { "epoch": 2.6382978723404253, "grad_norm": 0.4196318984031677, "learning_rate": 4.589753522209003e-06, "loss": 0.9811, "step": 124 }, { "epoch": 2.6595744680851063, "grad_norm": 0.4966506063938141, "learning_rate": 4.072116145893723e-06, "loss": 0.8532, "step": 125 }, { "epoch": 2.6808510638297873, "grad_norm": 0.2934734523296356, "learning_rate": 3.584342012325771e-06, "loss": 0.1873, "step": 126 }, { "epoch": 2.702127659574468, "grad_norm": 0.3809848725795746, "learning_rate": 3.126711635869966e-06, "loss": 0.6348, "step": 127 }, { "epoch": 2.723404255319149, "grad_norm": 0.463334858417511, "learning_rate": 2.699488195496971e-06, "loss": 1.2586, "step": 128 }, { "epoch": 2.74468085106383, "grad_norm": 0.43426749110221863, "learning_rate": 2.3029173834314634e-06, "loss": 1.1442, "step": 129 }, { "epoch": 2.7659574468085104, "grad_norm": 0.4165303111076355, "learning_rate": 1.9372272638568494e-06, "loss": 1.0423, "step": 130 }, { "epoch": 2.7872340425531914, "grad_norm": 0.48008161783218384, "learning_rate": 1.6026281417576689e-06, "loss": 0.8429, "step": 131 }, { "epoch": 2.8085106382978724, "grad_norm": 0.45738592743873596, "learning_rate": 1.299312441975153e-06, "loss": 0.3767, "step": 132 }, { "epoch": 2.829787234042553, "grad_norm": 0.350779265165329, "learning_rate": 1.0274545985455078e-06, "loss": 0.5226, "step": 133 }, { "epoch": 2.851063829787234, "grad_norm": 0.49393415451049805, "learning_rate": 7.872109543844799e-07, "loss": 1.2395, "step": 134 }, { "epoch": 2.872340425531915, "grad_norm": 0.4229400157928467, "learning_rate": 5.787196713760618e-07, "loss": 1.1422, "step": 135 }, { "epoch": 2.8936170212765955, "grad_norm": 0.42691662907600403, "learning_rate": 4.021006509168048e-07, "loss": 1.0496, "step": 136 }, { "epoch": 2.9148936170212765, "grad_norm": 0.4528738260269165, "learning_rate": 2.574554649617209e-07, "loss": 0.905, "step": 137 }, { "epoch": 2.9361702127659575, "grad_norm": 0.4881468713283539, "learning_rate": 1.4486729761113447e-07, "loss": 0.5696, "step": 138 }, { "epoch": 2.9574468085106385, "grad_norm": 0.412494957447052, "learning_rate": 6.440089727230269e-08, "loss": 0.7729, "step": 139 }, { "epoch": 2.978723404255319, "grad_norm": 0.4168694317340851, "learning_rate": 1.6102539423217266e-08, "loss": 1.1272, "step": 140 }, { "epoch": 3.0, "grad_norm": 0.48817628622055054, "learning_rate": 0.0, "loss": 0.8255, "step": 141 } ], "logging_steps": 1, "max_steps": 141, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.466930952990884e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }