{ "best_metric": 1.1565907001495361, "best_model_checkpoint": "./wav2vec-bert-korean-dialect-recognition/checkpoint-465000", "epoch": 10.0, "eval_steps": 5000, "global_step": 514250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0035555555555555557, "grad_norm": 0.34849223494529724, "learning_rate": 1e-05, "loss": 1.7918, "step": 100 }, { "epoch": 0.0071111111111111115, "grad_norm": 0.5689309239387512, "learning_rate": 2e-05, "loss": 1.7899, "step": 200 }, { "epoch": 0.010666666666666666, "grad_norm": 0.5689606070518494, "learning_rate": 3e-05, "loss": 1.7863, "step": 300 }, { "epoch": 0.014222222222222223, "grad_norm": 0.4549981355667114, "learning_rate": 4e-05, "loss": 1.7809, "step": 400 }, { "epoch": 0.017777777777777778, "grad_norm": 0.7047455906867981, "learning_rate": 5e-05, "loss": 1.779, "step": 500 }, { "epoch": 0.021333333333333333, "grad_norm": 0.6272245645523071, "learning_rate": 4.998219056099733e-05, "loss": 1.7771, "step": 600 }, { "epoch": 0.024888888888888887, "grad_norm": 0.29775723814964294, "learning_rate": 4.996438112199466e-05, "loss": 1.7631, "step": 700 }, { "epoch": 0.028444444444444446, "grad_norm": 0.36544644832611084, "learning_rate": 4.994657168299199e-05, "loss": 1.7696, "step": 800 }, { "epoch": 0.032, "grad_norm": 0.4729597866535187, "learning_rate": 4.9928762243989316e-05, "loss": 1.757, "step": 900 }, { "epoch": 0.035555555555555556, "grad_norm": 0.4762429893016815, "learning_rate": 4.9910952804986644e-05, "loss": 1.757, "step": 1000 }, { "epoch": 0.035555555555555556, "eval_accuracy": 0.19341666666666665, "eval_f1": 0.1444328505167073, "eval_loss": 1.808000922203064, "eval_precision": 0.3630348293356796, "eval_recall": 0.19341666666666665, "eval_runtime": 6903.8383, "eval_samples_per_second": 26.072, "eval_steps_per_second": 0.407, "step": 1000 }, { "epoch": 0.03911111111111111, "grad_norm": 0.8917739987373352, "learning_rate": 4.989314336598397e-05, 
"loss": 1.7562, "step": 1100 }, { "epoch": 0.042666666666666665, "grad_norm": 0.4020093083381653, "learning_rate": 4.987533392698131e-05, "loss": 1.7489, "step": 1200 }, { "epoch": 0.04622222222222222, "grad_norm": 0.5697293281555176, "learning_rate": 4.985752448797863e-05, "loss": 1.7565, "step": 1300 }, { "epoch": 0.049777777777777775, "grad_norm": 0.6953628659248352, "learning_rate": 4.983971504897596e-05, "loss": 1.7526, "step": 1400 }, { "epoch": 0.05333333333333334, "grad_norm": 0.9041539430618286, "learning_rate": 4.982190560997329e-05, "loss": 1.7488, "step": 1500 }, { "epoch": 0.05688888888888889, "grad_norm": 0.4119342863559723, "learning_rate": 4.9804096170970614e-05, "loss": 1.7444, "step": 1600 }, { "epoch": 0.060444444444444446, "grad_norm": 0.7086865901947021, "learning_rate": 4.978628673196795e-05, "loss": 1.7455, "step": 1700 }, { "epoch": 0.064, "grad_norm": 0.7648047804832458, "learning_rate": 4.976847729296528e-05, "loss": 1.7394, "step": 1800 }, { "epoch": 0.06755555555555555, "grad_norm": 0.4442588984966278, "learning_rate": 4.97506678539626e-05, "loss": 1.7353, "step": 1900 }, { "epoch": 0.07111111111111111, "grad_norm": 0.28793784976005554, "learning_rate": 4.9732858414959934e-05, "loss": 1.7207, "step": 2000 }, { "epoch": 0.07111111111111111, "eval_accuracy": 0.18577222222222223, "eval_f1": 0.11668451950466963, "eval_loss": 1.8230526447296143, "eval_precision": 0.4509605125783309, "eval_recall": 0.18577222222222223, "eval_runtime": 6900.5074, "eval_samples_per_second": 26.085, "eval_steps_per_second": 0.408, "step": 2000 }, { "epoch": 0.07466666666666667, "grad_norm": 0.48562493920326233, "learning_rate": 4.971522707034729e-05, "loss": 1.7302, "step": 2100 }, { "epoch": 0.07822222222222222, "grad_norm": 0.6742926239967346, "learning_rate": 4.9697417631344616e-05, "loss": 1.7172, "step": 2200 }, { "epoch": 0.08177777777777778, "grad_norm": 0.38361403346061707, "learning_rate": 4.9679608192341944e-05, "loss": 1.7244, "step": 2300 }, { 
"epoch": 0.08533333333333333, "grad_norm": 0.4890897870063782, "learning_rate": 4.966179875333927e-05, "loss": 1.7228, "step": 2400 }, { "epoch": 0.08888888888888889, "grad_norm": 0.9702339768409729, "learning_rate": 4.96439893143366e-05, "loss": 1.7231, "step": 2500 }, { "epoch": 0.09244444444444444, "grad_norm": 0.8490220904350281, "learning_rate": 4.962617987533393e-05, "loss": 1.7227, "step": 2600 }, { "epoch": 0.096, "grad_norm": 0.9444390535354614, "learning_rate": 4.960837043633126e-05, "loss": 1.7238, "step": 2700 }, { "epoch": 0.09955555555555555, "grad_norm": 1.1613951921463013, "learning_rate": 4.9590560997328586e-05, "loss": 1.7226, "step": 2800 }, { "epoch": 0.10311111111111111, "grad_norm": 0.2626320421695709, "learning_rate": 4.9572751558325914e-05, "loss": 1.7004, "step": 2900 }, { "epoch": 0.10666666666666667, "grad_norm": 0.4100841283798218, "learning_rate": 4.955494211932324e-05, "loss": 1.7191, "step": 3000 }, { "epoch": 0.10666666666666667, "eval_accuracy": 0.2318611111111111, "eval_f1": 0.1955747254747818, "eval_loss": 1.7802455425262451, "eval_precision": 0.3030764829783232, "eval_recall": 0.2318611111111111, "eval_runtime": 6901.4182, "eval_samples_per_second": 26.082, "eval_steps_per_second": 0.408, "step": 3000 }, { "epoch": 0.11022222222222222, "grad_norm": 0.38032007217407227, "learning_rate": 4.953713268032058e-05, "loss": 1.7079, "step": 3100 }, { "epoch": 0.11377777777777778, "grad_norm": 0.4086116850376129, "learning_rate": 4.95193232413179e-05, "loss": 1.6926, "step": 3200 }, { "epoch": 0.11733333333333333, "grad_norm": 0.791631817817688, "learning_rate": 4.950151380231523e-05, "loss": 1.6944, "step": 3300 }, { "epoch": 0.12088888888888889, "grad_norm": 0.777863621711731, "learning_rate": 4.948370436331256e-05, "loss": 1.6868, "step": 3400 }, { "epoch": 0.12444444444444444, "grad_norm": 0.4967781603336334, "learning_rate": 4.9465894924309884e-05, "loss": 1.6882, "step": 3500 }, { "epoch": 0.128, "grad_norm": 0.7300374507904053, 
"learning_rate": 4.944808548530722e-05, "loss": 1.6983, "step": 3600 }, { "epoch": 0.13155555555555556, "grad_norm": 0.7436855435371399, "learning_rate": 4.943027604630454e-05, "loss": 1.691, "step": 3700 }, { "epoch": 0.1351111111111111, "grad_norm": 1.6240664720535278, "learning_rate": 4.941246660730187e-05, "loss": 1.6818, "step": 3800 }, { "epoch": 0.13866666666666666, "grad_norm": 0.8324419260025024, "learning_rate": 4.9394657168299205e-05, "loss": 1.6873, "step": 3900 }, { "epoch": 0.14222222222222222, "grad_norm": 0.5409539937973022, "learning_rate": 4.9376847729296526e-05, "loss": 1.6801, "step": 4000 }, { "epoch": 0.14222222222222222, "eval_accuracy": 0.2709666666666667, "eval_f1": 0.24834467168328822, "eval_loss": 1.7570682764053345, "eval_precision": 0.31626692244901133, "eval_recall": 0.2709666666666667, "eval_runtime": 6899.1111, "eval_samples_per_second": 26.09, "eval_steps_per_second": 0.408, "step": 4000 }, { "epoch": 0.14577777777777778, "grad_norm": 0.42626267671585083, "learning_rate": 4.9359216384683886e-05, "loss": 1.6828, "step": 4100 }, { "epoch": 0.14933333333333335, "grad_norm": 0.9065359830856323, "learning_rate": 4.9341406945681214e-05, "loss": 1.6818, "step": 4200 }, { "epoch": 0.15288888888888888, "grad_norm": 0.7950819730758667, "learning_rate": 4.932359750667854e-05, "loss": 1.678, "step": 4300 }, { "epoch": 0.15644444444444444, "grad_norm": 0.43593984842300415, "learning_rate": 4.930578806767587e-05, "loss": 1.6651, "step": 4400 }, { "epoch": 0.16, "grad_norm": 0.8693380355834961, "learning_rate": 4.92879786286732e-05, "loss": 1.654, "step": 4500 }, { "epoch": 0.16355555555555557, "grad_norm": 0.7723489999771118, "learning_rate": 4.927016918967053e-05, "loss": 1.669, "step": 4600 }, { "epoch": 0.1671111111111111, "grad_norm": 1.067395806312561, "learning_rate": 4.9252359750667856e-05, "loss": 1.6559, "step": 4700 }, { "epoch": 0.17066666666666666, "grad_norm": 0.6594125032424927, "learning_rate": 4.9234550311665185e-05, "loss": 
1.6652, "step": 4800 }, { "epoch": 0.17422222222222222, "grad_norm": 0.8139324188232422, "learning_rate": 4.921674087266251e-05, "loss": 1.671, "step": 4900 }, { "epoch": 0.17777777777777778, "grad_norm": 0.641225278377533, "learning_rate": 4.919893143365984e-05, "loss": 1.6729, "step": 5000 }, { "epoch": 0.17777777777777778, "eval_accuracy": 0.316, "eval_f1": 0.30710382220672644, "eval_loss": 1.7127083539962769, "eval_precision": 0.3273503090794928, "eval_recall": 0.316, "eval_runtime": 6895.662, "eval_samples_per_second": 26.103, "eval_steps_per_second": 0.408, "step": 5000 }, { "epoch": 0.18133333333333335, "grad_norm": 0.7610512971878052, "learning_rate": 4.918112199465717e-05, "loss": 1.655, "step": 5100 }, { "epoch": 0.18488888888888888, "grad_norm": 1.1036220788955688, "learning_rate": 4.91633125556545e-05, "loss": 1.6436, "step": 5200 }, { "epoch": 0.18844444444444444, "grad_norm": 0.7507835626602173, "learning_rate": 4.9145503116651826e-05, "loss": 1.6401, "step": 5300 }, { "epoch": 0.192, "grad_norm": 0.9475215673446655, "learning_rate": 4.9127693677649155e-05, "loss": 1.643, "step": 5400 }, { "epoch": 0.19555555555555557, "grad_norm": 0.9549380540847778, "learning_rate": 4.910988423864648e-05, "loss": 1.6428, "step": 5500 }, { "epoch": 0.1991111111111111, "grad_norm": 0.6130171418190002, "learning_rate": 4.909207479964381e-05, "loss": 1.657, "step": 5600 }, { "epoch": 0.20266666666666666, "grad_norm": 0.42279860377311707, "learning_rate": 4.907426536064114e-05, "loss": 1.654, "step": 5700 }, { "epoch": 0.20622222222222222, "grad_norm": 0.42804285883903503, "learning_rate": 4.9056455921638475e-05, "loss": 1.623, "step": 5800 }, { "epoch": 0.20977777777777779, "grad_norm": 0.4771113693714142, "learning_rate": 4.9038646482635796e-05, "loss": 1.6426, "step": 5900 }, { "epoch": 0.21333333333333335, "grad_norm": 1.0067540407180786, "learning_rate": 4.9020837043633125e-05, "loss": 1.6273, "step": 6000 }, { "epoch": 0.21333333333333335, "eval_accuracy": 
0.2663388888888889, "eval_f1": 0.22778708359054992, "eval_loss": 1.7038311958312988, "eval_precision": 0.3393060721774552, "eval_recall": 0.2663388888888889, "eval_runtime": 6901.6294, "eval_samples_per_second": 26.081, "eval_steps_per_second": 0.408, "step": 6000 }, { "epoch": 0.21688888888888888, "grad_norm": 2.9190707206726074, "learning_rate": 4.9003205699020485e-05, "loss": 1.6362, "step": 6100 }, { "epoch": 0.22044444444444444, "grad_norm": 1.7024515867233276, "learning_rate": 4.898539626001781e-05, "loss": 1.6353, "step": 6200 }, { "epoch": 0.224, "grad_norm": 0.7489303946495056, "learning_rate": 4.8967586821015135e-05, "loss": 1.6451, "step": 6300 }, { "epoch": 0.22755555555555557, "grad_norm": 0.676858127117157, "learning_rate": 4.894977738201247e-05, "loss": 1.6572, "step": 6400 }, { "epoch": 0.2311111111111111, "grad_norm": 2.145578145980835, "learning_rate": 4.89319679430098e-05, "loss": 1.6229, "step": 6500 }, { "epoch": 0.23466666666666666, "grad_norm": 1.2439501285552979, "learning_rate": 4.8914158504007126e-05, "loss": 1.6217, "step": 6600 }, { "epoch": 0.23822222222222222, "grad_norm": 1.0118827819824219, "learning_rate": 4.8896349065004455e-05, "loss": 1.648, "step": 6700 }, { "epoch": 0.24177777777777779, "grad_norm": 0.609039843082428, "learning_rate": 4.887853962600178e-05, "loss": 1.5899, "step": 6800 }, { "epoch": 0.24533333333333332, "grad_norm": 0.5727493762969971, "learning_rate": 4.886073018699911e-05, "loss": 1.6225, "step": 6900 }, { "epoch": 0.24888888888888888, "grad_norm": 0.5670767426490784, "learning_rate": 4.884292074799644e-05, "loss": 1.638, "step": 7000 }, { "epoch": 0.24888888888888888, "eval_accuracy": 0.3340166666666667, "eval_f1": 0.2974824945957891, "eval_loss": 1.6556248664855957, "eval_precision": 0.336487348404201, "eval_recall": 0.33401666666666663, "eval_runtime": 6901.7384, "eval_samples_per_second": 26.08, "eval_steps_per_second": 0.408, "step": 7000 }, { "epoch": 0.25244444444444447, "grad_norm": 
0.7777906656265259, "learning_rate": 4.882511130899377e-05, "loss": 1.6207, "step": 7100 }, { "epoch": 0.256, "grad_norm": 0.6265078186988831, "learning_rate": 4.8807301869991096e-05, "loss": 1.5886, "step": 7200 }, { "epoch": 0.25955555555555554, "grad_norm": 1.0755648612976074, "learning_rate": 4.8789492430988425e-05, "loss": 1.6486, "step": 7300 }, { "epoch": 0.26311111111111113, "grad_norm": 0.45006299018859863, "learning_rate": 4.877168299198575e-05, "loss": 1.6193, "step": 7400 }, { "epoch": 0.26666666666666666, "grad_norm": 0.6583587527275085, "learning_rate": 4.875387355298308e-05, "loss": 1.6144, "step": 7500 }, { "epoch": 0.2702222222222222, "grad_norm": 0.6178300976753235, "learning_rate": 4.873606411398041e-05, "loss": 1.6237, "step": 7600 }, { "epoch": 0.2737777777777778, "grad_norm": 1.8750592470169067, "learning_rate": 4.8718254674977745e-05, "loss": 1.6085, "step": 7700 }, { "epoch": 0.2773333333333333, "grad_norm": 0.7604326009750366, "learning_rate": 4.8700445235975067e-05, "loss": 1.6266, "step": 7800 }, { "epoch": 0.2808888888888889, "grad_norm": 1.083824634552002, "learning_rate": 4.8682635796972395e-05, "loss": 1.6221, "step": 7900 }, { "epoch": 0.28444444444444444, "grad_norm": 1.174501895904541, "learning_rate": 4.866482635796973e-05, "loss": 1.6088, "step": 8000 }, { "epoch": 0.28444444444444444, "eval_accuracy": 0.3466888888888889, "eval_f1": 0.30295230645057153, "eval_loss": 1.6231971979141235, "eval_precision": 0.35294347851231134, "eval_recall": 0.3466888888888889, "eval_runtime": 6903.4464, "eval_samples_per_second": 26.074, "eval_steps_per_second": 0.407, "step": 8000 }, { "epoch": 0.288, "grad_norm": 0.7184209823608398, "learning_rate": 4.864719501335708e-05, "loss": 1.5824, "step": 8100 }, { "epoch": 0.29155555555555557, "grad_norm": 1.433828353881836, "learning_rate": 4.8629385574354405e-05, "loss": 1.6032, "step": 8200 }, { "epoch": 0.2951111111111111, "grad_norm": 0.8803566694259644, "learning_rate": 4.861157613535174e-05, 
"loss": 1.596, "step": 8300 }, { "epoch": 0.2986666666666667, "grad_norm": 0.8585767149925232, "learning_rate": 4.859376669634907e-05, "loss": 1.5911, "step": 8400 }, { "epoch": 0.3022222222222222, "grad_norm": 1.2960944175720215, "learning_rate": 4.8575957257346397e-05, "loss": 1.6054, "step": 8500 }, { "epoch": 0.30577777777777776, "grad_norm": 1.4361809492111206, "learning_rate": 4.8558147818343725e-05, "loss": 1.6076, "step": 8600 }, { "epoch": 0.30933333333333335, "grad_norm": 0.27110278606414795, "learning_rate": 4.854033837934105e-05, "loss": 1.581, "step": 8700 }, { "epoch": 0.3128888888888889, "grad_norm": 1.219621181488037, "learning_rate": 4.852252894033838e-05, "loss": 1.5964, "step": 8800 }, { "epoch": 0.3164444444444444, "grad_norm": 1.1345587968826294, "learning_rate": 4.850471950133571e-05, "loss": 1.5969, "step": 8900 }, { "epoch": 0.32, "grad_norm": 0.9537592530250549, "learning_rate": 4.848691006233304e-05, "loss": 1.6045, "step": 9000 }, { "epoch": 0.32, "eval_accuracy": 0.3677666666666667, "eval_f1": 0.3466501694189074, "eval_loss": 1.6154073476791382, "eval_precision": 0.37185325506278916, "eval_recall": 0.36776666666666663, "eval_runtime": 6911.5391, "eval_samples_per_second": 26.043, "eval_steps_per_second": 0.407, "step": 9000 }, { "epoch": 0.3539065842180998, "grad_norm": 1.5688387155532837, "learning_rate": 4.8325215290496045e-05, "loss": 1.5906, "step": 9100 }, { "epoch": 0.35779566756115583, "grad_norm": 0.9865477681159973, "learning_rate": 4.8305731987686555e-05, "loss": 1.6037, "step": 9200 }, { "epoch": 0.36168475090421187, "grad_norm": 2.3448054790496826, "learning_rate": 4.828624868487706e-05, "loss": 1.5608, "step": 9300 }, { "epoch": 0.3655738342472679, "grad_norm": 2.508720636367798, "learning_rate": 4.826676538206757e-05, "loss": 1.6175, "step": 9400 }, { "epoch": 0.36946291759032396, "grad_norm": 0.7456146478652954, "learning_rate": 4.824728207925808e-05, "loss": 1.587, "step": 9500 }, { "epoch": 0.37335200093338, "grad_norm": 
1.0600403547286987, "learning_rate": 4.822779877644859e-05, "loss": 1.5726, "step": 9600 }, { "epoch": 0.37724108427643605, "grad_norm": 1.3075721263885498, "learning_rate": 4.820831547363909e-05, "loss": 1.5604, "step": 9700 }, { "epoch": 0.3811301676194921, "grad_norm": 0.9544525146484375, "learning_rate": 4.81888321708296e-05, "loss": 1.5793, "step": 9800 }, { "epoch": 0.38501925096254813, "grad_norm": 1.5886527299880981, "learning_rate": 4.816934886802011e-05, "loss": 1.5832, "step": 9900 }, { "epoch": 0.3889083343056042, "grad_norm": 1.7134085893630981, "learning_rate": 4.8149865565210614e-05, "loss": 1.5529, "step": 10000 }, { "epoch": 0.3889083343056042, "eval_accuracy": 0.3898111111111111, "eval_f1": 0.35573873891655433, "eval_loss": 1.5715184211730957, "eval_precision": 0.38197948036795637, "eval_recall": 0.3898111111111111, "eval_runtime": 6925.498, "eval_samples_per_second": 25.991, "eval_steps_per_second": 0.406, "step": 10000 }, { "epoch": 0.3927974176486602, "grad_norm": 0.8219287395477295, "learning_rate": 4.8130382262401124e-05, "loss": 1.5336, "step": 10100 }, { "epoch": 0.39668650099171626, "grad_norm": 0.6594845056533813, "learning_rate": 4.8110898959591634e-05, "loss": 1.588, "step": 10200 }, { "epoch": 0.4005755843347723, "grad_norm": 0.9765191674232483, "learning_rate": 4.8091415656782144e-05, "loss": 1.532, "step": 10300 }, { "epoch": 0.40446466767782835, "grad_norm": 1.4016308784484863, "learning_rate": 4.807193235397265e-05, "loss": 1.5627, "step": 10400 }, { "epoch": 0.4083537510208844, "grad_norm": 1.0591779947280884, "learning_rate": 4.8052449051163157e-05, "loss": 1.6455, "step": 10500 }, { "epoch": 0.41224283436394044, "grad_norm": 1.0856883525848389, "learning_rate": 4.8032965748353666e-05, "loss": 1.5797, "step": 10600 }, { "epoch": 0.4161319177069965, "grad_norm": 0.7659090161323547, "learning_rate": 4.801348244554417e-05, "loss": 1.5778, "step": 10700 }, { "epoch": 0.4200210010500525, "grad_norm": 1.5165112018585205, 
"learning_rate": 4.799399914273468e-05, "loss": 1.6096, "step": 10800 }, { "epoch": 0.42391008439310857, "grad_norm": 1.2313932180404663, "learning_rate": 4.797451583992519e-05, "loss": 1.557, "step": 10900 }, { "epoch": 0.4277991677361646, "grad_norm": 0.760732889175415, "learning_rate": 4.79550325371157e-05, "loss": 1.5729, "step": 11000 }, { "epoch": 0.4277991677361646, "eval_accuracy": 0.3882, "eval_f1": 0.3648581086862601, "eval_loss": 1.56192147731781, "eval_precision": 0.4034139914848996, "eval_recall": 0.38820000000000005, "eval_runtime": 6919.7832, "eval_samples_per_second": 26.012, "eval_steps_per_second": 0.407, "step": 11000 }, { "epoch": 0.43168825107922065, "grad_norm": 1.3028088808059692, "learning_rate": 4.79355492343062e-05, "loss": 1.5451, "step": 11100 }, { "epoch": 0.4355773344222767, "grad_norm": 1.3975151777267456, "learning_rate": 4.791626076452481e-05, "loss": 1.5874, "step": 11200 }, { "epoch": 0.4394664177653327, "grad_norm": 1.667127013206482, "learning_rate": 4.789677746171532e-05, "loss": 1.5884, "step": 11300 }, { "epoch": 0.4433555011083887, "grad_norm": 1.2656354904174805, "learning_rate": 4.787729415890582e-05, "loss": 1.578, "step": 11400 }, { "epoch": 0.44724458445144477, "grad_norm": 0.48697930574417114, "learning_rate": 4.785781085609633e-05, "loss": 1.6215, "step": 11500 }, { "epoch": 0.4511336677945008, "grad_norm": 1.3055481910705566, "learning_rate": 4.783832755328684e-05, "loss": 1.5442, "step": 11600 }, { "epoch": 0.45502275113755686, "grad_norm": 2.0546064376831055, "learning_rate": 4.7818844250477343e-05, "loss": 1.5628, "step": 11700 }, { "epoch": 0.4589118344806129, "grad_norm": 0.6086755990982056, "learning_rate": 4.7799360947667847e-05, "loss": 1.618, "step": 11800 }, { "epoch": 0.46280091782366894, "grad_norm": 1.1556247472763062, "learning_rate": 4.7779877644858356e-05, "loss": 1.5716, "step": 11900 }, { "epoch": 0.466690001166725, "grad_norm": 2.299273729324341, "learning_rate": 4.7760394342048866e-05, "loss": 
1.5647, "step": 12000 }, { "epoch": 0.466690001166725, "eval_accuracy": 0.4043388888888889, "eval_f1": 0.3772589933957149, "eval_loss": 1.5250067710876465, "eval_precision": 0.4065588393160107, "eval_recall": 0.40433888888888886, "eval_runtime": 6919.8553, "eval_samples_per_second": 26.012, "eval_steps_per_second": 0.407, "step": 12000 }, { "epoch": 0.47057908450978103, "grad_norm": 0.9896708130836487, "learning_rate": 4.774091103923937e-05, "loss": 1.5806, "step": 12100 }, { "epoch": 0.4744681678528371, "grad_norm": 1.1260895729064941, "learning_rate": 4.772142773642988e-05, "loss": 1.5557, "step": 12200 }, { "epoch": 0.4783572511958931, "grad_norm": 1.2843358516693115, "learning_rate": 4.770194443362039e-05, "loss": 1.5663, "step": 12300 }, { "epoch": 0.48224633453894916, "grad_norm": 2.415924549102783, "learning_rate": 4.76824611308109e-05, "loss": 1.584, "step": 12400 }, { "epoch": 0.4861354178820052, "grad_norm": 1.0588767528533936, "learning_rate": 4.76629778280014e-05, "loss": 1.5506, "step": 12500 }, { "epoch": 0.49002450122506125, "grad_norm": 0.8469063639640808, "learning_rate": 4.764349452519191e-05, "loss": 1.5539, "step": 12600 }, { "epoch": 0.4939135845681173, "grad_norm": 2.404540538787842, "learning_rate": 4.762401122238242e-05, "loss": 1.5237, "step": 12700 }, { "epoch": 0.49780266791117334, "grad_norm": 7.226748466491699, "learning_rate": 4.7604527919572925e-05, "loss": 1.5715, "step": 12800 }, { "epoch": 0.5016917512542294, "grad_norm": 0.8756240010261536, "learning_rate": 4.7585044616763435e-05, "loss": 1.5263, "step": 12900 }, { "epoch": 0.5055808345972854, "grad_norm": 0.7545004487037659, "learning_rate": 4.7565561313953945e-05, "loss": 1.5344, "step": 13000 }, { "epoch": 0.5055808345972854, "eval_accuracy": 0.42311666666666664, "eval_f1": 0.39571748310251914, "eval_loss": 1.5100597143173218, "eval_precision": 0.42507812210373896, "eval_recall": 0.42311666666666664, "eval_runtime": 6919.7379, "eval_samples_per_second": 26.013, 
"eval_steps_per_second": 0.407, "step": 13000 }, { "epoch": 0.5094699179403415, "grad_norm": 4.258695125579834, "learning_rate": 4.754607801114445e-05, "loss": 1.5387, "step": 13100 }, { "epoch": 0.5133590012833975, "grad_norm": 1.6335220336914062, "learning_rate": 4.752659470833496e-05, "loss": 1.5423, "step": 13200 }, { "epoch": 0.5172480846264536, "grad_norm": 1.2486385107040405, "learning_rate": 4.750730623855356e-05, "loss": 1.5211, "step": 13300 }, { "epoch": 0.5211371679695096, "grad_norm": 1.5692938566207886, "learning_rate": 4.7487822935744066e-05, "loss": 1.566, "step": 13400 }, { "epoch": 0.5250262513125656, "grad_norm": 1.6601624488830566, "learning_rate": 4.7468339632934576e-05, "loss": 1.5386, "step": 13500 }, { "epoch": 0.5289153346556217, "grad_norm": 2.1174402236938477, "learning_rate": 4.7448856330125086e-05, "loss": 1.515, "step": 13600 }, { "epoch": 0.5328044179986777, "grad_norm": 2.547882318496704, "learning_rate": 4.7429373027315596e-05, "loss": 1.5932, "step": 13700 }, { "epoch": 0.5366935013417338, "grad_norm": 1.103047490119934, "learning_rate": 4.74098897245061e-05, "loss": 1.6018, "step": 13800 }, { "epoch": 0.5405825846847898, "grad_norm": 0.9051464200019836, "learning_rate": 4.739040642169661e-05, "loss": 1.5514, "step": 13900 }, { "epoch": 0.5444716680278459, "grad_norm": 1.6535760164260864, "learning_rate": 4.737092311888712e-05, "loss": 1.558, "step": 14000 }, { "epoch": 0.5444716680278459, "eval_accuracy": 0.42880555555555555, "eval_f1": 0.4051677096770836, "eval_loss": 1.4953252077102661, "eval_precision": 0.4248738032823844, "eval_recall": 0.4288055555555556, "eval_runtime": 6923.2306, "eval_samples_per_second": 25.999, "eval_steps_per_second": 0.406, "step": 14000 }, { "epoch": 0.5483607513709019, "grad_norm": 0.698728621006012, "learning_rate": 4.735143981607762e-05, "loss": 1.53, "step": 14100 }, { "epoch": 0.5522498347139579, "grad_norm": 1.4138538837432861, "learning_rate": 4.733195651326813e-05, "loss": 1.5376, "step": 
14200 }, { "epoch": 0.556138918057014, "grad_norm": 1.6942106485366821, "learning_rate": 4.731247321045864e-05, "loss": 1.5463, "step": 14300 }, { "epoch": 0.56002800140007, "grad_norm": 2.12391996383667, "learning_rate": 4.729298990764915e-05, "loss": 1.6055, "step": 14400 }, { "epoch": 0.5639170847431261, "grad_norm": 1.8027210235595703, "learning_rate": 4.7273506604839655e-05, "loss": 1.5587, "step": 14500 }, { "epoch": 0.5678061680861821, "grad_norm": 1.587385892868042, "learning_rate": 4.7254023302030165e-05, "loss": 1.5662, "step": 14600 }, { "epoch": 0.5716952514292382, "grad_norm": 1.7555272579193115, "learning_rate": 4.7234539999220675e-05, "loss": 1.6194, "step": 14700 }, { "epoch": 0.5755843347722942, "grad_norm": 1.5254045724868774, "learning_rate": 4.721505669641118e-05, "loss": 1.5865, "step": 14800 }, { "epoch": 0.5794734181153502, "grad_norm": 0.6278159022331238, "learning_rate": 4.719557339360168e-05, "loss": 1.5911, "step": 14900 }, { "epoch": 0.5833625014584063, "grad_norm": 0.8578253984451294, "learning_rate": 4.717609009079219e-05, "loss": 1.5119, "step": 15000 }, { "epoch": 0.5833625014584063, "eval_accuracy": 0.4318222222222222, "eval_f1": 0.4107886142987979, "eval_loss": 1.4901130199432373, "eval_precision": 0.432587371193817, "eval_recall": 0.43182222222222216, "eval_runtime": 6918.9918, "eval_samples_per_second": 26.015, "eval_steps_per_second": 0.407, "step": 15000 }, { "epoch": 0.5872515848014623, "grad_norm": 3.854048013687134, "learning_rate": 4.71566067879827e-05, "loss": 1.5255, "step": 15100 }, { "epoch": 0.5911406681445184, "grad_norm": 1.5823243856430054, "learning_rate": 4.7137123485173204e-05, "loss": 1.5273, "step": 15200 }, { "epoch": 0.5950297514875744, "grad_norm": 0.5427559018135071, "learning_rate": 4.711783501539181e-05, "loss": 1.5768, "step": 15300 }, { "epoch": 0.5989188348306305, "grad_norm": 1.58596670627594, "learning_rate": 4.709835171258232e-05, "loss": 1.5612, "step": 15400 }, { "epoch": 0.6028079181736865, 
"grad_norm": 1.3784738779067993, "learning_rate": 4.707886840977282e-05, "loss": 1.5585, "step": 15500 }, { "epoch": 0.6066970015167426, "grad_norm": 1.6441869735717773, "learning_rate": 4.705938510696333e-05, "loss": 1.5477, "step": 15600 }, { "epoch": 0.6105860848597986, "grad_norm": 1.1442049741744995, "learning_rate": 4.703990180415384e-05, "loss": 1.5434, "step": 15700 }, { "epoch": 0.6144751682028546, "grad_norm": 2.200047492980957, "learning_rate": 4.702041850134435e-05, "loss": 1.4949, "step": 15800 }, { "epoch": 0.6183642515459107, "grad_norm": 1.3911105394363403, "learning_rate": 4.7000935198534855e-05, "loss": 1.5202, "step": 15900 }, { "epoch": 0.6222533348889667, "grad_norm": 2.4547626972198486, "learning_rate": 4.6981451895725365e-05, "loss": 1.53, "step": 16000 }, { "epoch": 0.6222533348889667, "eval_accuracy": 0.4374, "eval_f1": 0.42028188743909817, "eval_loss": 1.4725229740142822, "eval_precision": 0.4316194576708012, "eval_recall": 0.43739999999999996, "eval_runtime": 6918.3093, "eval_samples_per_second": 26.018, "eval_steps_per_second": 0.407, "step": 16000 }, { "epoch": 0.6261424182320227, "grad_norm": 1.9801987409591675, "learning_rate": 4.6961968592915875e-05, "loss": 1.5688, "step": 16100 }, { "epoch": 0.6300315015750787, "grad_norm": 0.9716518521308899, "learning_rate": 4.694248529010638e-05, "loss": 1.5018, "step": 16200 }, { "epoch": 0.6339205849181347, "grad_norm": 3.6056666374206543, "learning_rate": 4.692300198729689e-05, "loss": 1.5309, "step": 16300 }, { "epoch": 0.6378096682611908, "grad_norm": 1.1704907417297363, "learning_rate": 4.69035186844874e-05, "loss": 1.5589, "step": 16400 }, { "epoch": 0.6416987516042468, "grad_norm": 2.138425350189209, "learning_rate": 4.688403538167791e-05, "loss": 1.5309, "step": 16500 }, { "epoch": 0.6455878349473029, "grad_norm": 0.9838487505912781, "learning_rate": 4.686455207886841e-05, "loss": 1.5175, "step": 16600 }, { "epoch": 0.6494769182903589, "grad_norm": 1.2963052988052368, "learning_rate": 
4.684506877605892e-05, "loss": 1.5032, "step": 16700 }, { "epoch": 0.653366001633415, "grad_norm": 0.5901209115982056, "learning_rate": 4.682558547324943e-05, "loss": 1.6003, "step": 16800 }, { "epoch": 0.657255084976471, "grad_norm": 1.981461763381958, "learning_rate": 4.6806102170439934e-05, "loss": 1.5392, "step": 16900 }, { "epoch": 0.661144168319527, "grad_norm": 1.4778306484222412, "learning_rate": 4.6786618867630443e-05, "loss": 1.5029, "step": 17000 }, { "epoch": 0.661144168319527, "eval_accuracy": 0.43748333333333334, "eval_f1": 0.4130372917016616, "eval_loss": 1.46103036403656, "eval_precision": 0.4316719087591015, "eval_recall": 0.4374833333333334, "eval_runtime": 6918.5408, "eval_samples_per_second": 26.017, "eval_steps_per_second": 0.407, "step": 17000 }, { "epoch": 0.6650332516625831, "grad_norm": 2.8819820880889893, "learning_rate": 4.676713556482095e-05, "loss": 1.5122, "step": 17100 }, { "epoch": 0.6689223350056391, "grad_norm": 2.447704792022705, "learning_rate": 4.674765226201146e-05, "loss": 1.4853, "step": 17200 }, { "epoch": 0.6728114183486952, "grad_norm": 3.050201416015625, "learning_rate": 4.672836379223006e-05, "loss": 1.4987, "step": 17300 }, { "epoch": 0.6767005016917512, "grad_norm": 1.121934413909912, "learning_rate": 4.670888048942057e-05, "loss": 1.527, "step": 17400 }, { "epoch": 0.6805895850348073, "grad_norm": 3.5413713455200195, "learning_rate": 4.668939718661108e-05, "loss": 1.5341, "step": 17500 }, { "epoch": 0.6844786683778633, "grad_norm": 0.6533594131469727, "learning_rate": 4.6669913883801584e-05, "loss": 1.5871, "step": 17600 }, { "epoch": 0.6883677517209194, "grad_norm": 1.762682557106018, "learning_rate": 4.6650430580992094e-05, "loss": 1.5325, "step": 17700 }, { "epoch": 0.6922568350639754, "grad_norm": 2.551069498062134, "learning_rate": 4.6630947278182604e-05, "loss": 1.5108, "step": 17800 }, { "epoch": 0.6961459184070314, "grad_norm": 2.322578191757202, "learning_rate": 4.661146397537311e-05, "loss": 1.5235, "step": 
17900 }, { "epoch": 0.7000350017500875, "grad_norm": 1.0806115865707397, "learning_rate": 4.659198067256362e-05, "loss": 1.5406, "step": 18000 }, { "epoch": 0.7000350017500875, "eval_accuracy": 0.4469666666666667, "eval_f1": 0.4340819499206566, "eval_loss": 1.4421067237854004, "eval_precision": 0.45889291209449373, "eval_recall": 0.4469666666666667, "eval_runtime": 6917.7545, "eval_samples_per_second": 26.02, "eval_steps_per_second": 0.407, "step": 18000 }, { "epoch": 0.7039240850931435, "grad_norm": 1.075889229774475, "learning_rate": 4.657249736975413e-05, "loss": 1.5382, "step": 18100 }, { "epoch": 0.7078131684361996, "grad_norm": 2.2131457328796387, "learning_rate": 4.655301406694463e-05, "loss": 1.5015, "step": 18200 }, { "epoch": 0.7117022517792556, "grad_norm": 3.4657328128814697, "learning_rate": 4.6533530764135133e-05, "loss": 1.4986, "step": 18300 }, { "epoch": 0.7155913351223117, "grad_norm": 2.0877413749694824, "learning_rate": 4.651404746132564e-05, "loss": 1.4817, "step": 18400 }, { "epoch": 0.7194804184653677, "grad_norm": 1.4970084428787231, "learning_rate": 4.649456415851615e-05, "loss": 1.4985, "step": 18500 }, { "epoch": 0.7233695018084237, "grad_norm": 2.7417938709259033, "learning_rate": 4.6475080855706656e-05, "loss": 1.6032, "step": 18600 }, { "epoch": 0.7272585851514798, "grad_norm": 1.1277765035629272, "learning_rate": 4.6455597552897166e-05, "loss": 1.5694, "step": 18700 }, { "epoch": 0.7311476684945358, "grad_norm": 2.7301125526428223, "learning_rate": 4.6436114250087676e-05, "loss": 1.5654, "step": 18800 }, { "epoch": 0.7350367518375919, "grad_norm": 1.4314488172531128, "learning_rate": 4.6416630947278186e-05, "loss": 1.5263, "step": 18900 }, { "epoch": 0.7389258351806479, "grad_norm": 0.7808900475502014, "learning_rate": 4.639714764446869e-05, "loss": 1.4774, "step": 19000 }, { "epoch": 0.7389258351806479, "eval_accuracy": 0.4537111111111111, "eval_f1": 0.42821433971791617, "eval_loss": 1.4334732294082642, "eval_precision": 
0.4696531876183827, "eval_recall": 0.4537111111111111, "eval_runtime": 6919.4318, "eval_samples_per_second": 26.014, "eval_steps_per_second": 0.407, "step": 19000 }, { "epoch": 0.742814918523704, "grad_norm": 1.5978940725326538, "learning_rate": 4.63776643416592e-05, "loss": 1.5184, "step": 19100 }, { "epoch": 0.74670400186676, "grad_norm": 4.278934001922607, "learning_rate": 4.635818103884971e-05, "loss": 1.47, "step": 19200 }, { "epoch": 0.750593085209816, "grad_norm": 1.205673098564148, "learning_rate": 4.633889256906831e-05, "loss": 1.533, "step": 19300 }, { "epoch": 0.7544821685528721, "grad_norm": 0.9661447405815125, "learning_rate": 4.631940926625882e-05, "loss": 1.5145, "step": 19400 }, { "epoch": 0.7583712518959281, "grad_norm": 2.32865047454834, "learning_rate": 4.629992596344933e-05, "loss": 1.564, "step": 19500 }, { "epoch": 0.7622603352389842, "grad_norm": 1.4572992324829102, "learning_rate": 4.628044266063983e-05, "loss": 1.5504, "step": 19600 }, { "epoch": 0.7661494185820402, "grad_norm": 2.073317527770996, "learning_rate": 4.626095935783034e-05, "loss": 1.4991, "step": 19700 }, { "epoch": 0.7700385019250963, "grad_norm": 1.8326261043548584, "learning_rate": 4.624147605502085e-05, "loss": 1.5183, "step": 19800 }, { "epoch": 0.7739275852681523, "grad_norm": 1.5867055654525757, "learning_rate": 4.622199275221136e-05, "loss": 1.5437, "step": 19900 }, { "epoch": 0.7778166686112084, "grad_norm": 2.9834554195404053, "learning_rate": 4.620250944940186e-05, "loss": 1.5911, "step": 20000 }, { "epoch": 0.7778166686112084, "eval_accuracy": 0.4616777777777778, "eval_f1": 0.4440336453875553, "eval_loss": 1.4153549671173096, "eval_precision": 0.4506276710118032, "eval_recall": 0.4616777777777778, "eval_runtime": 6915.5787, "eval_samples_per_second": 26.028, "eval_steps_per_second": 0.407, "step": 20000 }, { "epoch": 0.7817057519542644, "grad_norm": 2.972684621810913, "learning_rate": 4.618302614659237e-05, "loss": 1.5485, "step": 20100 }, { "epoch": 
0.7855948352973204, "grad_norm": 0.7130892276763916, "learning_rate": 4.616354284378288e-05, "loss": 1.5605, "step": 20200 }, { "epoch": 0.7894839186403765, "grad_norm": 1.277632236480713, "learning_rate": 4.6144059540973386e-05, "loss": 1.482, "step": 20300 }, { "epoch": 0.7933730019834325, "grad_norm": 2.062887668609619, "learning_rate": 4.6124576238163896e-05, "loss": 1.4974, "step": 20400 }, { "epoch": 0.7972620853264886, "grad_norm": 1.530131220817566, "learning_rate": 4.6105092935354406e-05, "loss": 1.5198, "step": 20500 }, { "epoch": 0.8011511686695446, "grad_norm": 1.9071687459945679, "learning_rate": 4.6085609632544916e-05, "loss": 1.5459, "step": 20600 }, { "epoch": 0.8050402520126007, "grad_norm": 0.6887432336807251, "learning_rate": 4.606612632973542e-05, "loss": 1.5155, "step": 20700 }, { "epoch": 0.8089293353556567, "grad_norm": 3.5989584922790527, "learning_rate": 4.604664302692593e-05, "loss": 1.626, "step": 20800 }, { "epoch": 0.8128184186987127, "grad_norm": 1.299272894859314, "learning_rate": 4.602715972411644e-05, "loss": 1.5392, "step": 20900 }, { "epoch": 0.8167075020417688, "grad_norm": 1.0159450769424438, "learning_rate": 4.600767642130694e-05, "loss": 1.5075, "step": 21000 }, { "epoch": 0.8167075020417688, "eval_accuracy": 0.43669444444444444, "eval_f1": 0.40434249199589684, "eval_loss": 1.4381849765777588, "eval_precision": 0.4716684253490582, "eval_recall": 0.4366944444444445, "eval_runtime": 6922.4892, "eval_samples_per_second": 26.002, "eval_steps_per_second": 0.406, "step": 21000 }, { "epoch": 0.8205965853848248, "grad_norm": 1.1864993572235107, "learning_rate": 4.598819311849745e-05, "loss": 1.5012, "step": 21100 }, { "epoch": 0.8244856687278809, "grad_norm": 1.2600886821746826, "learning_rate": 4.596870981568796e-05, "loss": 1.609, "step": 21200 }, { "epoch": 0.8283747520709369, "grad_norm": 1.3343676328659058, "learning_rate": 4.594942134590656e-05, "loss": 1.5595, "step": 21300 }, { "epoch": 0.832263835413993, "grad_norm": 
1.019792914390564, "learning_rate": 4.592993804309707e-05, "loss": 1.4903, "step": 21400 }, { "epoch": 0.836152918757049, "grad_norm": 1.4472830295562744, "learning_rate": 4.591045474028758e-05, "loss": 1.5286, "step": 21500 }, { "epoch": 0.840042002100105, "grad_norm": 1.3431252241134644, "learning_rate": 4.589097143747808e-05, "loss": 1.527, "step": 21600 }, { "epoch": 0.8439310854431611, "grad_norm": 4.452381610870361, "learning_rate": 4.5871488134668586e-05, "loss": 1.5525, "step": 21700 }, { "epoch": 0.8478201687862171, "grad_norm": 1.3140783309936523, "learning_rate": 4.5852004831859096e-05, "loss": 1.5882, "step": 21800 }, { "epoch": 0.8517092521292732, "grad_norm": 2.004092216491699, "learning_rate": 4.5832521529049606e-05, "loss": 1.4876, "step": 21900 }, { "epoch": 0.8555983354723292, "grad_norm": 2.0795910358428955, "learning_rate": 4.5813038226240116e-05, "loss": 1.4361, "step": 22000 }, { "epoch": 0.8555983354723292, "eval_accuracy": 0.4541611111111111, "eval_f1": 0.44328350625010354, "eval_loss": 1.416523814201355, "eval_precision": 0.4564630413187092, "eval_recall": 0.45416111111111107, "eval_runtime": 6923.181, "eval_samples_per_second": 26.0, "eval_steps_per_second": 0.406, "step": 22000 }, { "epoch": 0.8594874188153853, "grad_norm": 1.5139399766921997, "learning_rate": 4.579355492343062e-05, "loss": 1.499, "step": 22100 }, { "epoch": 0.8633765021584413, "grad_norm": 0.84910649061203, "learning_rate": 4.577407162062113e-05, "loss": 1.4882, "step": 22200 }, { "epoch": 0.8672655855014973, "grad_norm": 1.0115416049957275, "learning_rate": 4.575458831781164e-05, "loss": 1.5866, "step": 22300 }, { "epoch": 0.8711546688445534, "grad_norm": 1.1214168071746826, "learning_rate": 4.573510501500214e-05, "loss": 1.5682, "step": 22400 }, { "epoch": 0.8750437521876093, "grad_norm": 2.3063182830810547, "learning_rate": 4.571562171219265e-05, "loss": 1.5629, "step": 22500 }, { "epoch": 0.8789328355306654, "grad_norm": 1.5863583087921143, "learning_rate": 
4.569613840938316e-05, "loss": 1.5198, "step": 22600 }, { "epoch": 0.8828219188737214, "grad_norm": 0.935728907585144, "learning_rate": 4.567665510657367e-05, "loss": 1.4667, "step": 22700 }, { "epoch": 0.8867110022167775, "grad_norm": 1.2688827514648438, "learning_rate": 4.5657171803764174e-05, "loss": 1.496, "step": 22800 }, { "epoch": 0.8906000855598335, "grad_norm": 2.421876907348633, "learning_rate": 4.5637688500954684e-05, "loss": 1.5066, "step": 22900 }, { "epoch": 0.8944891689028895, "grad_norm": 3.670546531677246, "learning_rate": 4.5618205198145194e-05, "loss": 1.5074, "step": 23000 }, { "epoch": 0.8944891689028895, "eval_accuracy": 0.43969444444444444, "eval_f1": 0.42158360365815534, "eval_loss": 1.4402318000793457, "eval_precision": 0.4569888911835038, "eval_recall": 0.43969444444444444, "eval_runtime": 6919.2475, "eval_samples_per_second": 26.014, "eval_steps_per_second": 0.407, "step": 23000 }, { "epoch": 0.8983782522459456, "grad_norm": 1.4025524854660034, "learning_rate": 4.55987218953357e-05, "loss": 1.4495, "step": 23100 }, { "epoch": 0.9022673355890016, "grad_norm": 3.2960994243621826, "learning_rate": 4.557923859252621e-05, "loss": 1.5135, "step": 23200 }, { "epoch": 0.9061564189320577, "grad_norm": 1.7888227701187134, "learning_rate": 4.555995012274481e-05, "loss": 1.5041, "step": 23300 }, { "epoch": 0.9100455022751137, "grad_norm": 1.6867382526397705, "learning_rate": 4.5540466819935315e-05, "loss": 1.4921, "step": 23400 }, { "epoch": 0.9139345856181698, "grad_norm": 2.2651267051696777, "learning_rate": 4.5520983517125825e-05, "loss": 1.5319, "step": 23500 }, { "epoch": 0.9178236689612258, "grad_norm": 1.6917591094970703, "learning_rate": 4.5501500214316335e-05, "loss": 1.559, "step": 23600 }, { "epoch": 0.9217127523042818, "grad_norm": 4.099582195281982, "learning_rate": 4.548201691150684e-05, "loss": 1.5432, "step": 23700 }, { "epoch": 0.9256018356473379, "grad_norm": 6.133367538452148, "learning_rate": 4.546253360869735e-05, "loss": 1.5922, 
"step": 23800 }, { "epoch": 0.9294909189903939, "grad_norm": 1.169080138206482, "learning_rate": 4.544305030588786e-05, "loss": 1.4567, "step": 23900 }, { "epoch": 0.93338000233345, "grad_norm": 1.4176899194717407, "learning_rate": 4.542356700307837e-05, "loss": 1.5422, "step": 24000 }, { "epoch": 0.93338000233345, "eval_accuracy": 0.43241666666666667, "eval_f1": 0.41637294450105716, "eval_loss": 1.4387420415878296, "eval_precision": 0.46358158354288315, "eval_recall": 0.43241666666666667, "eval_runtime": 6918.5866, "eval_samples_per_second": 26.017, "eval_steps_per_second": 0.407, "step": 24000 }, { "epoch": 0.937269085676506, "grad_norm": 4.524631500244141, "learning_rate": 4.540408370026887e-05, "loss": 1.5353, "step": 24100 }, { "epoch": 0.9411581690195621, "grad_norm": 1.5323764085769653, "learning_rate": 4.538460039745938e-05, "loss": 1.5013, "step": 24200 }, { "epoch": 0.9450472523626181, "grad_norm": 12.01789379119873, "learning_rate": 4.536511709464989e-05, "loss": 1.4989, "step": 24300 }, { "epoch": 0.9489363357056741, "grad_norm": 0.81694096326828, "learning_rate": 4.5345633791840394e-05, "loss": 1.6157, "step": 24400 }, { "epoch": 0.9528254190487302, "grad_norm": 1.7594293355941772, "learning_rate": 4.5326150489030904e-05, "loss": 1.5368, "step": 24500 }, { "epoch": 0.9567145023917862, "grad_norm": 1.511576771736145, "learning_rate": 4.530686201924951e-05, "loss": 1.5116, "step": 24600 }, { "epoch": 0.9606035857348423, "grad_norm": 1.1703425645828247, "learning_rate": 4.528737871644001e-05, "loss": 1.5031, "step": 24700 }, { "epoch": 0.9644926690778983, "grad_norm": 1.5925825834274292, "learning_rate": 4.526789541363052e-05, "loss": 1.5342, "step": 24800 }, { "epoch": 0.9683817524209544, "grad_norm": 2.769982099533081, "learning_rate": 4.524841211082103e-05, "loss": 1.4714, "step": 24900 }, { "epoch": 0.9722708357640104, "grad_norm": 1.7503234148025513, "learning_rate": 4.5228928808011535e-05, "loss": 1.504, "step": 25000 }, { "epoch": 
0.9722708357640104, "eval_accuracy": 0.46913333333333335, "eval_f1": 0.45728846423153247, "eval_loss": 1.395060420036316, "eval_precision": 0.4829160716210555, "eval_recall": 0.46913333333333335, "eval_runtime": 6916.4938, "eval_samples_per_second": 26.025, "eval_steps_per_second": 0.407, "step": 25000 }, { "epoch": 0.9761599191070665, "grad_norm": 1.9516292810440063, "learning_rate": 4.520944550520204e-05, "loss": 1.62, "step": 25100 }, { "epoch": 0.9800490024501225, "grad_norm": 1.844866394996643, "learning_rate": 4.518996220239255e-05, "loss": 1.5351, "step": 25200 }, { "epoch": 0.9839380857931785, "grad_norm": 2.2275328636169434, "learning_rate": 4.517047889958306e-05, "loss": 1.4892, "step": 25300 }, { "epoch": 0.9878271691362346, "grad_norm": 0.8399716019630432, "learning_rate": 4.515099559677357e-05, "loss": 1.4913, "step": 25400 }, { "epoch": 0.9917162524792906, "grad_norm": 1.929142713546753, "learning_rate": 4.513151229396407e-05, "loss": 1.5178, "step": 25500 }, { "epoch": 0.9956053358223467, "grad_norm": 0.8696715831756592, "learning_rate": 4.511202899115458e-05, "loss": 1.564, "step": 25600 }, { "epoch": 0.9994944191654027, "grad_norm": 0.9348818063735962, "learning_rate": 4.509254568834509e-05, "loss": 1.5361, "step": 25700 }, { "epoch": 1.0033835025084588, "grad_norm": 1.8244856595993042, "learning_rate": 4.5073062385535594e-05, "loss": 1.5003, "step": 25800 }, { "epoch": 1.0072725858515148, "grad_norm": 0.9805651903152466, "learning_rate": 4.5053579082726104e-05, "loss": 1.5936, "step": 25900 }, { "epoch": 1.0111616691945708, "grad_norm": 1.953909158706665, "learning_rate": 4.5034095779916614e-05, "loss": 1.589, "step": 26000 }, { "epoch": 1.0111616691945708, "eval_accuracy": 0.4568333333333333, "eval_f1": 0.439601133438253, "eval_loss": 1.407954454421997, "eval_precision": 0.4791982809585867, "eval_recall": 0.45683333333333337, "eval_runtime": 6919.6327, "eval_samples_per_second": 26.013, "eval_steps_per_second": 0.407, "step": 26000 }, { "epoch": 
1.0150507525376269, "grad_norm": 0.9872944355010986, "learning_rate": 4.5014612477107124e-05, "loss": 1.4562, "step": 26100 }, { "epoch": 1.018939835880683, "grad_norm": 4.60278844833374, "learning_rate": 4.499512917429763e-05, "loss": 1.5829, "step": 26200 }, { "epoch": 1.022828919223739, "grad_norm": 0.9323475956916809, "learning_rate": 4.497564587148814e-05, "loss": 1.4658, "step": 26300 }, { "epoch": 1.026718002566795, "grad_norm": 1.7615915536880493, "learning_rate": 4.495616256867865e-05, "loss": 1.4876, "step": 26400 }, { "epoch": 1.030607085909851, "grad_norm": 2.387206792831421, "learning_rate": 4.493667926586915e-05, "loss": 1.5026, "step": 26500 }, { "epoch": 1.034496169252907, "grad_norm": 6.027796745300293, "learning_rate": 4.491719596305966e-05, "loss": 1.5572, "step": 26600 }, { "epoch": 1.0383852525959631, "grad_norm": 11.523850440979004, "learning_rate": 4.489771266025017e-05, "loss": 1.5728, "step": 26700 }, { "epoch": 1.0422743359390192, "grad_norm": 5.836169719696045, "learning_rate": 4.487822935744068e-05, "loss": 1.5262, "step": 26800 }, { "epoch": 1.0461634192820752, "grad_norm": 2.0479562282562256, "learning_rate": 4.485874605463118e-05, "loss": 1.5112, "step": 26900 }, { "epoch": 1.0500525026251313, "grad_norm": 3.660287380218506, "learning_rate": 4.483926275182169e-05, "loss": 1.5463, "step": 27000 }, { "epoch": 1.0500525026251313, "eval_accuracy": 0.47635, "eval_f1": 0.4611827577528125, "eval_loss": 1.3757803440093994, "eval_precision": 0.4911655019432253, "eval_recall": 0.47635, "eval_runtime": 6922.9522, "eval_samples_per_second": 26.0, "eval_steps_per_second": 0.406, "step": 27000 }, { "epoch": 1.0539415859681873, "grad_norm": 2.5233561992645264, "learning_rate": 4.48197794490122e-05, "loss": 1.4981, "step": 27100 }, { "epoch": 1.0578306693112434, "grad_norm": 2.7936601638793945, "learning_rate": 4.4800296146202706e-05, "loss": 1.5108, "step": 27200 }, { "epoch": 1.0617197526542994, "grad_norm": 1.6449183225631714, "learning_rate": 
4.4780812843393216e-05, "loss": 1.4863, "step": 27300 }, { "epoch": 1.0656088359973555, "grad_norm": 1.9373325109481812, "learning_rate": 4.4761329540583725e-05, "loss": 1.4616, "step": 27400 }, { "epoch": 1.0694979193404115, "grad_norm": 2.2722465991973877, "learning_rate": 4.474184623777423e-05, "loss": 1.6371, "step": 27500 }, { "epoch": 1.0733870026834675, "grad_norm": 1.851396083831787, "learning_rate": 4.472236293496474e-05, "loss": 1.5153, "step": 27600 }, { "epoch": 1.0772760860265236, "grad_norm": 1.0622870922088623, "learning_rate": 4.470287963215524e-05, "loss": 1.5758, "step": 27700 }, { "epoch": 1.0811651693695796, "grad_norm": 2.084749937057495, "learning_rate": 4.468339632934575e-05, "loss": 1.5016, "step": 27800 }, { "epoch": 1.0850542527126357, "grad_norm": 1.63667631149292, "learning_rate": 4.466391302653626e-05, "loss": 1.466, "step": 27900 }, { "epoch": 1.0889433360556917, "grad_norm": 1.949187994003296, "learning_rate": 4.4644624556754866e-05, "loss": 1.5442, "step": 28000 }, { "epoch": 1.0889433360556917, "eval_accuracy": 0.48104444444444444, "eval_f1": 0.46032580810949497, "eval_loss": 1.3749234676361084, "eval_precision": 0.5009671891714765, "eval_recall": 0.48104444444444444, "eval_runtime": 6923.696, "eval_samples_per_second": 25.998, "eval_steps_per_second": 0.406, "step": 28000 }, { "epoch": 1.0928324193987478, "grad_norm": 1.5449155569076538, "learning_rate": 4.462514125394537e-05, "loss": 1.5092, "step": 28100 }, { "epoch": 1.0967215027418038, "grad_norm": 0.7816967368125916, "learning_rate": 4.460565795113588e-05, "loss": 1.5185, "step": 28200 }, { "epoch": 1.1006105860848598, "grad_norm": 1.3860362768173218, "learning_rate": 4.458617464832638e-05, "loss": 1.5291, "step": 28300 }, { "epoch": 1.1044996694279159, "grad_norm": 3.0052008628845215, "learning_rate": 4.456669134551689e-05, "loss": 1.5254, "step": 28400 }, { "epoch": 1.108388752770972, "grad_norm": 2.109093427658081, "learning_rate": 4.45472080427074e-05, "loss": 1.4753, 
"step": 28500 }, { "epoch": 1.112277836114028, "grad_norm": 1.6661689281463623, "learning_rate": 4.4527724739897906e-05, "loss": 1.4675, "step": 28600 }, { "epoch": 1.116166919457084, "grad_norm": 9.439106941223145, "learning_rate": 4.4508241437088415e-05, "loss": 1.4907, "step": 28700 }, { "epoch": 1.12005600280014, "grad_norm": 0.7358033657073975, "learning_rate": 4.4488758134278925e-05, "loss": 1.5673, "step": 28800 }, { "epoch": 1.123945086143196, "grad_norm": 1.2706105709075928, "learning_rate": 4.446927483146943e-05, "loss": 1.4795, "step": 28900 }, { "epoch": 1.1278341694862521, "grad_norm": 1.5376765727996826, "learning_rate": 4.444979152865994e-05, "loss": 1.5678, "step": 29000 }, { "epoch": 1.1278341694862521, "eval_accuracy": 0.48209444444444444, "eval_f1": 0.4679372010833767, "eval_loss": 1.3573288917541504, "eval_precision": 0.489781828006187, "eval_recall": 0.48209444444444455, "eval_runtime": 6920.2949, "eval_samples_per_second": 26.01, "eval_steps_per_second": 0.406, "step": 29000 }, { "epoch": 1.1317232528293082, "grad_norm": 3.3317878246307373, "learning_rate": 4.443030822585045e-05, "loss": 1.4951, "step": 29100 }, { "epoch": 1.1356123361723642, "grad_norm": 0.8894079923629761, "learning_rate": 4.441082492304096e-05, "loss": 1.6005, "step": 29200 }, { "epoch": 1.1395014195154203, "grad_norm": 2.0286788940429688, "learning_rate": 4.439134162023146e-05, "loss": 1.5617, "step": 29300 }, { "epoch": 1.1433905028584763, "grad_norm": 2.853468656539917, "learning_rate": 4.437185831742197e-05, "loss": 1.4894, "step": 29400 }, { "epoch": 1.1472795862015324, "grad_norm": 1.3234740495681763, "learning_rate": 4.435237501461248e-05, "loss": 1.491, "step": 29500 }, { "epoch": 1.1511686695445884, "grad_norm": 2.1143715381622314, "learning_rate": 4.4332891711802984e-05, "loss": 1.602, "step": 29600 }, { "epoch": 1.1550577528876445, "grad_norm": 1.7600969076156616, "learning_rate": 4.4313408408993494e-05, "loss": 1.521, "step": 29700 }, { "epoch": 
1.1589468362307005, "grad_norm": 2.5929882526397705, "learning_rate": 4.4293925106184004e-05, "loss": 1.524, "step": 29800 }, { "epoch": 1.1628359195737565, "grad_norm": 3.945248603820801, "learning_rate": 4.4274441803374514e-05, "loss": 1.5259, "step": 29900 }, { "epoch": 1.1667250029168126, "grad_norm": 6.836494445800781, "learning_rate": 4.425495850056502e-05, "loss": 1.4957, "step": 30000 }, { "epoch": 1.1667250029168126, "eval_accuracy": 0.47733888888888887, "eval_f1": 0.4530996737993121, "eval_loss": 1.3753753900527954, "eval_precision": 0.4863612404471758, "eval_recall": 0.4773388888888889, "eval_runtime": 6924.5526, "eval_samples_per_second": 25.994, "eval_steps_per_second": 0.406, "step": 30000 }, { "epoch": 1.1706140862598686, "grad_norm": 1.6972854137420654, "learning_rate": 4.423547519775553e-05, "loss": 1.4976, "step": 30100 }, { "epoch": 1.1745031696029247, "grad_norm": 1.5346542596817017, "learning_rate": 4.421599189494604e-05, "loss": 1.5096, "step": 30200 }, { "epoch": 1.1783922529459807, "grad_norm": 1.8971202373504639, "learning_rate": 4.4196703425164635e-05, "loss": 1.5428, "step": 30300 }, { "epoch": 1.1822813362890368, "grad_norm": 1.5734083652496338, "learning_rate": 4.4177220122355145e-05, "loss": 1.536, "step": 30400 }, { "epoch": 1.1861704196320928, "grad_norm": 3.176875591278076, "learning_rate": 4.4157736819545655e-05, "loss": 1.666, "step": 30500 }, { "epoch": 1.1900595029751488, "grad_norm": 1.9147074222564697, "learning_rate": 4.413825351673616e-05, "loss": 1.5479, "step": 30600 }, { "epoch": 1.1939485863182049, "grad_norm": 9.967185020446777, "learning_rate": 4.411877021392667e-05, "loss": 1.5953, "step": 30700 }, { "epoch": 1.197837669661261, "grad_norm": 2.2550010681152344, "learning_rate": 4.409928691111718e-05, "loss": 1.4855, "step": 30800 }, { "epoch": 1.201726753004317, "grad_norm": 3.488682508468628, "learning_rate": 4.407980360830769e-05, "loss": 1.5057, "step": 30900 }, { "epoch": 1.205615836347373, "grad_norm": 
1.368896245956421, "learning_rate": 4.406032030549819e-05, "loss": 1.4619, "step": 31000 }, { "epoch": 1.205615836347373, "eval_accuracy": 0.45829444444444445, "eval_f1": 0.43329385116832975, "eval_loss": 1.4045301675796509, "eval_precision": 0.4852257965679998, "eval_recall": 0.4582944444444445, "eval_runtime": 6926.0736, "eval_samples_per_second": 25.989, "eval_steps_per_second": 0.406, "step": 31000 }, { "epoch": 1.209504919690429, "grad_norm": 1.5100488662719727, "learning_rate": 4.4040837002688694e-05, "loss": 1.5019, "step": 31100 }, { "epoch": 1.213394003033485, "grad_norm": 3.054344415664673, "learning_rate": 4.4021353699879204e-05, "loss": 1.5176, "step": 31200 }, { "epoch": 1.2172830863765411, "grad_norm": 2.1956653594970703, "learning_rate": 4.4001870397069714e-05, "loss": 1.5149, "step": 31300 }, { "epoch": 1.2211721697195972, "grad_norm": 1.0573376417160034, "learning_rate": 4.398238709426022e-05, "loss": 1.4446, "step": 31400 }, { "epoch": 1.2250612530626532, "grad_norm": 3.8874189853668213, "learning_rate": 4.396290379145073e-05, "loss": 1.4979, "step": 31500 }, { "epoch": 1.2289503364057093, "grad_norm": 2.342007637023926, "learning_rate": 4.394342048864124e-05, "loss": 1.5106, "step": 31600 }, { "epoch": 1.2328394197487653, "grad_norm": 1.9617013931274414, "learning_rate": 4.392393718583174e-05, "loss": 1.503, "step": 31700 }, { "epoch": 1.2367285030918214, "grad_norm": 2.552952527999878, "learning_rate": 4.390445388302225e-05, "loss": 1.5006, "step": 31800 }, { "epoch": 1.2406175864348774, "grad_norm": 1.054480791091919, "learning_rate": 4.388497058021276e-05, "loss": 1.5531, "step": 31900 }, { "epoch": 1.2445066697779334, "grad_norm": 1.6759915351867676, "learning_rate": 4.386548727740327e-05, "loss": 1.5267, "step": 32000 }, { "epoch": 1.2445066697779334, "eval_accuracy": 0.4829777777777778, "eval_f1": 0.4658538690405656, "eval_loss": 1.3626240491867065, "eval_precision": 0.47971266584512445, "eval_recall": 0.4829777777777778, "eval_runtime": 
6924.4982, "eval_samples_per_second": 25.995, "eval_steps_per_second": 0.406, "step": 32000 }, { "epoch": 1.2483957531209895, "grad_norm": 3.142086982727051, "learning_rate": 4.384600397459377e-05, "loss": 1.4794, "step": 32100 }, { "epoch": 1.2522848364640455, "grad_norm": 4.575799465179443, "learning_rate": 4.382652067178428e-05, "loss": 1.4676, "step": 32200 }, { "epoch": 1.2561739198071016, "grad_norm": 1.444787859916687, "learning_rate": 4.380703736897479e-05, "loss": 1.4731, "step": 32300 }, { "epoch": 1.2600630031501576, "grad_norm": 2.3002493381500244, "learning_rate": 4.3787554066165296e-05, "loss": 1.5237, "step": 32400 }, { "epoch": 1.2639520864932137, "grad_norm": 1.9470409154891968, "learning_rate": 4.3768070763355806e-05, "loss": 1.5579, "step": 32500 }, { "epoch": 1.2678411698362697, "grad_norm": 4.144384860992432, "learning_rate": 4.3748587460546316e-05, "loss": 1.5684, "step": 32600 }, { "epoch": 1.2717302531793258, "grad_norm": 1.1720077991485596, "learning_rate": 4.372910415773682e-05, "loss": 1.5046, "step": 32700 }, { "epoch": 1.2756193365223818, "grad_norm": 1.6448010206222534, "learning_rate": 4.370962085492733e-05, "loss": 1.4703, "step": 32800 }, { "epoch": 1.2795084198654378, "grad_norm": 1.4843181371688843, "learning_rate": 4.369013755211784e-05, "loss": 1.5422, "step": 32900 }, { "epoch": 1.2833975032084939, "grad_norm": 1.7714227437973022, "learning_rate": 4.367065424930835e-05, "loss": 1.4861, "step": 33000 }, { "epoch": 1.2833975032084939, "eval_accuracy": 0.47528888888888887, "eval_f1": 0.45598547067488376, "eval_loss": 1.3708680868148804, "eval_precision": 0.48182352480224927, "eval_recall": 0.4752888888888889, "eval_runtime": 6921.8755, "eval_samples_per_second": 26.005, "eval_steps_per_second": 0.406, "step": 33000 }, { "epoch": 1.28728658655155, "grad_norm": 6.331613063812256, "learning_rate": 4.365117094649885e-05, "loss": 1.4822, "step": 33100 }, { "epoch": 1.291175669894606, "grad_norm": 1.8281220197677612, "learning_rate": 
4.363168764368936e-05, "loss": 1.4915, "step": 33200 }, { "epoch": 1.295064753237662, "grad_norm": 2.775228977203369, "learning_rate": 4.361220434087987e-05, "loss": 1.5299, "step": 33300 }, { "epoch": 1.298953836580718, "grad_norm": 2.4933676719665527, "learning_rate": 4.3592721038070375e-05, "loss": 1.5666, "step": 33400 }, { "epoch": 1.302842919923774, "grad_norm": 3.588634729385376, "learning_rate": 4.3573237735260884e-05, "loss": 1.4579, "step": 33500 }, { "epoch": 1.3067320032668301, "grad_norm": 1.5363836288452148, "learning_rate": 4.3553754432451394e-05, "loss": 1.5371, "step": 33600 }, { "epoch": 1.3106210866098862, "grad_norm": 1.7827937602996826, "learning_rate": 4.353446596266999e-05, "loss": 1.5085, "step": 33700 }, { "epoch": 1.314510169952942, "grad_norm": 1.874663233757019, "learning_rate": 4.35149826598605e-05, "loss": 1.5048, "step": 33800 }, { "epoch": 1.318399253295998, "grad_norm": 2.6663036346435547, "learning_rate": 4.349549935705101e-05, "loss": 1.5135, "step": 33900 }, { "epoch": 1.322288336639054, "grad_norm": 1.6510108709335327, "learning_rate": 4.347601605424152e-05, "loss": 1.532, "step": 34000 }, { "epoch": 1.322288336639054, "eval_accuracy": 0.46892222222222224, "eval_f1": 0.43179416641617346, "eval_loss": 1.3816001415252686, "eval_precision": 0.464711376954179, "eval_recall": 0.46892222222222224, "eval_runtime": 6924.9659, "eval_samples_per_second": 25.993, "eval_steps_per_second": 0.406, "step": 34000 }, { "epoch": 1.3261774199821101, "grad_norm": 1.7060939073562622, "learning_rate": 4.3456532751432025e-05, "loss": 1.4629, "step": 34100 }, { "epoch": 1.3300665033251662, "grad_norm": 0.9654697179794312, "learning_rate": 4.343704944862253e-05, "loss": 1.4274, "step": 34200 }, { "epoch": 1.3339555866682222, "grad_norm": 0.6503919959068298, "learning_rate": 4.341756614581304e-05, "loss": 1.52, "step": 34300 }, { "epoch": 1.3378446700112783, "grad_norm": 6.200971603393555, "learning_rate": 4.339808284300355e-05, "loss": 1.5464, "step": 
34400 }, { "epoch": 1.3417337533543343, "grad_norm": 1.4895210266113281, "learning_rate": 4.337859954019405e-05, "loss": 1.4944, "step": 34500 }, { "epoch": 1.3456228366973904, "grad_norm": 6.046417713165283, "learning_rate": 4.335911623738456e-05, "loss": 1.554, "step": 34600 }, { "epoch": 1.3495119200404464, "grad_norm": 0.8353651762008667, "learning_rate": 4.333963293457507e-05, "loss": 1.4911, "step": 34700 }, { "epoch": 1.3534010033835024, "grad_norm": 1.4695459604263306, "learning_rate": 4.3320149631765574e-05, "loss": 1.5437, "step": 34800 }, { "epoch": 1.3572900867265585, "grad_norm": 2.4254767894744873, "learning_rate": 4.3300666328956084e-05, "loss": 1.6038, "step": 34900 }, { "epoch": 1.3611791700696145, "grad_norm": 1.708307147026062, "learning_rate": 4.3281183026146594e-05, "loss": 1.5705, "step": 35000 }, { "epoch": 1.3611791700696145, "eval_accuracy": 0.4839777777777778, "eval_f1": 0.4596618766334168, "eval_loss": 1.3662739992141724, "eval_precision": 0.48263652067050483, "eval_recall": 0.4839777777777778, "eval_runtime": 6924.2568, "eval_samples_per_second": 25.996, "eval_steps_per_second": 0.406, "step": 35000 }, { "epoch": 1.3650682534126706, "grad_norm": 1.7877470254898071, "learning_rate": 4.3261699723337104e-05, "loss": 1.433, "step": 35100 }, { "epoch": 1.3689573367557266, "grad_norm": 1.2170788049697876, "learning_rate": 4.324221642052761e-05, "loss": 1.5177, "step": 35200 }, { "epoch": 1.3728464200987827, "grad_norm": 0.9987074136734009, "learning_rate": 4.322273311771812e-05, "loss": 1.4882, "step": 35300 }, { "epoch": 1.3767355034418387, "grad_norm": 3.7571451663970947, "learning_rate": 4.320324981490863e-05, "loss": 1.4536, "step": 35400 }, { "epoch": 1.3806245867848947, "grad_norm": 1.4313801527023315, "learning_rate": 4.318376651209913e-05, "loss": 1.5289, "step": 35500 }, { "epoch": 1.3845136701279508, "grad_norm": 1.5941517353057861, "learning_rate": 4.316428320928964e-05, "loss": 1.4806, "step": 35600 }, { "epoch": 
1.3884027534710068, "grad_norm": 1.5863670110702515, "learning_rate": 4.314479990648015e-05, "loss": 1.5147, "step": 35700 }, { "epoch": 1.3922918368140629, "grad_norm": 1.5929838418960571, "learning_rate": 4.312531660367066e-05, "loss": 1.4638, "step": 35800 }, { "epoch": 1.396180920157119, "grad_norm": 0.8685932755470276, "learning_rate": 4.310583330086116e-05, "loss": 1.4325, "step": 35900 }, { "epoch": 1.400070003500175, "grad_norm": 2.1601994037628174, "learning_rate": 4.308634999805167e-05, "loss": 1.4912, "step": 36000 }, { "epoch": 1.400070003500175, "eval_accuracy": 0.48543888888888886, "eval_f1": 0.46345299023374426, "eval_loss": 1.3535642623901367, "eval_precision": 0.4973366929111042, "eval_recall": 0.4854388888888889, "eval_runtime": 6925.2416, "eval_samples_per_second": 25.992, "eval_steps_per_second": 0.406, "step": 36000 }, { "epoch": 1.403959086843231, "grad_norm": 1.7985005378723145, "learning_rate": 4.306706152827028e-05, "loss": 1.4239, "step": 36100 }, { "epoch": 1.407848170186287, "grad_norm": 2.5690619945526123, "learning_rate": 4.304757822546078e-05, "loss": 1.4546, "step": 36200 }, { "epoch": 1.411737253529343, "grad_norm": 4.590076446533203, "learning_rate": 4.302809492265129e-05, "loss": 1.4859, "step": 36300 }, { "epoch": 1.4156263368723991, "grad_norm": 2.0801329612731934, "learning_rate": 4.30086116198418e-05, "loss": 1.5688, "step": 36400 }, { "epoch": 1.4195154202154552, "grad_norm": 1.539350152015686, "learning_rate": 4.2989128317032304e-05, "loss": 1.5167, "step": 36500 }, { "epoch": 1.4234045035585112, "grad_norm": 3.0692172050476074, "learning_rate": 4.2969645014222814e-05, "loss": 1.4283, "step": 36600 }, { "epoch": 1.4272935869015673, "grad_norm": 8.17684268951416, "learning_rate": 4.2950161711413324e-05, "loss": 1.5351, "step": 36700 }, { "epoch": 1.4311826702446233, "grad_norm": 2.5136616230010986, "learning_rate": 4.2930678408603834e-05, "loss": 1.4598, "step": 36800 }, { "epoch": 1.4350717535876794, "grad_norm": 
1.7710645198822021, "learning_rate": 4.291119510579434e-05, "loss": 1.5669, "step": 36900 }, { "epoch": 1.4389608369307354, "grad_norm": 1.6682426929473877, "learning_rate": 4.289171180298485e-05, "loss": 1.4966, "step": 37000 }, { "epoch": 1.4389608369307354, "eval_accuracy": 0.49094444444444446, "eval_f1": 0.47019912468272396, "eval_loss": 1.349673867225647, "eval_precision": 0.4884062150694391, "eval_recall": 0.49094444444444446, "eval_runtime": 6925.1838, "eval_samples_per_second": 25.992, "eval_steps_per_second": 0.406, "step": 37000 }, { "epoch": 1.4428499202737914, "grad_norm": 3.45300555229187, "learning_rate": 4.287222850017536e-05, "loss": 1.4693, "step": 37100 }, { "epoch": 1.4467390036168475, "grad_norm": 1.3836597204208374, "learning_rate": 4.285274519736586e-05, "loss": 1.5583, "step": 37200 }, { "epoch": 1.4506280869599035, "grad_norm": 2.162076234817505, "learning_rate": 4.283326189455636e-05, "loss": 1.5375, "step": 37300 }, { "epoch": 1.4545171703029596, "grad_norm": 1.1180922985076904, "learning_rate": 4.281377859174687e-05, "loss": 1.4936, "step": 37400 }, { "epoch": 1.4584062536460156, "grad_norm": 2.3053739070892334, "learning_rate": 4.279429528893738e-05, "loss": 1.4424, "step": 37500 }, { "epoch": 1.4622953369890717, "grad_norm": 2.54809832572937, "learning_rate": 4.2774811986127886e-05, "loss": 1.5601, "step": 37600 }, { "epoch": 1.4661844203321277, "grad_norm": 1.6928530931472778, "learning_rate": 4.2755328683318396e-05, "loss": 1.546, "step": 37700 }, { "epoch": 1.4700735036751837, "grad_norm": 1.518432378768921, "learning_rate": 4.2735845380508906e-05, "loss": 1.4863, "step": 37800 }, { "epoch": 1.4739625870182398, "grad_norm": 2.8777289390563965, "learning_rate": 4.271636207769941e-05, "loss": 1.4364, "step": 37900 }, { "epoch": 1.4778516703612958, "grad_norm": 3.388749599456787, "learning_rate": 4.269687877488992e-05, "loss": 1.4327, "step": 38000 }, { "epoch": 1.4778516703612958, "eval_accuracy": 0.47997222222222224, "eval_f1": 
0.46853581709565456, "eval_loss": 1.3591647148132324, "eval_precision": 0.48849434190338403, "eval_recall": 0.4799722222222222, "eval_runtime": 6924.7646, "eval_samples_per_second": 25.994, "eval_steps_per_second": 0.406, "step": 38000 }, { "epoch": 1.4817407537043519, "grad_norm": 1.8079581260681152, "learning_rate": 4.2677590305108524e-05, "loss": 1.5331, "step": 38100 }, { "epoch": 1.485629837047408, "grad_norm": 3.11663556098938, "learning_rate": 4.2658107002299034e-05, "loss": 1.5459, "step": 38200 }, { "epoch": 1.489518920390464, "grad_norm": 2.021717071533203, "learning_rate": 4.263862369948954e-05, "loss": 1.5445, "step": 38300 }, { "epoch": 1.49340800373352, "grad_norm": 0.9826100468635559, "learning_rate": 4.261914039668005e-05, "loss": 1.5047, "step": 38400 }, { "epoch": 1.497297087076576, "grad_norm": 2.145092725753784, "learning_rate": 4.2599657093870557e-05, "loss": 1.5009, "step": 38500 }, { "epoch": 1.501186170419632, "grad_norm": 1.794708490371704, "learning_rate": 4.258017379106106e-05, "loss": 1.5602, "step": 38600 }, { "epoch": 1.5050752537626881, "grad_norm": 4.789091110229492, "learning_rate": 4.256069048825157e-05, "loss": 1.4929, "step": 38700 }, { "epoch": 1.5089643371057442, "grad_norm": 1.9938353300094604, "learning_rate": 4.254120718544208e-05, "loss": 1.4662, "step": 38800 }, { "epoch": 1.5128534204488002, "grad_norm": 3.421252727508545, "learning_rate": 4.252172388263258e-05, "loss": 1.5619, "step": 38900 }, { "epoch": 1.5167425037918563, "grad_norm": 2.7918238639831543, "learning_rate": 4.250224057982309e-05, "loss": 1.5454, "step": 39000 }, { "epoch": 1.5167425037918563, "eval_accuracy": 0.5041777777777777, "eval_f1": 0.47730612124246585, "eval_loss": 1.318600058555603, "eval_precision": 0.5126080334796469, "eval_recall": 0.5041777777777777, "eval_runtime": 6925.1974, "eval_samples_per_second": 25.992, "eval_steps_per_second": 0.406, "step": 39000 }, { "epoch": 1.5206315871349123, "grad_norm": 1.5228238105773926, "learning_rate": 
4.24827572770136e-05, "loss": 1.5898, "step": 39100 }, { "epoch": 1.5245206704779684, "grad_norm": 6.919374465942383, "learning_rate": 4.246327397420411e-05, "loss": 1.4946, "step": 39200 }, { "epoch": 1.5284097538210244, "grad_norm": 3.763805866241455, "learning_rate": 4.2443790671394616e-05, "loss": 1.5419, "step": 39300 }, { "epoch": 1.5322988371640804, "grad_norm": 3.971895456314087, "learning_rate": 4.2424307368585125e-05, "loss": 1.4804, "step": 39400 }, { "epoch": 1.5361879205071365, "grad_norm": 0.7379988431930542, "learning_rate": 4.2404824065775635e-05, "loss": 1.4567, "step": 39500 }, { "epoch": 1.5400770038501925, "grad_norm": 5.975132465362549, "learning_rate": 4.238534076296614e-05, "loss": 1.4298, "step": 39600 }, { "epoch": 1.5439660871932486, "grad_norm": 2.062204122543335, "learning_rate": 4.236585746015665e-05, "loss": 1.5175, "step": 39700 }, { "epoch": 1.5478551705363046, "grad_norm": 1.0677040815353394, "learning_rate": 4.234637415734716e-05, "loss": 1.5758, "step": 39800 }, { "epoch": 1.5517442538793607, "grad_norm": 2.2441163063049316, "learning_rate": 4.232689085453767e-05, "loss": 1.4753, "step": 39900 }, { "epoch": 1.5556333372224167, "grad_norm": 2.0439257621765137, "learning_rate": 4.230740755172817e-05, "loss": 1.4842, "step": 40000 }, { "epoch": 1.5556333372224167, "eval_accuracy": 0.5018166666666667, "eval_f1": 0.48603096276540336, "eval_loss": 1.3254088163375854, "eval_precision": 0.5038388869121881, "eval_recall": 0.5018166666666666, "eval_runtime": 6928.2188, "eval_samples_per_second": 25.981, "eval_steps_per_second": 0.406, "step": 40000 }, { "epoch": 1.5595224205654727, "grad_norm": 0.6183058023452759, "learning_rate": 4.228792424891868e-05, "loss": 1.4448, "step": 40100 }, { "epoch": 1.5634115039085288, "grad_norm": 1.647613525390625, "learning_rate": 4.226844094610919e-05, "loss": 1.4737, "step": 40200 }, { "epoch": 1.5673005872515848, "grad_norm": 1.5332592725753784, "learning_rate": 4.224915247632779e-05, "loss": 1.4805, 
"step": 40300 }, { "epoch": 1.5711896705946409, "grad_norm": 0.7199509739875793, "learning_rate": 4.22296691735183e-05, "loss": 1.5092, "step": 40400 }, { "epoch": 1.575078753937697, "grad_norm": 2.0468058586120605, "learning_rate": 4.221018587070881e-05, "loss": 1.4786, "step": 40500 }, { "epoch": 1.578967837280753, "grad_norm": 1.1315397024154663, "learning_rate": 4.219070256789931e-05, "loss": 1.4306, "step": 40600 }, { "epoch": 1.582856920623809, "grad_norm": 1.571642279624939, "learning_rate": 4.2171219265089815e-05, "loss": 1.4367, "step": 40700 }, { "epoch": 1.586746003966865, "grad_norm": 3.681966781616211, "learning_rate": 4.2151735962280325e-05, "loss": 1.4393, "step": 40800 }, { "epoch": 1.590635087309921, "grad_norm": 4.945619106292725, "learning_rate": 4.2132252659470835e-05, "loss": 1.52, "step": 40900 }, { "epoch": 1.5945241706529771, "grad_norm": 2.4411091804504395, "learning_rate": 4.211276935666134e-05, "loss": 1.4606, "step": 41000 }, { "epoch": 1.5945241706529771, "eval_accuracy": 0.49280555555555555, "eval_f1": 0.4626693485110001, "eval_loss": 1.3411296606063843, "eval_precision": 0.5006357790078183, "eval_recall": 0.49280555555555555, "eval_runtime": 6922.0143, "eval_samples_per_second": 26.004, "eval_steps_per_second": 0.406, "step": 41000 }, { "epoch": 1.5984132539960332, "grad_norm": 2.3696420192718506, "learning_rate": 4.209328605385185e-05, "loss": 1.5546, "step": 41100 }, { "epoch": 1.6023023373390892, "grad_norm": 2.1984386444091797, "learning_rate": 4.207380275104236e-05, "loss": 1.4946, "step": 41200 }, { "epoch": 1.6061914206821453, "grad_norm": 4.205782413482666, "learning_rate": 4.205431944823287e-05, "loss": 1.4628, "step": 41300 }, { "epoch": 1.6100805040252013, "grad_norm": 2.819122791290283, "learning_rate": 4.203483614542337e-05, "loss": 1.4831, "step": 41400 }, { "epoch": 1.6139695873682574, "grad_norm": 1.7004815340042114, "learning_rate": 4.201535284261388e-05, "loss": 1.6185, "step": 41500 }, { "epoch": 1.6178586707113134, 
"grad_norm": 2.0867366790771484, "learning_rate": 4.199586953980439e-05, "loss": 1.4367, "step": 41600 }, { "epoch": 1.6217477540543694, "grad_norm": 6.26092529296875, "learning_rate": 4.1976386236994894e-05, "loss": 1.4506, "step": 41700 }, { "epoch": 1.6256368373974253, "grad_norm": 5.34660530090332, "learning_rate": 4.1956902934185404e-05, "loss": 1.4689, "step": 41800 }, { "epoch": 1.6295259207404813, "grad_norm": 1.634814977645874, "learning_rate": 4.1937419631375914e-05, "loss": 1.4384, "step": 41900 }, { "epoch": 1.6334150040835373, "grad_norm": 3.5240957736968994, "learning_rate": 4.1917936328566424e-05, "loss": 1.4117, "step": 42000 }, { "epoch": 1.6334150040835373, "eval_accuracy": 0.5008777777777778, "eval_f1": 0.4915008025624472, "eval_loss": 1.3106316328048706, "eval_precision": 0.5219567248712275, "eval_recall": 0.5008777777777779, "eval_runtime": 6921.932, "eval_samples_per_second": 26.004, "eval_steps_per_second": 0.406, "step": 42000 }, { "epoch": 1.6373040874265934, "grad_norm": 2.3675084114074707, "learning_rate": 4.189845302575693e-05, "loss": 1.5044, "step": 42100 }, { "epoch": 1.6411931707696494, "grad_norm": 2.4834206104278564, "learning_rate": 4.187896972294744e-05, "loss": 1.539, "step": 42200 }, { "epoch": 1.6450822541127055, "grad_norm": 7.680070400238037, "learning_rate": 4.185948642013795e-05, "loss": 1.5159, "step": 42300 }, { "epoch": 1.6489713374557615, "grad_norm": 1.8970831632614136, "learning_rate": 4.1840197950356545e-05, "loss": 1.4278, "step": 42400 }, { "epoch": 1.6528604207988176, "grad_norm": 3.4313440322875977, "learning_rate": 4.1820714647547055e-05, "loss": 1.5347, "step": 42500 }, { "epoch": 1.6567495041418736, "grad_norm": 1.5850071907043457, "learning_rate": 4.1801231344737565e-05, "loss": 1.5622, "step": 42600 }, { "epoch": 1.6606385874849297, "grad_norm": 1.4508610963821411, "learning_rate": 4.178174804192807e-05, "loss": 1.4484, "step": 42700 }, { "epoch": 1.6645276708279857, "grad_norm": 4.697525978088379, 
"learning_rate": 4.176226473911858e-05, "loss": 1.5048, "step": 42800 }, { "epoch": 1.6684167541710417, "grad_norm": 4.6808929443359375, "learning_rate": 4.174278143630909e-05, "loss": 1.5189, "step": 42900 }, { "epoch": 1.6723058375140978, "grad_norm": 2.7978219985961914, "learning_rate": 4.172329813349959e-05, "loss": 1.4794, "step": 43000 }, { "epoch": 1.6723058375140978, "eval_accuracy": 0.5002444444444445, "eval_f1": 0.4821125031632569, "eval_loss": 1.3182132244110107, "eval_precision": 0.5227912368666515, "eval_recall": 0.5002444444444445, "eval_runtime": 6927.9431, "eval_samples_per_second": 25.982, "eval_steps_per_second": 0.406, "step": 43000 }, { "epoch": 1.6761949208571538, "grad_norm": 1.325310230255127, "learning_rate": 4.17038148306901e-05, "loss": 1.4516, "step": 43100 }, { "epoch": 1.6800840042002099, "grad_norm": 3.6837282180786133, "learning_rate": 4.168433152788061e-05, "loss": 1.4498, "step": 43200 }, { "epoch": 1.683973087543266, "grad_norm": 20.589027404785156, "learning_rate": 4.166484822507112e-05, "loss": 1.5332, "step": 43300 }, { "epoch": 1.687862170886322, "grad_norm": 4.345759868621826, "learning_rate": 4.1645364922261624e-05, "loss": 1.486, "step": 43400 }, { "epoch": 1.691751254229378, "grad_norm": 2.8407785892486572, "learning_rate": 4.1625881619452134e-05, "loss": 1.4273, "step": 43500 }, { "epoch": 1.695640337572434, "grad_norm": 3.0724005699157715, "learning_rate": 4.1606398316642644e-05, "loss": 1.5991, "step": 43600 }, { "epoch": 1.69952942091549, "grad_norm": 11.949922561645508, "learning_rate": 4.158691501383315e-05, "loss": 1.5091, "step": 43700 }, { "epoch": 1.7034185042585461, "grad_norm": 1.8871575593948364, "learning_rate": 4.156743171102365e-05, "loss": 1.4757, "step": 43800 }, { "epoch": 1.7073075876016022, "grad_norm": 1.8014636039733887, "learning_rate": 4.154794840821416e-05, "loss": 1.4687, "step": 43900 }, { "epoch": 1.7111966709446582, "grad_norm": 2.6237833499908447, "learning_rate": 4.152846510540467e-05, 
"loss": 1.5223, "step": 44000 }, { "epoch": 1.7111966709446582, "eval_accuracy": 0.5027, "eval_f1": 0.4897264032585422, "eval_loss": 1.310219645500183, "eval_precision": 0.5134581267716016, "eval_recall": 0.5027, "eval_runtime": 6919.8908, "eval_samples_per_second": 26.012, "eval_steps_per_second": 0.407, "step": 44000 }, { "epoch": 1.7150857542877143, "grad_norm": 1.9809656143188477, "learning_rate": 4.150898180259517e-05, "loss": 1.4074, "step": 44100 }, { "epoch": 1.7189748376307703, "grad_norm": 1.929825782775879, "learning_rate": 4.148949849978568e-05, "loss": 1.4786, "step": 44200 }, { "epoch": 1.7228639209738263, "grad_norm": 1.2615498304367065, "learning_rate": 4.147001519697619e-05, "loss": 1.5074, "step": 44300 }, { "epoch": 1.7267530043168824, "grad_norm": 1.481255292892456, "learning_rate": 4.14505318941667e-05, "loss": 1.508, "step": 44400 }, { "epoch": 1.7306420876599384, "grad_norm": 2.887732744216919, "learning_rate": 4.1431048591357206e-05, "loss": 1.4806, "step": 44500 }, { "epoch": 1.7345311710029945, "grad_norm": 2.379328489303589, "learning_rate": 4.141176012157581e-05, "loss": 1.4919, "step": 44600 }, { "epoch": 1.7384202543460505, "grad_norm": 2.6335151195526123, "learning_rate": 4.139227681876632e-05, "loss": 1.5577, "step": 44700 }, { "epoch": 1.7423093376891066, "grad_norm": 1.841733455657959, "learning_rate": 4.1372793515956824e-05, "loss": 1.4684, "step": 44800 }, { "epoch": 1.7461984210321626, "grad_norm": 2.1804990768432617, "learning_rate": 4.1353310213147334e-05, "loss": 1.4376, "step": 44900 }, { "epoch": 1.7500875043752186, "grad_norm": 2.4895553588867188, "learning_rate": 4.1333826910337843e-05, "loss": 1.5187, "step": 45000 }, { "epoch": 1.7500875043752186, "eval_accuracy": 0.5133666666666666, "eval_f1": 0.4991221445165863, "eval_loss": 1.2921651601791382, "eval_precision": 0.5089708992517388, "eval_recall": 0.5133666666666666, "eval_runtime": 6926.6907, "eval_samples_per_second": 25.986, "eval_steps_per_second": 0.406, "step": 
45000 }, { "epoch": 1.7539765877182747, "grad_norm": 2.235046863555908, "learning_rate": 4.1314343607528347e-05, "loss": 1.5456, "step": 45100 }, { "epoch": 1.7578656710613307, "grad_norm": 1.44828200340271, "learning_rate": 4.1294860304718857e-05, "loss": 1.4235, "step": 45200 }, { "epoch": 1.7617547544043868, "grad_norm": 1.3528939485549927, "learning_rate": 4.1275377001909366e-05, "loss": 1.44, "step": 45300 }, { "epoch": 1.7656438377474428, "grad_norm": 3.447854518890381, "learning_rate": 4.1255893699099876e-05, "loss": 1.4979, "step": 45400 }, { "epoch": 1.7695329210904989, "grad_norm": 3.022505760192871, "learning_rate": 4.123641039629038e-05, "loss": 1.6423, "step": 45500 }, { "epoch": 1.773422004433555, "grad_norm": 1.982534646987915, "learning_rate": 4.121692709348089e-05, "loss": 1.4439, "step": 45600 }, { "epoch": 1.777311087776611, "grad_norm": 4.335754871368408, "learning_rate": 4.11974437906714e-05, "loss": 1.5034, "step": 45700 }, { "epoch": 1.781200171119667, "grad_norm": 10.928424835205078, "learning_rate": 4.11779604878619e-05, "loss": 1.4428, "step": 45800 }, { "epoch": 1.785089254462723, "grad_norm": 1.601288080215454, "learning_rate": 4.115847718505241e-05, "loss": 1.4617, "step": 45900 }, { "epoch": 1.788978337805779, "grad_norm": 3.146934986114502, "learning_rate": 4.113899388224292e-05, "loss": 1.6064, "step": 46000 }, { "epoch": 1.788978337805779, "eval_accuracy": 0.5104722222222222, "eval_f1": 0.49376746355865153, "eval_loss": 1.2987232208251953, "eval_precision": 0.5039246034978847, "eval_recall": 0.5104722222222222, "eval_runtime": 6923.0498, "eval_samples_per_second": 26.0, "eval_steps_per_second": 0.406, "step": 46000 }, { "epoch": 1.7928674211488351, "grad_norm": 2.64028000831604, "learning_rate": 4.111951057943343e-05, "loss": 1.401, "step": 46100 }, { "epoch": 1.7967565044918912, "grad_norm": 4.496203899383545, "learning_rate": 4.1100027276623935e-05, "loss": 1.444, "step": 46200 }, { "epoch": 1.8006455878349472, "grad_norm": 
1.7508708238601685, "learning_rate": 4.1080543973814445e-05, "loss": 1.5105, "step": 46300 }, { "epoch": 1.8045346711780033, "grad_norm": 1.515021562576294, "learning_rate": 4.1061060671004955e-05, "loss": 1.469, "step": 46400 }, { "epoch": 1.8084237545210593, "grad_norm": 1.2749133110046387, "learning_rate": 4.104157736819546e-05, "loss": 1.5334, "step": 46500 }, { "epoch": 1.8123128378641153, "grad_norm": 4.872331619262695, "learning_rate": 4.102228889841406e-05, "loss": 1.4593, "step": 46600 }, { "epoch": 1.8162019212071714, "grad_norm": 2.711014986038208, "learning_rate": 4.100280559560457e-05, "loss": 1.5574, "step": 46700 }, { "epoch": 1.8200910045502274, "grad_norm": 6.409644603729248, "learning_rate": 4.0983322292795076e-05, "loss": 1.6121, "step": 46800 }, { "epoch": 1.8239800878932835, "grad_norm": 3.7824854850769043, "learning_rate": 4.0963838989985586e-05, "loss": 1.5707, "step": 46900 }, { "epoch": 1.8278691712363395, "grad_norm": 1.1713926792144775, "learning_rate": 4.0944355687176096e-05, "loss": 1.5322, "step": 47000 }, { "epoch": 1.8278691712363395, "eval_accuracy": 0.5080611111111111, "eval_f1": 0.4830704257905025, "eval_loss": 1.3015059232711792, "eval_precision": 0.49974865705147, "eval_recall": 0.5080611111111112, "eval_runtime": 6927.217, "eval_samples_per_second": 25.984, "eval_steps_per_second": 0.406, "step": 47000 }, { "epoch": 1.8317582545793956, "grad_norm": 2.5867340564727783, "learning_rate": 4.09248723843666e-05, "loss": 1.5058, "step": 47100 }, { "epoch": 1.8356473379224516, "grad_norm": 4.36709451675415, "learning_rate": 4.09053890815571e-05, "loss": 1.6186, "step": 47200 }, { "epoch": 1.8395364212655076, "grad_norm": 4.5439534187316895, "learning_rate": 4.088590577874761e-05, "loss": 1.5189, "step": 47300 }, { "epoch": 1.8434255046085637, "grad_norm": 2.053961753845215, "learning_rate": 4.086642247593812e-05, "loss": 1.4275, "step": 47400 }, { "epoch": 1.8473145879516197, "grad_norm": 2.312055826187134, "learning_rate": 
4.084693917312863e-05, "loss": 1.454, "step": 47500 }, { "epoch": 1.8512036712946758, "grad_norm": 1.556337594985962, "learning_rate": 4.0827455870319135e-05, "loss": 1.4626, "step": 47600 }, { "epoch": 1.8550927546377318, "grad_norm": 2.2775464057922363, "learning_rate": 4.0807972567509645e-05, "loss": 1.4741, "step": 47700 }, { "epoch": 1.8589818379807879, "grad_norm": 4.9023332595825195, "learning_rate": 4.0788489264700155e-05, "loss": 1.3897, "step": 47800 }, { "epoch": 1.862870921323844, "grad_norm": 1.9933898448944092, "learning_rate": 4.076900596189066e-05, "loss": 1.499, "step": 47900 }, { "epoch": 1.8667600046669, "grad_norm": 3.941758871078491, "learning_rate": 4.074952265908117e-05, "loss": 1.4831, "step": 48000 }, { "epoch": 1.8667600046669, "eval_accuracy": 0.49182777777777775, "eval_f1": 0.4703639604559811, "eval_loss": 1.327980637550354, "eval_precision": 0.5077214747615623, "eval_recall": 0.4918277777777778, "eval_runtime": 6921.6888, "eval_samples_per_second": 26.005, "eval_steps_per_second": 0.406, "step": 48000 }, { "epoch": 1.870649088009956, "grad_norm": 1.7348090410232544, "learning_rate": 4.073003935627168e-05, "loss": 1.4761, "step": 48100 }, { "epoch": 1.874538171353012, "grad_norm": 1.2797527313232422, "learning_rate": 4.071055605346218e-05, "loss": 1.4484, "step": 48200 }, { "epoch": 1.878427254696068, "grad_norm": 2.7091002464294434, "learning_rate": 4.069107275065269e-05, "loss": 1.5461, "step": 48300 }, { "epoch": 1.8823163380391241, "grad_norm": 2.4347574710845947, "learning_rate": 4.06715894478432e-05, "loss": 1.4937, "step": 48400 }, { "epoch": 1.8862054213821802, "grad_norm": 3.6463019847869873, "learning_rate": 4.065210614503371e-05, "loss": 1.5339, "step": 48500 }, { "epoch": 1.8900945047252362, "grad_norm": 2.381664276123047, "learning_rate": 4.0632622842224214e-05, "loss": 1.4579, "step": 48600 }, { "epoch": 1.8939835880682923, "grad_norm": 1.1204228401184082, "learning_rate": 4.061333437244282e-05, "loss": 1.4747, "step": 
48700 }, { "epoch": 1.8978726714113483, "grad_norm": 3.403123617172241, "learning_rate": 4.059385106963333e-05, "loss": 1.4582, "step": 48800 }, { "epoch": 1.9017617547544043, "grad_norm": 3.0999250411987305, "learning_rate": 4.057436776682383e-05, "loss": 1.4876, "step": 48900 }, { "epoch": 1.9056508380974604, "grad_norm": 3.5905752182006836, "learning_rate": 4.055488446401434e-05, "loss": 1.4726, "step": 49000 }, { "epoch": 1.9056508380974604, "eval_accuracy": 0.5011, "eval_f1": 0.48222814154510596, "eval_loss": 1.3041846752166748, "eval_precision": 0.5145421338706816, "eval_recall": 0.5011, "eval_runtime": 6917.9433, "eval_samples_per_second": 26.019, "eval_steps_per_second": 0.407, "step": 49000 }, { "epoch": 1.9095399214405164, "grad_norm": 2.3676178455352783, "learning_rate": 4.053540116120485e-05, "loss": 1.4609, "step": 49100 }, { "epoch": 1.9134290047835725, "grad_norm": 1.8708083629608154, "learning_rate": 4.0515917858395355e-05, "loss": 1.5201, "step": 49200 }, { "epoch": 1.9173180881266285, "grad_norm": 2.277036428451538, "learning_rate": 4.0496434555585865e-05, "loss": 1.4119, "step": 49300 }, { "epoch": 1.9212071714696846, "grad_norm": 1.6405575275421143, "learning_rate": 4.0476951252776375e-05, "loss": 1.615, "step": 49400 }, { "epoch": 1.9250962548127406, "grad_norm": 6.596516132354736, "learning_rate": 4.0457467949966885e-05, "loss": 1.5075, "step": 49500 }, { "epoch": 1.9289853381557966, "grad_norm": 1.932853102684021, "learning_rate": 4.043798464715739e-05, "loss": 1.4063, "step": 49600 }, { "epoch": 1.9328744214988527, "grad_norm": 2.785126209259033, "learning_rate": 4.04185013443479e-05, "loss": 1.4446, "step": 49700 }, { "epoch": 1.9367635048419087, "grad_norm": 2.0607738494873047, "learning_rate": 4.039901804153841e-05, "loss": 1.4918, "step": 49800 }, { "epoch": 1.9406525881849648, "grad_norm": 2.577690601348877, "learning_rate": 4.037953473872891e-05, "loss": 1.4651, "step": 49900 }, { "epoch": 1.9445416715280208, "grad_norm": 
2.027348518371582, "learning_rate": 4.036005143591942e-05, "loss": 1.5298, "step": 50000 }, { "epoch": 1.9445416715280208, "eval_accuracy": 0.5162333333333333, "eval_f1": 0.5027608433898361, "eval_loss": 1.281640887260437, "eval_precision": 0.5206004606294624, "eval_recall": 0.5162333333333333, "eval_runtime": 6915.4012, "eval_samples_per_second": 26.029, "eval_steps_per_second": 0.407, "step": 50000 }, { "epoch": 1.9484307548710769, "grad_norm": 2.1175947189331055, "learning_rate": 4.034056813310993e-05, "loss": 1.4962, "step": 50100 }, { "epoch": 1.952319838214133, "grad_norm": 3.4548656940460205, "learning_rate": 4.0321084830300434e-05, "loss": 1.6003, "step": 50200 }, { "epoch": 1.956208921557189, "grad_norm": 2.3409931659698486, "learning_rate": 4.030160152749094e-05, "loss": 1.4315, "step": 50300 }, { "epoch": 1.960098004900245, "grad_norm": 0.5664477944374084, "learning_rate": 4.028211822468145e-05, "loss": 1.4635, "step": 50400 }, { "epoch": 1.963987088243301, "grad_norm": 2.373842477798462, "learning_rate": 4.0262634921871957e-05, "loss": 1.4653, "step": 50500 }, { "epoch": 1.967876171586357, "grad_norm": 4.323079586029053, "learning_rate": 4.0243151619062466e-05, "loss": 1.4341, "step": 50600 }, { "epoch": 1.9717652549294131, "grad_norm": 2.4967925548553467, "learning_rate": 4.022366831625297e-05, "loss": 1.4684, "step": 50700 }, { "epoch": 1.9756543382724692, "grad_norm": 1.0622392892837524, "learning_rate": 4.020418501344348e-05, "loss": 1.5081, "step": 50800 }, { "epoch": 1.9795434216155252, "grad_norm": 3.0449016094207764, "learning_rate": 4.0184896543662084e-05, "loss": 1.5466, "step": 50900 }, { "epoch": 1.9834325049585813, "grad_norm": 3.4834582805633545, "learning_rate": 4.016541324085259e-05, "loss": 1.559, "step": 51000 }, { "epoch": 1.9834325049585813, "eval_accuracy": 0.5132888888888889, "eval_f1": 0.4969088256579832, "eval_loss": 1.2904843091964722, "eval_precision": 0.5131499829680555, "eval_recall": 0.5132888888888889, "eval_runtime": 
6919.1434, "eval_samples_per_second": 26.015, "eval_steps_per_second": 0.407, "step": 51000 }, { "epoch": 1.9873215883016373, "grad_norm": 2.2791547775268555, "learning_rate": 4.01459299380431e-05, "loss": 1.5012, "step": 51100 }, { "epoch": 1.9912106716446933, "grad_norm": 5.908763408660889, "learning_rate": 4.012644663523361e-05, "loss": 1.4362, "step": 51200 }, { "epoch": 1.9950997549877494, "grad_norm": 1.0892328023910522, "learning_rate": 4.010696333242411e-05, "loss": 1.4786, "step": 51300 }, { "epoch": 1.9989888383308054, "grad_norm": 1.9322205781936646, "learning_rate": 4.008748002961462e-05, "loss": 1.4874, "step": 51400 }, { "epoch": 2.0028779216738615, "grad_norm": 2.1485540866851807, "learning_rate": 4.006799672680513e-05, "loss": 1.4607, "step": 51500 }, { "epoch": 2.0067670050169175, "grad_norm": 2.911259174346924, "learning_rate": 4.004851342399564e-05, "loss": 1.5157, "step": 51600 }, { "epoch": 2.0106560883599736, "grad_norm": 3.2644150257110596, "learning_rate": 4.002903012118614e-05, "loss": 1.4448, "step": 51700 }, { "epoch": 2.0145451717030296, "grad_norm": 1.4416627883911133, "learning_rate": 4.000954681837665e-05, "loss": 1.5104, "step": 51800 }, { "epoch": 2.0184342550460856, "grad_norm": 9.540861129760742, "learning_rate": 3.999006351556716e-05, "loss": 1.5254, "step": 51900 }, { "epoch": 2.0223233383891417, "grad_norm": 1.7113875150680542, "learning_rate": 3.9970580212757666e-05, "loss": 1.5835, "step": 52000 }, { "epoch": 2.0223233383891417, "eval_accuracy": 0.5198166666666667, "eval_f1": 0.5097005382468719, "eval_loss": 1.274101734161377, "eval_precision": 0.5248180838708613, "eval_recall": 0.5198166666666667, "eval_runtime": 6919.0263, "eval_samples_per_second": 26.015, "eval_steps_per_second": 0.407, "step": 52000 }, { "epoch": 2.0262124217321977, "grad_norm": 2.8907666206359863, "learning_rate": 3.9951096909948176e-05, "loss": 1.4871, "step": 52100 }, { "epoch": 2.0301015050752538, "grad_norm": 1.7047004699707031, "learning_rate": 
3.9931613607138686e-05, "loss": 1.4776, "step": 52200 }, { "epoch": 2.03399058841831, "grad_norm": 3.570894479751587, "learning_rate": 3.9912130304329196e-05, "loss": 1.4335, "step": 52300 }, { "epoch": 2.037879671761366, "grad_norm": 2.042680501937866, "learning_rate": 3.98926470015197e-05, "loss": 1.5456, "step": 52400 }, { "epoch": 2.041768755104422, "grad_norm": 3.9115734100341797, "learning_rate": 3.987316369871021e-05, "loss": 1.505, "step": 52500 }, { "epoch": 2.045657838447478, "grad_norm": 1.6295313835144043, "learning_rate": 3.985368039590072e-05, "loss": 1.4132, "step": 52600 }, { "epoch": 2.049546921790534, "grad_norm": 4.39829158782959, "learning_rate": 3.983419709309122e-05, "loss": 1.4523, "step": 52700 }, { "epoch": 2.05343600513359, "grad_norm": 1.337613582611084, "learning_rate": 3.981471379028173e-05, "loss": 1.4487, "step": 52800 }, { "epoch": 2.057325088476646, "grad_norm": 2.888352870941162, "learning_rate": 3.979523048747224e-05, "loss": 1.3876, "step": 52900 }, { "epoch": 2.061214171819702, "grad_norm": 2.3697891235351562, "learning_rate": 3.977594201769084e-05, "loss": 1.5087, "step": 53000 }, { "epoch": 2.061214171819702, "eval_accuracy": 0.5124666666666666, "eval_f1": 0.5039953776147957, "eval_loss": 1.2827510833740234, "eval_precision": 0.5205618613943376, "eval_recall": 0.5124666666666667, "eval_runtime": 6919.7149, "eval_samples_per_second": 26.013, "eval_steps_per_second": 0.407, "step": 53000 }, { "epoch": 2.065103255162758, "grad_norm": 2.370325803756714, "learning_rate": 3.975645871488135e-05, "loss": 1.4697, "step": 53100 }, { "epoch": 2.068992338505814, "grad_norm": 2.4759280681610107, "learning_rate": 3.973697541207186e-05, "loss": 1.4281, "step": 53200 }, { "epoch": 2.0728814218488703, "grad_norm": 2.260918140411377, "learning_rate": 3.971749210926236e-05, "loss": 1.5372, "step": 53300 }, { "epoch": 2.0767705051919263, "grad_norm": 2.231809616088867, "learning_rate": 3.969800880645287e-05, "loss": 1.3957, "step": 53400 }, { 
"epoch": 2.0806595885349823, "grad_norm": 4.17244291305542, "learning_rate": 3.967852550364338e-05, "loss": 1.4436, "step": 53500 }, { "epoch": 2.0845486718780384, "grad_norm": 5.138910293579102, "learning_rate": 3.9659042200833886e-05, "loss": 1.4967, "step": 53600 }, { "epoch": 2.0884377552210944, "grad_norm": 2.101656675338745, "learning_rate": 3.9639558898024396e-05, "loss": 1.6475, "step": 53700 }, { "epoch": 2.0923268385641505, "grad_norm": 1.2947981357574463, "learning_rate": 3.96200755952149e-05, "loss": 1.4343, "step": 53800 }, { "epoch": 2.0962159219072065, "grad_norm": 6.457822322845459, "learning_rate": 3.960059229240541e-05, "loss": 1.6926, "step": 53900 }, { "epoch": 2.1001050052502626, "grad_norm": 1.930901050567627, "learning_rate": 3.958110898959592e-05, "loss": 1.4915, "step": 54000 }, { "epoch": 2.1001050052502626, "eval_accuracy": 0.5115, "eval_f1": 0.4951575531238473, "eval_loss": 1.2896674871444702, "eval_precision": 0.5184965712943675, "eval_recall": 0.5115, "eval_runtime": 6921.8149, "eval_samples_per_second": 26.005, "eval_steps_per_second": 0.406, "step": 54000 }, { "epoch": 2.1039940885933186, "grad_norm": 4.779944896697998, "learning_rate": 3.956162568678642e-05, "loss": 1.3715, "step": 54100 }, { "epoch": 2.1078831719363746, "grad_norm": 1.3874750137329102, "learning_rate": 3.954214238397693e-05, "loss": 1.5466, "step": 54200 }, { "epoch": 2.1117722552794307, "grad_norm": 1.971626877784729, "learning_rate": 3.952265908116744e-05, "loss": 1.4555, "step": 54300 }, { "epoch": 2.1156613386224867, "grad_norm": 2.4656877517700195, "learning_rate": 3.9503175778357945e-05, "loss": 1.4496, "step": 54400 }, { "epoch": 2.1195504219655428, "grad_norm": 2.099677562713623, "learning_rate": 3.9483692475548455e-05, "loss": 1.4663, "step": 54500 }, { "epoch": 2.123439505308599, "grad_norm": 4.3085432052612305, "learning_rate": 3.9464209172738965e-05, "loss": 1.4939, "step": 54600 }, { "epoch": 2.127328588651655, "grad_norm": 2.331584930419922, 
"learning_rate": 3.9444725869929475e-05, "loss": 1.4806, "step": 54700 }, { "epoch": 2.131217671994711, "grad_norm": 6.540185451507568, "learning_rate": 3.942524256711998e-05, "loss": 1.4891, "step": 54800 }, { "epoch": 2.135106755337767, "grad_norm": 3.610234260559082, "learning_rate": 3.940575926431049e-05, "loss": 1.432, "step": 54900 }, { "epoch": 2.138995838680823, "grad_norm": 2.0148210525512695, "learning_rate": 3.938647079452909e-05, "loss": 1.482, "step": 55000 }, { "epoch": 2.138995838680823, "eval_accuracy": 0.5137611111111111, "eval_f1": 0.502368485149073, "eval_loss": 1.2792205810546875, "eval_precision": 0.5219131247895477, "eval_recall": 0.5137611111111111, "eval_runtime": 6915.2031, "eval_samples_per_second": 26.03, "eval_steps_per_second": 0.407, "step": 55000 }, { "epoch": 2.142884922023879, "grad_norm": 1.5455659627914429, "learning_rate": 3.9366987491719596e-05, "loss": 1.5648, "step": 55100 }, { "epoch": 2.146774005366935, "grad_norm": 1.5735197067260742, "learning_rate": 3.9347504188910106e-05, "loss": 1.5511, "step": 55200 }, { "epoch": 2.150663088709991, "grad_norm": 5.023078441619873, "learning_rate": 3.9328020886100616e-05, "loss": 1.4263, "step": 55300 }, { "epoch": 2.154552172053047, "grad_norm": 1.7076622247695923, "learning_rate": 3.930853758329112e-05, "loss": 1.5976, "step": 55400 }, { "epoch": 2.158441255396103, "grad_norm": 2.378161668777466, "learning_rate": 3.928905428048163e-05, "loss": 1.4935, "step": 55500 }, { "epoch": 2.1623303387391593, "grad_norm": 1.0609374046325684, "learning_rate": 3.926957097767214e-05, "loss": 1.4879, "step": 55600 }, { "epoch": 2.1662194220822153, "grad_norm": 2.627474069595337, "learning_rate": 3.925008767486265e-05, "loss": 1.4622, "step": 55700 }, { "epoch": 2.1701085054252713, "grad_norm": 3.122551918029785, "learning_rate": 3.923060437205315e-05, "loss": 1.4824, "step": 55800 }, { "epoch": 2.1739975887683274, "grad_norm": 2.7925078868865967, "learning_rate": 3.921112106924366e-05, "loss": 
1.5005, "step": 55900 }, { "epoch": 2.1778866721113834, "grad_norm": 5.574724197387695, "learning_rate": 3.919163776643417e-05, "loss": 1.5485, "step": 56000 }, { "epoch": 2.1778866721113834, "eval_accuracy": 0.51805, "eval_f1": 0.5035770496274456, "eval_loss": 1.278883934020996, "eval_precision": 0.5281524581674084, "eval_recall": 0.51805, "eval_runtime": 6915.1686, "eval_samples_per_second": 26.03, "eval_steps_per_second": 0.407, "step": 56000 }, { "epoch": 2.1817757554544395, "grad_norm": 2.8789098262786865, "learning_rate": 3.9172154463624675e-05, "loss": 1.5265, "step": 56100 }, { "epoch": 2.1856648387974955, "grad_norm": 3.5366103649139404, "learning_rate": 3.915286599384328e-05, "loss": 1.5136, "step": 56200 }, { "epoch": 2.1895539221405516, "grad_norm": 3.1149890422821045, "learning_rate": 3.913338269103379e-05, "loss": 1.3914, "step": 56300 }, { "epoch": 2.1934430054836076, "grad_norm": 2.8636319637298584, "learning_rate": 3.911389938822429e-05, "loss": 1.4717, "step": 56400 }, { "epoch": 2.1973320888266636, "grad_norm": 1.9396897554397583, "learning_rate": 3.90944160854148e-05, "loss": 1.428, "step": 56500 }, { "epoch": 2.2012211721697197, "grad_norm": 2.4849867820739746, "learning_rate": 3.907493278260531e-05, "loss": 1.4055, "step": 56600 }, { "epoch": 2.2051102555127757, "grad_norm": 2.598407506942749, "learning_rate": 3.905544947979582e-05, "loss": 1.5791, "step": 56700 }, { "epoch": 2.2089993388558318, "grad_norm": 6.4984941482543945, "learning_rate": 3.9035966176986325e-05, "loss": 1.5704, "step": 56800 }, { "epoch": 2.212888422198888, "grad_norm": 4.056187152862549, "learning_rate": 3.901648287417683e-05, "loss": 1.453, "step": 56900 }, { "epoch": 2.216777505541944, "grad_norm": 5.150730609893799, "learning_rate": 3.899699957136734e-05, "loss": 1.5636, "step": 57000 }, { "epoch": 2.216777505541944, "eval_accuracy": 0.5150833333333333, "eval_f1": 0.5005228198666992, "eval_loss": 1.2837995290756226, "eval_precision": 0.5256703869896923, 
"eval_recall": 0.5150833333333332, "eval_runtime": 6912.3348, "eval_samples_per_second": 26.04, "eval_steps_per_second": 0.407, "step": 57000 }, { "epoch": 2.220666588885, "grad_norm": 5.92357873916626, "learning_rate": 3.897751626855785e-05, "loss": 1.5519, "step": 57100 }, { "epoch": 2.224555672228056, "grad_norm": 2.6834442615509033, "learning_rate": 3.895803296574835e-05, "loss": 1.4962, "step": 57200 }, { "epoch": 2.228444755571112, "grad_norm": 3.7636308670043945, "learning_rate": 3.893854966293886e-05, "loss": 1.5937, "step": 57300 }, { "epoch": 2.232333838914168, "grad_norm": 2.5705254077911377, "learning_rate": 3.891906636012937e-05, "loss": 1.4348, "step": 57400 }, { "epoch": 2.236222922257224, "grad_norm": 1.9181768894195557, "learning_rate": 3.8899583057319874e-05, "loss": 1.4403, "step": 57500 }, { "epoch": 2.24011200560028, "grad_norm": 2.5499064922332764, "learning_rate": 3.8880099754510384e-05, "loss": 1.4861, "step": 57600 }, { "epoch": 2.244001088943336, "grad_norm": 1.429231882095337, "learning_rate": 3.8860616451700894e-05, "loss": 1.4445, "step": 57700 }, { "epoch": 2.247890172286392, "grad_norm": 2.975102663040161, "learning_rate": 3.8841133148891404e-05, "loss": 1.5084, "step": 57800 }, { "epoch": 2.2517792556294483, "grad_norm": 6.002315521240234, "learning_rate": 3.882164984608191e-05, "loss": 1.5039, "step": 57900 }, { "epoch": 2.2556683389725043, "grad_norm": 1.8137871026992798, "learning_rate": 3.880216654327242e-05, "loss": 1.4106, "step": 58000 }, { "epoch": 2.2556683389725043, "eval_accuracy": 0.5131777777777777, "eval_f1": 0.49201684737850687, "eval_loss": 1.2850462198257446, "eval_precision": 0.5161282592985267, "eval_recall": 0.5131777777777778, "eval_runtime": 6914.2038, "eval_samples_per_second": 26.033, "eval_steps_per_second": 0.407, "step": 58000 }, { "epoch": 2.2595574223155603, "grad_norm": 1.433063268661499, "learning_rate": 3.878268324046293e-05, "loss": 1.4767, "step": 58100 }, { "epoch": 2.2634465056586164, "grad_norm": 
2.2618744373321533, "learning_rate": 3.876319993765343e-05, "loss": 1.5252, "step": 58200 }, { "epoch": 2.2673355890016724, "grad_norm": 2.4970383644104004, "learning_rate": 3.874371663484394e-05, "loss": 1.4892, "step": 58300 }, { "epoch": 2.2712246723447285, "grad_norm": 3.234539747238159, "learning_rate": 3.872423333203445e-05, "loss": 1.4994, "step": 58400 }, { "epoch": 2.2751137556877845, "grad_norm": 1.5240565538406372, "learning_rate": 3.870475002922495e-05, "loss": 1.5102, "step": 58500 }, { "epoch": 2.2790028390308406, "grad_norm": 4.139353275299072, "learning_rate": 3.868526672641546e-05, "loss": 1.4605, "step": 58600 }, { "epoch": 2.2828919223738966, "grad_norm": 2.7904891967773438, "learning_rate": 3.866578342360597e-05, "loss": 1.4213, "step": 58700 }, { "epoch": 2.2867810057169526, "grad_norm": 2.8196616172790527, "learning_rate": 3.864630012079648e-05, "loss": 1.533, "step": 58800 }, { "epoch": 2.2906700890600087, "grad_norm": 1.0799874067306519, "learning_rate": 3.8626816817986986e-05, "loss": 1.4468, "step": 58900 }, { "epoch": 2.2945591724030647, "grad_norm": 3.041388511657715, "learning_rate": 3.8607333515177496e-05, "loss": 1.4449, "step": 59000 }, { "epoch": 2.2945591724030647, "eval_accuracy": 0.503, "eval_f1": 0.47715803210060787, "eval_loss": 1.3000366687774658, "eval_precision": 0.5147462857098445, "eval_recall": 0.503, "eval_runtime": 6915.4652, "eval_samples_per_second": 26.029, "eval_steps_per_second": 0.407, "step": 59000 }, { "epoch": 2.2984482557461208, "grad_norm": 5.764691352844238, "learning_rate": 3.8587850212368006e-05, "loss": 1.485, "step": 59100 }, { "epoch": 2.302337339089177, "grad_norm": 3.296891927719116, "learning_rate": 3.856836690955851e-05, "loss": 1.5378, "step": 59200 }, { "epoch": 2.306226422432233, "grad_norm": 3.443152666091919, "learning_rate": 3.854888360674902e-05, "loss": 1.4843, "step": 59300 }, { "epoch": 2.310115505775289, "grad_norm": 2.508540391921997, "learning_rate": 3.852940030393953e-05, "loss": 
1.6216, "step": 59400 }, { "epoch": 2.314004589118345, "grad_norm": 4.305025577545166, "learning_rate": 3.850991700113004e-05, "loss": 1.3884, "step": 59500 }, { "epoch": 2.317893672461401, "grad_norm": 1.861815333366394, "learning_rate": 3.849043369832054e-05, "loss": 1.4954, "step": 59600 }, { "epoch": 2.321782755804457, "grad_norm": 1.7361329793930054, "learning_rate": 3.8470950395511045e-05, "loss": 1.5577, "step": 59700 }, { "epoch": 2.325671839147513, "grad_norm": 1.8300331830978394, "learning_rate": 3.8451467092701555e-05, "loss": 1.3964, "step": 59800 }, { "epoch": 2.329560922490569, "grad_norm": 1.4046690464019775, "learning_rate": 3.8431983789892065e-05, "loss": 1.5418, "step": 59900 }, { "epoch": 2.333450005833625, "grad_norm": 2.8141868114471436, "learning_rate": 3.841250048708257e-05, "loss": 1.4786, "step": 60000 }, { "epoch": 2.333450005833625, "eval_accuracy": 0.5203111111111111, "eval_f1": 0.5043213687557786, "eval_loss": 1.2671455144882202, "eval_precision": 0.543184670297051, "eval_recall": 0.5203111111111111, "eval_runtime": 6915.472, "eval_samples_per_second": 26.029, "eval_steps_per_second": 0.407, "step": 60000 }, { "epoch": 2.337339089176681, "grad_norm": 0.6962966918945312, "learning_rate": 3.839301718427308e-05, "loss": 1.4855, "step": 60100 }, { "epoch": 2.3412281725197372, "grad_norm": 6.413865566253662, "learning_rate": 3.837353388146359e-05, "loss": 1.4748, "step": 60200 }, { "epoch": 2.3451172558627933, "grad_norm": 14.765268325805664, "learning_rate": 3.8354245411682186e-05, "loss": 1.5982, "step": 60300 }, { "epoch": 2.3490063392058493, "grad_norm": 2.275315761566162, "learning_rate": 3.8334762108872696e-05, "loss": 1.3955, "step": 60400 }, { "epoch": 2.3528954225489054, "grad_norm": 2.4248054027557373, "learning_rate": 3.8315278806063206e-05, "loss": 1.5263, "step": 60500 }, { "epoch": 2.3567845058919614, "grad_norm": 4.580677032470703, "learning_rate": 3.829579550325371e-05, "loss": 1.394, "step": 60600 }, { "epoch": 
2.3606735892350175, "grad_norm": 1.8957401514053345, "learning_rate": 3.827631220044422e-05, "loss": 1.4933, "step": 60700 }, { "epoch": 2.3645626725780735, "grad_norm": 2.3226609230041504, "learning_rate": 3.825682889763473e-05, "loss": 1.4325, "step": 60800 }, { "epoch": 2.3684517559211296, "grad_norm": 3.2334964275360107, "learning_rate": 3.823734559482524e-05, "loss": 1.5103, "step": 60900 }, { "epoch": 2.3723408392641856, "grad_norm": 4.6108503341674805, "learning_rate": 3.821786229201574e-05, "loss": 1.4684, "step": 61000 }, { "epoch": 2.3723408392641856, "eval_accuracy": 0.5206444444444445, "eval_f1": 0.509108863224646, "eval_loss": 1.267065405845642, "eval_precision": 0.5356476096377615, "eval_recall": 0.5206444444444444, "eval_runtime": 6847.3974, "eval_samples_per_second": 26.287, "eval_steps_per_second": 0.411, "step": 61000 }, { "epoch": 2.3762299226072416, "grad_norm": 1.4568179845809937, "learning_rate": 3.819837898920625e-05, "loss": 1.5268, "step": 61100 }, { "epoch": 2.3801190059502977, "grad_norm": 3.366525888442993, "learning_rate": 3.817889568639676e-05, "loss": 1.4376, "step": 61200 }, { "epoch": 2.3840080892933537, "grad_norm": 2.341616630554199, "learning_rate": 3.8159412383587265e-05, "loss": 1.4716, "step": 61300 }, { "epoch": 2.3878971726364098, "grad_norm": 2.8608622550964355, "learning_rate": 3.8139929080777775e-05, "loss": 1.4848, "step": 61400 }, { "epoch": 2.391786255979466, "grad_norm": 1.5050898790359497, "learning_rate": 3.8120445777968285e-05, "loss": 1.4234, "step": 61500 }, { "epoch": 2.395675339322522, "grad_norm": 1.4847673177719116, "learning_rate": 3.8100962475158794e-05, "loss": 1.4391, "step": 61600 }, { "epoch": 2.399564422665578, "grad_norm": 3.393547296524048, "learning_rate": 3.80814791723493e-05, "loss": 1.5327, "step": 61700 }, { "epoch": 2.403453506008634, "grad_norm": 1.623116374015808, "learning_rate": 3.806199586953981e-05, "loss": 1.4757, "step": 61800 }, { "epoch": 2.40734258935169, "grad_norm": 
2.493985176086426, "learning_rate": 3.804251256673032e-05, "loss": 1.4125, "step": 61900 }, { "epoch": 2.411231672694746, "grad_norm": 4.183272361755371, "learning_rate": 3.802302926392082e-05, "loss": 1.4268, "step": 62000 }, { "epoch": 2.411231672694746, "eval_accuracy": 0.5223, "eval_f1": 0.5089101994626068, "eval_loss": 1.265812635421753, "eval_precision": 0.526864793298291, "eval_recall": 0.5223, "eval_runtime": 6821.7035, "eval_samples_per_second": 26.386, "eval_steps_per_second": 0.412, "step": 62000 }, { "epoch": 2.415120756037802, "grad_norm": 3.4213192462921143, "learning_rate": 3.800354596111133e-05, "loss": 1.5395, "step": 62100 }, { "epoch": 2.419009839380858, "grad_norm": 2.0048041343688965, "learning_rate": 3.798406265830184e-05, "loss": 1.5042, "step": 62200 }, { "epoch": 2.422898922723914, "grad_norm": 2.6902973651885986, "learning_rate": 3.796477418852044e-05, "loss": 1.5023, "step": 62300 }, { "epoch": 2.42678800606697, "grad_norm": 1.3102980852127075, "learning_rate": 3.794529088571095e-05, "loss": 1.5001, "step": 62400 }, { "epoch": 2.4306770894100262, "grad_norm": 2.556906223297119, "learning_rate": 3.792580758290146e-05, "loss": 1.5348, "step": 62500 }, { "epoch": 2.4345661727530823, "grad_norm": 5.616855144500732, "learning_rate": 3.790632428009197e-05, "loss": 1.4379, "step": 62600 }, { "epoch": 2.4384552560961383, "grad_norm": 1.6795768737792969, "learning_rate": 3.788684097728247e-05, "loss": 1.4531, "step": 62700 }, { "epoch": 2.4423443394391944, "grad_norm": 2.739314079284668, "learning_rate": 3.786735767447298e-05, "loss": 1.5667, "step": 62800 }, { "epoch": 2.4462334227822504, "grad_norm": 2.2297704219818115, "learning_rate": 3.784787437166349e-05, "loss": 1.6512, "step": 62900 }, { "epoch": 2.4501225061253065, "grad_norm": 1.220768690109253, "learning_rate": 3.7828391068853994e-05, "loss": 1.4774, "step": 63000 }, { "epoch": 2.4501225061253065, "eval_accuracy": 0.5295833333333333, "eval_f1": 0.5181107913566995, "eval_loss": 
1.2523529529571533, "eval_precision": 0.5370875381412643, "eval_recall": 0.5295833333333333, "eval_runtime": 6864.8441, "eval_samples_per_second": 26.221, "eval_steps_per_second": 0.41, "step": 63000 }, { "epoch": 2.4540115894683625, "grad_norm": 4.039364814758301, "learning_rate": 3.78089077660445e-05, "loss": 1.4855, "step": 63100 }, { "epoch": 2.4579006728114186, "grad_norm": 5.02944803237915, "learning_rate": 3.778942446323501e-05, "loss": 1.592, "step": 63200 }, { "epoch": 2.4617897561544746, "grad_norm": 6.020185947418213, "learning_rate": 3.776994116042552e-05, "loss": 1.5211, "step": 63300 }, { "epoch": 2.4656788394975306, "grad_norm": 3.12703800201416, "learning_rate": 3.775045785761602e-05, "loss": 1.5238, "step": 63400 }, { "epoch": 2.4695679228405867, "grad_norm": 1.0247185230255127, "learning_rate": 3.773097455480653e-05, "loss": 1.4766, "step": 63500 }, { "epoch": 2.4734570061836427, "grad_norm": 9.523953437805176, "learning_rate": 3.771149125199704e-05, "loss": 1.4593, "step": 63600 }, { "epoch": 2.4773460895266988, "grad_norm": 1.6876049041748047, "learning_rate": 3.769200794918754e-05, "loss": 1.5314, "step": 63700 }, { "epoch": 2.481235172869755, "grad_norm": 4.099940776824951, "learning_rate": 3.767252464637805e-05, "loss": 1.507, "step": 63800 }, { "epoch": 2.485124256212811, "grad_norm": 3.023209810256958, "learning_rate": 3.765304134356856e-05, "loss": 1.5503, "step": 63900 }, { "epoch": 2.489013339555867, "grad_norm": 2.473200798034668, "learning_rate": 3.763355804075907e-05, "loss": 1.4325, "step": 64000 }, { "epoch": 2.489013339555867, "eval_accuracy": 0.5201555555555556, "eval_f1": 0.5058755895521055, "eval_loss": 1.2673362493515015, "eval_precision": 0.5249835870001732, "eval_recall": 0.5201555555555556, "eval_runtime": 6914.4039, "eval_samples_per_second": 26.033, "eval_steps_per_second": 0.407, "step": 64000 }, { "epoch": 2.492902422898923, "grad_norm": 2.9715492725372314, "learning_rate": 3.7614074737949576e-05, "loss": 1.4808, "step": 
64100 }, { "epoch": 2.496791506241979, "grad_norm": 2.040937662124634, "learning_rate": 3.7594591435140086e-05, "loss": 1.4977, "step": 64200 }, { "epoch": 2.500680589585035, "grad_norm": 2.1972007751464844, "learning_rate": 3.7575108132330596e-05, "loss": 1.3918, "step": 64300 }, { "epoch": 2.504569672928091, "grad_norm": 4.0868096351623535, "learning_rate": 3.75556248295211e-05, "loss": 1.505, "step": 64400 }, { "epoch": 2.508458756271147, "grad_norm": 1.2922204732894897, "learning_rate": 3.753614152671161e-05, "loss": 1.5154, "step": 64500 }, { "epoch": 2.512347839614203, "grad_norm": 3.5834853649139404, "learning_rate": 3.7516853056930214e-05, "loss": 1.436, "step": 64600 }, { "epoch": 2.516236922957259, "grad_norm": 1.885155200958252, "learning_rate": 3.749736975412072e-05, "loss": 1.4043, "step": 64700 }, { "epoch": 2.5201260063003152, "grad_norm": 1.623671531677246, "learning_rate": 3.747788645131123e-05, "loss": 1.4962, "step": 64800 }, { "epoch": 2.5240150896433713, "grad_norm": 1.065475583076477, "learning_rate": 3.745840314850174e-05, "loss": 1.5771, "step": 64900 }, { "epoch": 2.5279041729864273, "grad_norm": 2.158216714859009, "learning_rate": 3.743891984569225e-05, "loss": 1.5087, "step": 65000 }, { "epoch": 2.5279041729864273, "eval_accuracy": 0.4971333333333333, "eval_f1": 0.4755216784689531, "eval_loss": 1.3083690404891968, "eval_precision": 0.5249741354207467, "eval_recall": 0.49713333333333337, "eval_runtime": 6915.6381, "eval_samples_per_second": 26.028, "eval_steps_per_second": 0.407, "step": 65000 }, { "epoch": 2.5317932563294834, "grad_norm": 4.420532703399658, "learning_rate": 3.741943654288275e-05, "loss": 1.3467, "step": 65100 }, { "epoch": 2.5356823396725394, "grad_norm": 4.243893146514893, "learning_rate": 3.739995324007326e-05, "loss": 1.5014, "step": 65200 }, { "epoch": 2.5395714230155955, "grad_norm": 2.7769765853881836, "learning_rate": 3.738046993726377e-05, "loss": 1.4773, "step": 65300 }, { "epoch": 2.5434605063586515, 
"grad_norm": 2.0368502140045166, "learning_rate": 3.736098663445427e-05, "loss": 1.4853, "step": 65400 }, { "epoch": 2.5473495897017076, "grad_norm": 1.5213009119033813, "learning_rate": 3.734150333164478e-05, "loss": 1.6136, "step": 65500 }, { "epoch": 2.5512386730447636, "grad_norm": 1.3371280431747437, "learning_rate": 3.732202002883529e-05, "loss": 1.5857, "step": 65600 }, { "epoch": 2.5551277563878196, "grad_norm": 2.005507230758667, "learning_rate": 3.73025367260258e-05, "loss": 1.4517, "step": 65700 }, { "epoch": 2.5590168397308757, "grad_norm": 1.4904299974441528, "learning_rate": 3.7283053423216306e-05, "loss": 1.441, "step": 65800 }, { "epoch": 2.5629059230739317, "grad_norm": 1.5665432214736938, "learning_rate": 3.7263570120406816e-05, "loss": 1.4534, "step": 65900 }, { "epoch": 2.5667950064169878, "grad_norm": 4.096823692321777, "learning_rate": 3.7244086817597326e-05, "loss": 1.4453, "step": 66000 }, { "epoch": 2.5667950064169878, "eval_accuracy": 0.5123277777777778, "eval_f1": 0.5017360381589772, "eval_loss": 1.285812497138977, "eval_precision": 0.5275709232971476, "eval_recall": 0.5123277777777778, "eval_runtime": 6912.5267, "eval_samples_per_second": 26.04, "eval_steps_per_second": 0.407, "step": 66000 }, { "epoch": 2.570684089760044, "grad_norm": 3.4297842979431152, "learning_rate": 3.7224798347815924e-05, "loss": 1.5087, "step": 66100 }, { "epoch": 2.5745731731031, "grad_norm": 4.533112049102783, "learning_rate": 3.7205315045006434e-05, "loss": 1.4309, "step": 66200 }, { "epoch": 2.578462256446156, "grad_norm": 2.5619709491729736, "learning_rate": 3.7185831742196944e-05, "loss": 1.4939, "step": 66300 }, { "epoch": 2.582351339789212, "grad_norm": 4.829930305480957, "learning_rate": 3.716634843938745e-05, "loss": 1.5231, "step": 66400 }, { "epoch": 2.586240423132268, "grad_norm": 3.2137644290924072, "learning_rate": 3.714686513657795e-05, "loss": 1.5625, "step": 66500 }, { "epoch": 2.590129506475324, "grad_norm": 1.2220337390899658, "learning_rate": 
3.712738183376846e-05, "loss": 1.4393, "step": 66600 }, { "epoch": 2.59401858981838, "grad_norm": 3.033386468887329, "learning_rate": 3.710789853095897e-05, "loss": 1.4684, "step": 66700 }, { "epoch": 2.597907673161436, "grad_norm": 1.2126824855804443, "learning_rate": 3.708841522814947e-05, "loss": 1.5108, "step": 66800 }, { "epoch": 2.601796756504492, "grad_norm": 2.6969711780548096, "learning_rate": 3.706893192533998e-05, "loss": 1.4643, "step": 66900 }, { "epoch": 2.605685839847548, "grad_norm": 4.320705413818359, "learning_rate": 3.704944862253049e-05, "loss": 1.476, "step": 67000 }, { "epoch": 2.605685839847548, "eval_accuracy": 0.5233277777777777, "eval_f1": 0.5089441086696971, "eval_loss": 1.2625819444656372, "eval_precision": 0.5223223186793178, "eval_recall": 0.5233277777777777, "eval_runtime": 6910.1514, "eval_samples_per_second": 26.049, "eval_steps_per_second": 0.407, "step": 67000 }, { "epoch": 2.6095749231906042, "grad_norm": 3.7080917358398438, "learning_rate": 3.7029965319721e-05, "loss": 1.4147, "step": 67100 }, { "epoch": 2.6134640065336603, "grad_norm": 2.8422083854675293, "learning_rate": 3.7010482016911506e-05, "loss": 1.5807, "step": 67200 }, { "epoch": 2.6173530898767163, "grad_norm": 4.01373291015625, "learning_rate": 3.6990998714102016e-05, "loss": 1.5373, "step": 67300 }, { "epoch": 2.6212421732197724, "grad_norm": 2.32670521736145, "learning_rate": 3.6971515411292525e-05, "loss": 1.5223, "step": 67400 }, { "epoch": 2.625131256562828, "grad_norm": 2.5643904209136963, "learning_rate": 3.695203210848303e-05, "loss": 1.3992, "step": 67500 }, { "epoch": 2.629020339905884, "grad_norm": 2.5861825942993164, "learning_rate": 3.693254880567354e-05, "loss": 1.5374, "step": 67600 }, { "epoch": 2.63290942324894, "grad_norm": 1.5305594205856323, "learning_rate": 3.691306550286405e-05, "loss": 1.5272, "step": 67700 }, { "epoch": 2.636798506591996, "grad_norm": 4.329915523529053, "learning_rate": 3.689358220005456e-05, "loss": 1.5243, "step": 67800 }, { 
"epoch": 2.640687589935052, "grad_norm": 6.374677658081055, "learning_rate": 3.687409889724506e-05, "loss": 1.4841, "step": 67900 }, { "epoch": 2.644576673278108, "grad_norm": 1.4117354154586792, "learning_rate": 3.685461559443557e-05, "loss": 1.4795, "step": 68000 }, { "epoch": 2.644576673278108, "eval_accuracy": 0.5158722222222222, "eval_f1": 0.4971760350687801, "eval_loss": 1.277724027633667, "eval_precision": 0.5278084295464277, "eval_recall": 0.5158722222222222, "eval_runtime": 6910.4506, "eval_samples_per_second": 26.048, "eval_steps_per_second": 0.407, "step": 68000 }, { "epoch": 2.6484657566211642, "grad_norm": 3.2912166118621826, "learning_rate": 3.683513229162608e-05, "loss": 1.5414, "step": 68100 }, { "epoch": 2.6523548399642203, "grad_norm": 1.7500919103622437, "learning_rate": 3.6815648988816584e-05, "loss": 1.4294, "step": 68200 }, { "epoch": 2.6562439233072763, "grad_norm": 2.303539514541626, "learning_rate": 3.6796165686007094e-05, "loss": 1.5453, "step": 68300 }, { "epoch": 2.6601330066503324, "grad_norm": 4.3748393058776855, "learning_rate": 3.6776682383197604e-05, "loss": 1.43, "step": 68400 }, { "epoch": 2.6640220899933884, "grad_norm": 3.3885514736175537, "learning_rate": 3.675719908038811e-05, "loss": 1.4867, "step": 68500 }, { "epoch": 2.6679111733364445, "grad_norm": 1.9170160293579102, "learning_rate": 3.673771577757862e-05, "loss": 1.5454, "step": 68600 }, { "epoch": 2.6718002566795005, "grad_norm": 2.2803707122802734, "learning_rate": 3.671823247476913e-05, "loss": 1.4648, "step": 68700 }, { "epoch": 2.6756893400225565, "grad_norm": 1.8329061269760132, "learning_rate": 3.669874917195964e-05, "loss": 1.4319, "step": 68800 }, { "epoch": 2.6795784233656126, "grad_norm": 1.64739191532135, "learning_rate": 3.667926586915014e-05, "loss": 1.4614, "step": 68900 }, { "epoch": 2.6834675067086686, "grad_norm": 4.172203063964844, "learning_rate": 3.665978256634065e-05, "loss": 1.4468, "step": 69000 }, { "epoch": 2.6834675067086686, "eval_accuracy": 
0.5299222222222222, "eval_f1": 0.5125979621612252, "eval_loss": 1.250376582145691, "eval_precision": 0.5283470641996468, "eval_recall": 0.5299222222222223, "eval_runtime": 6912.3065, "eval_samples_per_second": 26.041, "eval_steps_per_second": 0.407, "step": 69000 }, { "epoch": 2.6873565900517247, "grad_norm": 4.025543212890625, "learning_rate": 3.664029926353116e-05, "loss": 1.4327, "step": 69100 }, { "epoch": 2.6912456733947807, "grad_norm": 1.769545555114746, "learning_rate": 3.662081596072166e-05, "loss": 1.5304, "step": 69200 }, { "epoch": 2.6951347567378368, "grad_norm": 5.884199619293213, "learning_rate": 3.6601332657912166e-05, "loss": 1.4926, "step": 69300 }, { "epoch": 2.699023840080893, "grad_norm": 10.459078788757324, "learning_rate": 3.6581849355102676e-05, "loss": 1.5439, "step": 69400 }, { "epoch": 2.702912923423949, "grad_norm": 3.0361380577087402, "learning_rate": 3.6562366052293186e-05, "loss": 1.4369, "step": 69500 }, { "epoch": 2.706802006767005, "grad_norm": 1.473408579826355, "learning_rate": 3.654288274948369e-05, "loss": 1.4099, "step": 69600 }, { "epoch": 2.710691090110061, "grad_norm": 6.6853132247924805, "learning_rate": 3.65233994466742e-05, "loss": 1.4852, "step": 69700 }, { "epoch": 2.714580173453117, "grad_norm": 2.988797664642334, "learning_rate": 3.650391614386471e-05, "loss": 1.4145, "step": 69800 }, { "epoch": 2.718469256796173, "grad_norm": 1.0234298706054688, "learning_rate": 3.648443284105522e-05, "loss": 1.4687, "step": 69900 }, { "epoch": 2.722358340139229, "grad_norm": 3.9906296730041504, "learning_rate": 3.646494953824572e-05, "loss": 1.4137, "step": 70000 }, { "epoch": 2.722358340139229, "eval_accuracy": 0.52895, "eval_f1": 0.5176008892335311, "eval_loss": 1.2510871887207031, "eval_precision": 0.5377407783213627, "eval_recall": 0.5289499999999999, "eval_runtime": 6909.8408, "eval_samples_per_second": 26.05, "eval_steps_per_second": 0.407, "step": 70000 }, { "epoch": 2.726247423482285, "grad_norm": 2.316157817840576, 
"learning_rate": 3.644566106846433e-05, "loss": 1.4555, "step": 70100 }, { "epoch": 2.730136506825341, "grad_norm": 3.6121482849121094, "learning_rate": 3.642617776565484e-05, "loss": 1.4374, "step": 70200 }, { "epoch": 2.734025590168397, "grad_norm": 1.4393208026885986, "learning_rate": 3.640669446284534e-05, "loss": 1.3998, "step": 70300 }, { "epoch": 2.7379146735114532, "grad_norm": 4.360203742980957, "learning_rate": 3.638721116003585e-05, "loss": 1.4522, "step": 70400 }, { "epoch": 2.7418037568545093, "grad_norm": 2.1226320266723633, "learning_rate": 3.636772785722636e-05, "loss": 1.4271, "step": 70500 }, { "epoch": 2.7456928401975653, "grad_norm": 2.5116186141967773, "learning_rate": 3.634824455441686e-05, "loss": 1.4454, "step": 70600 }, { "epoch": 2.7495819235406214, "grad_norm": 5.584084510803223, "learning_rate": 3.632876125160737e-05, "loss": 1.5994, "step": 70700 }, { "epoch": 2.7534710068836774, "grad_norm": 3.4446094036102295, "learning_rate": 3.630927794879788e-05, "loss": 1.4979, "step": 70800 }, { "epoch": 2.7573600902267335, "grad_norm": 2.8625946044921875, "learning_rate": 3.628979464598839e-05, "loss": 1.4353, "step": 70900 }, { "epoch": 2.7612491735697895, "grad_norm": 2.3226401805877686, "learning_rate": 3.6270311343178896e-05, "loss": 1.5105, "step": 71000 }, { "epoch": 2.7612491735697895, "eval_accuracy": 0.53835, "eval_f1": 0.5298006010927822, "eval_loss": 1.234232783317566, "eval_precision": 0.5430185052889823, "eval_recall": 0.53835, "eval_runtime": 6911.2977, "eval_samples_per_second": 26.044, "eval_steps_per_second": 0.407, "step": 71000 }, { "epoch": 2.7651382569128455, "grad_norm": 3.718851089477539, "learning_rate": 3.6250828040369406e-05, "loss": 1.4768, "step": 71100 }, { "epoch": 2.7690273402559016, "grad_norm": 1.5269882678985596, "learning_rate": 3.6231344737559916e-05, "loss": 1.398, "step": 71200 }, { "epoch": 2.7729164235989576, "grad_norm": 3.2476511001586914, "learning_rate": 3.621186143475042e-05, "loss": 1.5542, "step": 
71300 }, { "epoch": 2.7768055069420137, "grad_norm": 3.7317845821380615, "learning_rate": 3.619237813194093e-05, "loss": 1.4119, "step": 71400 }, { "epoch": 2.7806945902850697, "grad_norm": 2.310847759246826, "learning_rate": 3.617289482913144e-05, "loss": 1.4975, "step": 71500 }, { "epoch": 2.7845836736281258, "grad_norm": 5.3131327629089355, "learning_rate": 3.615341152632195e-05, "loss": 1.3982, "step": 71600 }, { "epoch": 2.788472756971182, "grad_norm": 1.672605037689209, "learning_rate": 3.613392822351245e-05, "loss": 1.6385, "step": 71700 }, { "epoch": 2.792361840314238, "grad_norm": 3.4212090969085693, "learning_rate": 3.611444492070296e-05, "loss": 1.5813, "step": 71800 }, { "epoch": 2.796250923657294, "grad_norm": 2.389017343521118, "learning_rate": 3.609496161789347e-05, "loss": 1.4966, "step": 71900 }, { "epoch": 2.80014000700035, "grad_norm": 3.3344855308532715, "learning_rate": 3.6075478315083975e-05, "loss": 1.4906, "step": 72000 }, { "epoch": 2.80014000700035, "eval_accuracy": 0.5271055555555556, "eval_f1": 0.5137478989155827, "eval_loss": 1.2549831867218018, "eval_precision": 0.5294872099263003, "eval_recall": 0.5271055555555555, "eval_runtime": 6910.989, "eval_samples_per_second": 26.045, "eval_steps_per_second": 0.407, "step": 72000 }, { "epoch": 2.804029090343406, "grad_norm": 3.995737314224243, "learning_rate": 3.6055995012274485e-05, "loss": 1.5134, "step": 72100 }, { "epoch": 2.807918173686462, "grad_norm": 2.3223719596862793, "learning_rate": 3.6036511709464994e-05, "loss": 1.4361, "step": 72200 }, { "epoch": 2.811807257029518, "grad_norm": 9.234646797180176, "learning_rate": 3.601722323968359e-05, "loss": 1.4786, "step": 72300 }, { "epoch": 2.815696340372574, "grad_norm": 1.7271496057510376, "learning_rate": 3.59977399368741e-05, "loss": 1.5204, "step": 72400 }, { "epoch": 2.81958542371563, "grad_norm": 3.63987135887146, "learning_rate": 3.597825663406461e-05, "loss": 1.4272, "step": 72500 }, { "epoch": 2.823474507058686, "grad_norm": 
2.1597232818603516, "learning_rate": 3.5958773331255116e-05, "loss": 1.53, "step": 72600 }, { "epoch": 2.8273635904017422, "grad_norm": 1.8460849523544312, "learning_rate": 3.593929002844562e-05, "loss": 1.412, "step": 72700 }, { "epoch": 2.8312526737447983, "grad_norm": 3.5416347980499268, "learning_rate": 3.591980672563613e-05, "loss": 1.4609, "step": 72800 }, { "epoch": 2.8351417570878543, "grad_norm": 2.8623690605163574, "learning_rate": 3.590032342282664e-05, "loss": 1.3971, "step": 72900 }, { "epoch": 2.8390308404309104, "grad_norm": 2.7609617710113525, "learning_rate": 3.588084012001715e-05, "loss": 1.4464, "step": 73000 }, { "epoch": 2.8390308404309104, "eval_accuracy": 0.5272944444444444, "eval_f1": 0.5117906417303691, "eval_loss": 1.251205563545227, "eval_precision": 0.5384000887561028, "eval_recall": 0.5272944444444444, "eval_runtime": 6908.2856, "eval_samples_per_second": 26.056, "eval_steps_per_second": 0.407, "step": 73000 }, { "epoch": 2.8429199237739664, "grad_norm": 1.230092167854309, "learning_rate": 3.586135681720765e-05, "loss": 1.4644, "step": 73100 }, { "epoch": 2.8468090071170224, "grad_norm": 2.3442318439483643, "learning_rate": 3.584187351439816e-05, "loss": 1.5019, "step": 73200 }, { "epoch": 2.8506980904600785, "grad_norm": 2.150620937347412, "learning_rate": 3.582239021158867e-05, "loss": 1.4328, "step": 73300 }, { "epoch": 2.8545871738031345, "grad_norm": 1.382699966430664, "learning_rate": 3.5802906908779175e-05, "loss": 1.4984, "step": 73400 }, { "epoch": 2.8584762571461906, "grad_norm": 1.5719258785247803, "learning_rate": 3.5783423605969684e-05, "loss": 1.5228, "step": 73500 }, { "epoch": 2.8623653404892466, "grad_norm": 3.59023118019104, "learning_rate": 3.5763940303160194e-05, "loss": 1.5691, "step": 73600 }, { "epoch": 2.8662544238323027, "grad_norm": 1.9726593494415283, "learning_rate": 3.57444570003507e-05, "loss": 1.4259, "step": 73700 }, { "epoch": 2.8701435071753587, "grad_norm": 2.544160842895508, "learning_rate": 
3.572497369754121e-05, "loss": 1.4794, "step": 73800 }, { "epoch": 2.8740325905184148, "grad_norm": 2.0282540321350098, "learning_rate": 3.570549039473172e-05, "loss": 1.4067, "step": 73900 }, { "epoch": 2.877921673861471, "grad_norm": 1.6904420852661133, "learning_rate": 3.568600709192223e-05, "loss": 1.6306, "step": 74000 }, { "epoch": 2.877921673861471, "eval_accuracy": 0.5300055555555555, "eval_f1": 0.5160154543679175, "eval_loss": 1.2465791702270508, "eval_precision": 0.5319652508751151, "eval_recall": 0.5300055555555555, "eval_runtime": 6908.3392, "eval_samples_per_second": 26.055, "eval_steps_per_second": 0.407, "step": 74000 }, { "epoch": 2.881810757204527, "grad_norm": 1.719482421875, "learning_rate": 3.566652378911273e-05, "loss": 1.5683, "step": 74100 }, { "epoch": 2.885699840547583, "grad_norm": 1.0922502279281616, "learning_rate": 3.564704048630324e-05, "loss": 1.4343, "step": 74200 }, { "epoch": 2.889588923890639, "grad_norm": 5.20039701461792, "learning_rate": 3.5627752016521845e-05, "loss": 1.5597, "step": 74300 }, { "epoch": 2.893478007233695, "grad_norm": 3.1543145179748535, "learning_rate": 3.560826871371235e-05, "loss": 1.4995, "step": 74400 }, { "epoch": 2.897367090576751, "grad_norm": 1.8255199193954468, "learning_rate": 3.558878541090286e-05, "loss": 1.5523, "step": 74500 }, { "epoch": 2.901256173919807, "grad_norm": 2.7781059741973877, "learning_rate": 3.556930210809337e-05, "loss": 1.4103, "step": 74600 }, { "epoch": 2.905145257262863, "grad_norm": 2.3760812282562256, "learning_rate": 3.554981880528387e-05, "loss": 1.6, "step": 74700 }, { "epoch": 2.909034340605919, "grad_norm": 2.6225008964538574, "learning_rate": 3.553033550247438e-05, "loss": 1.4248, "step": 74800 }, { "epoch": 2.912923423948975, "grad_norm": 8.386560440063477, "learning_rate": 3.551085219966489e-05, "loss": 1.4498, "step": 74900 }, { "epoch": 2.9168125072920312, "grad_norm": 1.8043631315231323, "learning_rate": 3.54913688968554e-05, "loss": 1.4965, "step": 75000 }, { 
"epoch": 2.9168125072920312, "eval_accuracy": 0.5222444444444444, "eval_f1": 0.5078181582560581, "eval_loss": 1.2595036029815674, "eval_precision": 0.5358497207405869, "eval_recall": 0.5222444444444444, "eval_runtime": 6911.7993, "eval_samples_per_second": 26.042, "eval_steps_per_second": 0.407, "step": 75000 }, { "epoch": 2.9207015906350873, "grad_norm": 1.2892789840698242, "learning_rate": 3.5471885594045904e-05, "loss": 1.4623, "step": 75100 }, { "epoch": 2.9245906739781433, "grad_norm": 4.191807746887207, "learning_rate": 3.5452402291236414e-05, "loss": 1.4575, "step": 75200 }, { "epoch": 2.9284797573211994, "grad_norm": 1.43287992477417, "learning_rate": 3.5432918988426924e-05, "loss": 1.4894, "step": 75300 }, { "epoch": 2.9323688406642554, "grad_norm": 3.685853958129883, "learning_rate": 3.541343568561743e-05, "loss": 1.3625, "step": 75400 }, { "epoch": 2.9362579240073114, "grad_norm": 4.943828582763672, "learning_rate": 3.539395238280794e-05, "loss": 1.5657, "step": 75500 }, { "epoch": 2.9401470073503675, "grad_norm": 1.1644781827926636, "learning_rate": 3.537446907999845e-05, "loss": 1.526, "step": 75600 }, { "epoch": 2.9440360906934235, "grad_norm": 1.8887754678726196, "learning_rate": 3.535498577718895e-05, "loss": 1.5746, "step": 75700 }, { "epoch": 2.9479251740364796, "grad_norm": 1.2870804071426392, "learning_rate": 3.533550247437945e-05, "loss": 1.4261, "step": 75800 }, { "epoch": 2.9518142573795356, "grad_norm": 5.799267768859863, "learning_rate": 3.531601917156996e-05, "loss": 1.4467, "step": 75900 }, { "epoch": 2.9557033407225917, "grad_norm": 4.901100158691406, "learning_rate": 3.529653586876047e-05, "loss": 1.4079, "step": 76000 }, { "epoch": 2.9557033407225917, "eval_accuracy": 0.5227166666666667, "eval_f1": 0.5091863205742707, "eval_loss": 1.2536406517028809, "eval_precision": 0.5230572200115179, "eval_recall": 0.5227166666666667, "eval_runtime": 6909.3367, "eval_samples_per_second": 26.052, "eval_steps_per_second": 0.407, "step": 76000 }, { 
"epoch": 2.9595924240656477, "grad_norm": 2.065781354904175, "learning_rate": 3.527705256595098e-05, "loss": 1.3984, "step": 76100 }, { "epoch": 2.9634815074087038, "grad_norm": 3.6431174278259277, "learning_rate": 3.5257569263141486e-05, "loss": 1.4746, "step": 76200 }, { "epoch": 2.96737059075176, "grad_norm": 2.580937147140503, "learning_rate": 3.5238085960331996e-05, "loss": 1.4275, "step": 76300 }, { "epoch": 2.971259674094816, "grad_norm": 4.700362205505371, "learning_rate": 3.52187974905506e-05, "loss": 1.4471, "step": 76400 }, { "epoch": 2.975148757437872, "grad_norm": 3.477555751800537, "learning_rate": 3.5199314187741104e-05, "loss": 1.3964, "step": 76500 }, { "epoch": 2.979037840780928, "grad_norm": 2.57208514213562, "learning_rate": 3.5179830884931614e-05, "loss": 1.5128, "step": 76600 }, { "epoch": 2.982926924123984, "grad_norm": 2.2942159175872803, "learning_rate": 3.5160347582122124e-05, "loss": 1.4181, "step": 76700 }, { "epoch": 2.98681600746704, "grad_norm": 2.8143696784973145, "learning_rate": 3.514086427931263e-05, "loss": 1.4913, "step": 76800 }, { "epoch": 2.990705090810096, "grad_norm": 2.507946014404297, "learning_rate": 3.512138097650314e-05, "loss": 1.4448, "step": 76900 }, { "epoch": 2.994594174153152, "grad_norm": 4.431018352508545, "learning_rate": 3.510189767369365e-05, "loss": 1.448, "step": 77000 }, { "epoch": 2.994594174153152, "eval_accuracy": 0.5230277777777778, "eval_f1": 0.49912731638134405, "eval_loss": 1.2699962854385376, "eval_precision": 0.5295102464960207, "eval_recall": 0.5230277777777778, "eval_runtime": 6911.7011, "eval_samples_per_second": 26.043, "eval_steps_per_second": 0.407, "step": 77000 }, { "epoch": 2.998483257496208, "grad_norm": 3.957139253616333, "learning_rate": 3.508241437088416e-05, "loss": 1.524, "step": 77100 }, { "epoch": 3.002372340839264, "grad_norm": 6.480105400085449, "learning_rate": 3.506293106807466e-05, "loss": 1.4249, "step": 77200 }, { "epoch": 3.0062614241823202, "grad_norm": 
5.916323184967041, "learning_rate": 3.504344776526517e-05, "loss": 1.4499, "step": 77300 }, { "epoch": 3.0101505075253763, "grad_norm": 4.3208746910095215, "learning_rate": 3.502396446245568e-05, "loss": 1.6268, "step": 77400 }, { "epoch": 3.0140395908684323, "grad_norm": 2.295536756515503, "learning_rate": 3.500448115964618e-05, "loss": 1.4243, "step": 77500 }, { "epoch": 3.0179286742114884, "grad_norm": 2.495159149169922, "learning_rate": 3.498499785683669e-05, "loss": 1.607, "step": 77600 }, { "epoch": 3.0218177575545444, "grad_norm": 1.9367072582244873, "learning_rate": 3.49655145540272e-05, "loss": 1.4669, "step": 77700 }, { "epoch": 3.0257068408976004, "grad_norm": 2.0649592876434326, "learning_rate": 3.4946031251217706e-05, "loss": 1.3807, "step": 77800 }, { "epoch": 3.0295959242406565, "grad_norm": 2.820770263671875, "learning_rate": 3.4926547948408216e-05, "loss": 1.5512, "step": 77900 }, { "epoch": 3.0334850075837125, "grad_norm": 1.4520803689956665, "learning_rate": 3.4907064645598726e-05, "loss": 1.6561, "step": 78000 }, { "epoch": 3.0334850075837125, "eval_accuracy": 0.5347555555555555, "eval_f1": 0.5200373727116002, "eval_loss": 1.2381483316421509, "eval_precision": 0.5236600350341908, "eval_recall": 0.5347555555555555, "eval_runtime": 6909.6244, "eval_samples_per_second": 26.051, "eval_steps_per_second": 0.407, "step": 78000 }, { "epoch": 3.0373740909267686, "grad_norm": 2.1084792613983154, "learning_rate": 3.4887581342789235e-05, "loss": 1.4722, "step": 78100 }, { "epoch": 3.0412631742698246, "grad_norm": 5.091939449310303, "learning_rate": 3.486809803997974e-05, "loss": 1.4254, "step": 78200 }, { "epoch": 3.0451522576128807, "grad_norm": 1.5510835647583008, "learning_rate": 3.484861473717025e-05, "loss": 1.4545, "step": 78300 }, { "epoch": 3.0490413409559367, "grad_norm": 2.412325859069824, "learning_rate": 3.4829326267388853e-05, "loss": 1.4686, "step": 78400 }, { "epoch": 3.0529304242989928, "grad_norm": 1.5352369546890259, "learning_rate": 
3.4809842964579357e-05, "loss": 1.5195, "step": 78500 }, { "epoch": 3.056819507642049, "grad_norm": 4.5553460121154785, "learning_rate": 3.4790359661769866e-05, "loss": 1.3806, "step": 78600 }, { "epoch": 3.060708590985105, "grad_norm": 5.692063808441162, "learning_rate": 3.4770876358960376e-05, "loss": 1.4885, "step": 78700 }, { "epoch": 3.064597674328161, "grad_norm": 7.003379821777344, "learning_rate": 3.475139305615088e-05, "loss": 1.5297, "step": 78800 }, { "epoch": 3.068486757671217, "grad_norm": 3.1653382778167725, "learning_rate": 3.473190975334139e-05, "loss": 1.5036, "step": 78900 }, { "epoch": 3.072375841014273, "grad_norm": 1.9598782062530518, "learning_rate": 3.47124264505319e-05, "loss": 1.5103, "step": 79000 }, { "epoch": 3.072375841014273, "eval_accuracy": 0.5333777777777777, "eval_f1": 0.5216458172684665, "eval_loss": 1.2392634153366089, "eval_precision": 0.5451280359616497, "eval_recall": 0.5333777777777778, "eval_runtime": 6910.4694, "eval_samples_per_second": 26.047, "eval_steps_per_second": 0.407, "step": 79000 }, { "epoch": 3.076264924357329, "grad_norm": 2.5020530223846436, "learning_rate": 3.46929431477224e-05, "loss": 1.4434, "step": 79100 }, { "epoch": 3.080154007700385, "grad_norm": 3.5124645233154297, "learning_rate": 3.4673459844912906e-05, "loss": 1.3787, "step": 79200 }, { "epoch": 3.084043091043441, "grad_norm": 3.7496237754821777, "learning_rate": 3.4653976542103416e-05, "loss": 1.5237, "step": 79300 }, { "epoch": 3.087932174386497, "grad_norm": 2.222031354904175, "learning_rate": 3.4634493239293925e-05, "loss": 1.4525, "step": 79400 }, { "epoch": 3.091821257729553, "grad_norm": 2.167424201965332, "learning_rate": 3.4615009936484435e-05, "loss": 1.4645, "step": 79500 }, { "epoch": 3.0957103410726092, "grad_norm": 3.0269103050231934, "learning_rate": 3.459552663367494e-05, "loss": 1.6045, "step": 79600 }, { "epoch": 3.0995994244156653, "grad_norm": 1.8566100597381592, "learning_rate": 3.457604333086545e-05, "loss": 1.4705, "step": 
79700 }, { "epoch": 3.1034885077587213, "grad_norm": 3.1511504650115967, "learning_rate": 3.455656002805596e-05, "loss": 1.4716, "step": 79800 }, { "epoch": 3.1073775911017774, "grad_norm": 4.367226600646973, "learning_rate": 3.453707672524646e-05, "loss": 1.4765, "step": 79900 }, { "epoch": 3.1112666744448334, "grad_norm": 7.672006607055664, "learning_rate": 3.451759342243697e-05, "loss": 1.5148, "step": 80000 }, { "epoch": 3.1112666744448334, "eval_accuracy": 0.5307111111111111, "eval_f1": 0.5090762227986002, "eval_loss": 1.2488751411437988, "eval_precision": 0.5474357850441001, "eval_recall": 0.5307111111111111, "eval_runtime": 6910.374, "eval_samples_per_second": 26.048, "eval_steps_per_second": 0.407, "step": 80000 }, { "epoch": 3.1151557577878894, "grad_norm": 1.5646942853927612, "learning_rate": 3.449811011962748e-05, "loss": 1.4182, "step": 80100 }, { "epoch": 3.1190448411309455, "grad_norm": 7.787454605102539, "learning_rate": 3.447862681681799e-05, "loss": 1.4889, "step": 80200 }, { "epoch": 3.1229339244740015, "grad_norm": 5.97632360458374, "learning_rate": 3.4459143514008494e-05, "loss": 1.4924, "step": 80300 }, { "epoch": 3.1268230078170576, "grad_norm": 2.278153896331787, "learning_rate": 3.44398550442271e-05, "loss": 1.3719, "step": 80400 }, { "epoch": 3.1307120911601136, "grad_norm": 3.6953320503234863, "learning_rate": 3.442037174141761e-05, "loss": 1.5188, "step": 80500 }, { "epoch": 3.1346011745031697, "grad_norm": 3.6085543632507324, "learning_rate": 3.440088843860811e-05, "loss": 1.5156, "step": 80600 }, { "epoch": 3.1384902578462257, "grad_norm": 2.0844812393188477, "learning_rate": 3.438140513579862e-05, "loss": 1.5404, "step": 80700 }, { "epoch": 3.1423793411892817, "grad_norm": 4.745670318603516, "learning_rate": 3.436192183298913e-05, "loss": 1.5536, "step": 80800 }, { "epoch": 3.146268424532338, "grad_norm": 2.3797929286956787, "learning_rate": 3.4342438530179635e-05, "loss": 1.353, "step": 80900 }, { "epoch": 3.150157507875394, 
"grad_norm": 1.8599907159805298, "learning_rate": 3.4322955227370145e-05, "loss": 1.4129, "step": 81000 }, { "epoch": 3.150157507875394, "eval_accuracy": 0.5378611111111111, "eval_f1": 0.523783360960598, "eval_loss": 1.2319035530090332, "eval_precision": 0.5291547766476672, "eval_recall": 0.5378611111111111, "eval_runtime": 6912.5985, "eval_samples_per_second": 26.039, "eval_steps_per_second": 0.407, "step": 81000 }, { "epoch": 3.15404659121845, "grad_norm": 5.802682876586914, "learning_rate": 3.4303471924560655e-05, "loss": 1.4697, "step": 81100 }, { "epoch": 3.157935674561506, "grad_norm": 2.57497501373291, "learning_rate": 3.4283988621751165e-05, "loss": 1.4276, "step": 81200 }, { "epoch": 3.161824757904562, "grad_norm": 4.213287830352783, "learning_rate": 3.426450531894167e-05, "loss": 1.4693, "step": 81300 }, { "epoch": 3.165713841247618, "grad_norm": 2.8778562545776367, "learning_rate": 3.424502201613218e-05, "loss": 1.4155, "step": 81400 }, { "epoch": 3.169602924590674, "grad_norm": 2.942298650741577, "learning_rate": 3.422553871332269e-05, "loss": 1.4407, "step": 81500 }, { "epoch": 3.17349200793373, "grad_norm": 4.570230960845947, "learning_rate": 3.420605541051319e-05, "loss": 1.6354, "step": 81600 }, { "epoch": 3.177381091276786, "grad_norm": 2.640789747238159, "learning_rate": 3.41865721077037e-05, "loss": 1.4042, "step": 81700 }, { "epoch": 3.181270174619842, "grad_norm": 2.1968119144439697, "learning_rate": 3.416708880489421e-05, "loss": 1.369, "step": 81800 }, { "epoch": 3.1851592579628982, "grad_norm": 1.6662214994430542, "learning_rate": 3.414760550208472e-05, "loss": 1.3394, "step": 81900 }, { "epoch": 3.1890483413059543, "grad_norm": 1.5794618129730225, "learning_rate": 3.4128122199275224e-05, "loss": 1.6654, "step": 82000 }, { "epoch": 3.1890483413059543, "eval_accuracy": 0.5334777777777778, "eval_f1": 0.5165435942420319, "eval_loss": 1.2415425777435303, "eval_precision": 0.537212675776369, "eval_recall": 0.5334777777777777, "eval_runtime": 
6912.7376, "eval_samples_per_second": 26.039, "eval_steps_per_second": 0.407, "step": 82000 }, { "epoch": 3.1929374246490103, "grad_norm": 1.5804766416549683, "learning_rate": 3.410863889646573e-05, "loss": 1.5859, "step": 82100 }, { "epoch": 3.1968265079920664, "grad_norm": 1.6711596250534058, "learning_rate": 3.408915559365624e-05, "loss": 1.4225, "step": 82200 }, { "epoch": 3.2007155913351224, "grad_norm": 2.4756999015808105, "learning_rate": 3.406967229084675e-05, "loss": 1.5321, "step": 82300 }, { "epoch": 3.2046046746781784, "grad_norm": 1.3101505041122437, "learning_rate": 3.4050383821065345e-05, "loss": 1.4287, "step": 82400 }, { "epoch": 3.2084937580212345, "grad_norm": 2.612647533416748, "learning_rate": 3.4030900518255855e-05, "loss": 1.5022, "step": 82500 }, { "epoch": 3.2123828413642905, "grad_norm": 2.7441556453704834, "learning_rate": 3.4011417215446365e-05, "loss": 1.3848, "step": 82600 }, { "epoch": 3.2162719247073466, "grad_norm": 6.483684062957764, "learning_rate": 3.399193391263687e-05, "loss": 1.5699, "step": 82700 }, { "epoch": 3.2201610080504026, "grad_norm": 2.391362428665161, "learning_rate": 3.397245060982738e-05, "loss": 1.3818, "step": 82800 }, { "epoch": 3.2240500913934587, "grad_norm": 2.4806442260742188, "learning_rate": 3.395296730701789e-05, "loss": 1.5447, "step": 82900 }, { "epoch": 3.2279391747365147, "grad_norm": 1.6498297452926636, "learning_rate": 3.393348400420839e-05, "loss": 1.4226, "step": 83000 }, { "epoch": 3.2279391747365147, "eval_accuracy": 0.5335777777777778, "eval_f1": 0.5209760571541341, "eval_loss": 1.234297513961792, "eval_precision": 0.5477767190080981, "eval_recall": 0.5335777777777778, "eval_runtime": 6909.3729, "eval_samples_per_second": 26.052, "eval_steps_per_second": 0.407, "step": 83000 }, { "epoch": 3.2318282580795707, "grad_norm": 1.7672158479690552, "learning_rate": 3.39140007013989e-05, "loss": 1.3944, "step": 83100 }, { "epoch": 3.235717341422627, "grad_norm": 1.9855223894119263, "learning_rate": 
3.389451739858941e-05, "loss": 1.5197, "step": 83200 }, { "epoch": 3.239606424765683, "grad_norm": 3.5026133060455322, "learning_rate": 3.387503409577992e-05, "loss": 1.4445, "step": 83300 }, { "epoch": 3.243495508108739, "grad_norm": 6.431142330169678, "learning_rate": 3.3855550792970424e-05, "loss": 1.4937, "step": 83400 }, { "epoch": 3.247384591451795, "grad_norm": 2.3527064323425293, "learning_rate": 3.3836067490160934e-05, "loss": 1.5212, "step": 83500 }, { "epoch": 3.251273674794851, "grad_norm": 2.1277318000793457, "learning_rate": 3.3816584187351444e-05, "loss": 1.5078, "step": 83600 }, { "epoch": 3.255162758137907, "grad_norm": 3.605158805847168, "learning_rate": 3.379710088454195e-05, "loss": 1.4417, "step": 83700 }, { "epoch": 3.259051841480963, "grad_norm": 3.393941879272461, "learning_rate": 3.3777617581732457e-05, "loss": 1.5249, "step": 83800 }, { "epoch": 3.262940924824019, "grad_norm": 4.771339416503906, "learning_rate": 3.3758134278922967e-05, "loss": 1.4829, "step": 83900 }, { "epoch": 3.266830008167075, "grad_norm": 3.713279962539673, "learning_rate": 3.373865097611347e-05, "loss": 1.3913, "step": 84000 }, { "epoch": 3.266830008167075, "eval_accuracy": 0.5380777777777778, "eval_f1": 0.525089071010359, "eval_loss": 1.2316546440124512, "eval_precision": 0.5344209995335217, "eval_recall": 0.5380777777777778, "eval_runtime": 6911.515, "eval_samples_per_second": 26.043, "eval_steps_per_second": 0.407, "step": 84000 }, { "epoch": 3.270719091510131, "grad_norm": 4.5872883796691895, "learning_rate": 3.371916767330398e-05, "loss": 1.4941, "step": 84100 }, { "epoch": 3.2746081748531872, "grad_norm": 3.600613832473755, "learning_rate": 3.369968437049449e-05, "loss": 1.4041, "step": 84200 }, { "epoch": 3.2784972581962433, "grad_norm": 2.655857801437378, "learning_rate": 3.3680201067685e-05, "loss": 1.5394, "step": 84300 }, { "epoch": 3.2823863415392993, "grad_norm": 8.35605525970459, "learning_rate": 3.36607177648755e-05, "loss": 1.468, "step": 84400 }, { 
"epoch": 3.2862754248823554, "grad_norm": 1.755724310874939, "learning_rate": 3.364142929509411e-05, "loss": 1.4329, "step": 84500 }, { "epoch": 3.2901645082254114, "grad_norm": 5.045989990234375, "learning_rate": 3.362194599228462e-05, "loss": 1.4391, "step": 84600 }, { "epoch": 3.2940535915684674, "grad_norm": 2.3028769493103027, "learning_rate": 3.360246268947512e-05, "loss": 1.506, "step": 84700 }, { "epoch": 3.2979426749115235, "grad_norm": 7.664376258850098, "learning_rate": 3.358297938666563e-05, "loss": 1.4932, "step": 84800 }, { "epoch": 3.3018317582545795, "grad_norm": 2.613117218017578, "learning_rate": 3.356349608385614e-05, "loss": 1.5781, "step": 84900 }, { "epoch": 3.3057208415976356, "grad_norm": 1.27363121509552, "learning_rate": 3.3544012781046643e-05, "loss": 1.4628, "step": 85000 }, { "epoch": 3.3057208415976356, "eval_accuracy": 0.5240277777777778, "eval_f1": 0.5141865828117671, "eval_loss": 1.249597191810608, "eval_precision": 0.5327245478599365, "eval_recall": 0.5240277777777778, "eval_runtime": 6905.8287, "eval_samples_per_second": 26.065, "eval_steps_per_second": 0.407, "step": 85000 }, { "epoch": 3.3096099249406916, "grad_norm": 2.3392293453216553, "learning_rate": 3.352452947823715e-05, "loss": 1.5245, "step": 85100 }, { "epoch": 3.3134990082837477, "grad_norm": 4.100246429443359, "learning_rate": 3.350504617542766e-05, "loss": 1.5242, "step": 85200 }, { "epoch": 3.3173880916268037, "grad_norm": 2.0368542671203613, "learning_rate": 3.348556287261817e-05, "loss": 1.4661, "step": 85300 }, { "epoch": 3.3212771749698597, "grad_norm": 2.281926393508911, "learning_rate": 3.3466079569808676e-05, "loss": 1.3476, "step": 85400 }, { "epoch": 3.325166258312916, "grad_norm": 3.094721794128418, "learning_rate": 3.344659626699918e-05, "loss": 1.4371, "step": 85500 }, { "epoch": 3.329055341655972, "grad_norm": 3.3457040786743164, "learning_rate": 3.342730779721779e-05, "loss": 1.3815, "step": 85600 }, { "epoch": 3.332944424999028, "grad_norm": 
1.8253796100616455, "learning_rate": 3.3407824494408294e-05, "loss": 1.428, "step": 85700 }, { "epoch": 3.336833508342084, "grad_norm": 3.65969181060791, "learning_rate": 3.33883411915988e-05, "loss": 1.6003, "step": 85800 }, { "epoch": 3.34072259168514, "grad_norm": 1.5105876922607422, "learning_rate": 3.336885788878931e-05, "loss": 1.3746, "step": 85900 }, { "epoch": 3.344611675028196, "grad_norm": 4.094034671783447, "learning_rate": 3.334937458597982e-05, "loss": 1.3775, "step": 86000 }, { "epoch": 3.344611675028196, "eval_accuracy": 0.5305166666666666, "eval_f1": 0.515887716972914, "eval_loss": 1.2400094270706177, "eval_precision": 0.5383361192003241, "eval_recall": 0.5305166666666666, "eval_runtime": 6908.41, "eval_samples_per_second": 26.055, "eval_steps_per_second": 0.407, "step": 86000 }, { "epoch": 3.348500758371252, "grad_norm": 1.144547700881958, "learning_rate": 3.332989128317032e-05, "loss": 1.4686, "step": 86100 }, { "epoch": 3.352389841714308, "grad_norm": 5.948587894439697, "learning_rate": 3.331040798036083e-05, "loss": 1.4057, "step": 86200 }, { "epoch": 3.356278925057364, "grad_norm": 7.0730743408203125, "learning_rate": 3.329092467755134e-05, "loss": 1.5197, "step": 86300 }, { "epoch": 3.36016800840042, "grad_norm": 2.0767617225646973, "learning_rate": 3.327144137474184e-05, "loss": 1.4089, "step": 86400 }, { "epoch": 3.3640570917434762, "grad_norm": 2.9248766899108887, "learning_rate": 3.325195807193235e-05, "loss": 1.5999, "step": 86500 }, { "epoch": 3.3679461750865323, "grad_norm": 3.00128173828125, "learning_rate": 3.323247476912286e-05, "loss": 1.494, "step": 86600 }, { "epoch": 3.3718352584295883, "grad_norm": 2.8783106803894043, "learning_rate": 3.321299146631337e-05, "loss": 1.5326, "step": 86700 }, { "epoch": 3.3757243417726444, "grad_norm": 3.5770957469940186, "learning_rate": 3.3193508163503876e-05, "loss": 1.438, "step": 86800 }, { "epoch": 3.3796134251157004, "grad_norm": 2.661364793777466, "learning_rate": 3.3174024860694386e-05, 
"loss": 1.3948, "step": 86900 }, { "epoch": 3.3835025084587564, "grad_norm": 2.5023088455200195, "learning_rate": 3.3154541557884896e-05, "loss": 1.4292, "step": 87000 }, { "epoch": 3.3835025084587564, "eval_accuracy": 0.5140444444444444, "eval_f1": 0.4944784145574171, "eval_loss": 1.2726635932922363, "eval_precision": 0.5328939579556536, "eval_recall": 0.5140444444444444, "eval_runtime": 6907.0421, "eval_samples_per_second": 26.06, "eval_steps_per_second": 0.407, "step": 87000 }, { "epoch": 3.3873915918018125, "grad_norm": 5.323450565338135, "learning_rate": 3.31350582550754e-05, "loss": 1.5882, "step": 87100 }, { "epoch": 3.3912806751448685, "grad_norm": 3.141676902770996, "learning_rate": 3.311557495226591e-05, "loss": 1.4397, "step": 87200 }, { "epoch": 3.3951697584879246, "grad_norm": 2.97125244140625, "learning_rate": 3.309609164945642e-05, "loss": 1.4553, "step": 87300 }, { "epoch": 3.3990588418309806, "grad_norm": 2.067903995513916, "learning_rate": 3.307660834664693e-05, "loss": 1.4065, "step": 87400 }, { "epoch": 3.4029479251740367, "grad_norm": 2.253473997116089, "learning_rate": 3.305712504383743e-05, "loss": 1.4425, "step": 87500 }, { "epoch": 3.4068370085170927, "grad_norm": 1.8572195768356323, "learning_rate": 3.303764174102794e-05, "loss": 1.4007, "step": 87600 }, { "epoch": 3.4107260918601487, "grad_norm": 5.023209571838379, "learning_rate": 3.301815843821845e-05, "loss": 1.5408, "step": 87700 }, { "epoch": 3.414615175203205, "grad_norm": 2.7116568088531494, "learning_rate": 3.2998675135408955e-05, "loss": 1.4393, "step": 87800 }, { "epoch": 3.418504258546261, "grad_norm": 2.3123013973236084, "learning_rate": 3.2979191832599465e-05, "loss": 1.4659, "step": 87900 }, { "epoch": 3.422393341889317, "grad_norm": 5.335228443145752, "learning_rate": 3.2959708529789975e-05, "loss": 1.5157, "step": 88000 }, { "epoch": 3.422393341889317, "eval_accuracy": 0.52435, "eval_f1": 0.5145524868631783, "eval_loss": 1.2418718338012695, "eval_precision": 
0.5501746531634734, "eval_recall": 0.52435, "eval_runtime": 6907.6758, "eval_samples_per_second": 26.058, "eval_steps_per_second": 0.407, "step": 88000 }, { "epoch": 3.426282425232373, "grad_norm": 3.2962825298309326, "learning_rate": 3.294022522698048e-05, "loss": 1.4191, "step": 88100 }, { "epoch": 3.430171508575429, "grad_norm": 2.73140549659729, "learning_rate": 3.292074192417099e-05, "loss": 1.381, "step": 88200 }, { "epoch": 3.434060591918485, "grad_norm": 3.1027870178222656, "learning_rate": 3.29012586213615e-05, "loss": 1.5029, "step": 88300 }, { "epoch": 3.4379496752615406, "grad_norm": 3.2218024730682373, "learning_rate": 3.288177531855201e-05, "loss": 1.4777, "step": 88400 }, { "epoch": 3.4418387586045966, "grad_norm": 4.671781063079834, "learning_rate": 3.286229201574251e-05, "loss": 1.4502, "step": 88500 }, { "epoch": 3.4457278419476527, "grad_norm": 3.207301378250122, "learning_rate": 3.2842808712933014e-05, "loss": 1.5657, "step": 88600 }, { "epoch": 3.4496169252907087, "grad_norm": 4.7438740730285645, "learning_rate": 3.2823325410123524e-05, "loss": 1.4669, "step": 88700 }, { "epoch": 3.453506008633765, "grad_norm": 2.093123197555542, "learning_rate": 3.2803842107314034e-05, "loss": 1.5345, "step": 88800 }, { "epoch": 3.457395091976821, "grad_norm": 2.286372184753418, "learning_rate": 3.278435880450454e-05, "loss": 1.4579, "step": 88900 }, { "epoch": 3.461284175319877, "grad_norm": 4.389770984649658, "learning_rate": 3.276487550169505e-05, "loss": 1.4581, "step": 89000 }, { "epoch": 3.461284175319877, "eval_accuracy": 0.5317555555555555, "eval_f1": 0.5244768974714855, "eval_loss": 1.2296249866485596, "eval_precision": 0.5524447443453462, "eval_recall": 0.5317555555555555, "eval_runtime": 6904.5142, "eval_samples_per_second": 26.07, "eval_steps_per_second": 0.407, "step": 89000 }, { "epoch": 3.465173258662933, "grad_norm": 1.5768963098526, "learning_rate": 3.274539219888556e-05, "loss": 1.5242, "step": 89100 }, { "epoch": 3.469062342005989, 
"grad_norm": 3.1671791076660156, "learning_rate": 3.272590889607606e-05, "loss": 1.604, "step": 89200 }, { "epoch": 3.472951425349045, "grad_norm": 1.7382651567459106, "learning_rate": 3.270642559326657e-05, "loss": 1.5311, "step": 89300 }, { "epoch": 3.476840508692101, "grad_norm": 3.7668511867523193, "learning_rate": 3.268694229045708e-05, "loss": 1.5564, "step": 89400 }, { "epoch": 3.480729592035157, "grad_norm": 3.8879308700561523, "learning_rate": 3.266745898764759e-05, "loss": 1.5757, "step": 89500 }, { "epoch": 3.484618675378213, "grad_norm": 3.6786577701568604, "learning_rate": 3.264817051786619e-05, "loss": 1.4641, "step": 89600 }, { "epoch": 3.488507758721269, "grad_norm": 3.2170217037200928, "learning_rate": 3.26286872150567e-05, "loss": 1.5229, "step": 89700 }, { "epoch": 3.492396842064325, "grad_norm": 2.683622360229492, "learning_rate": 3.260920391224721e-05, "loss": 1.4206, "step": 89800 }, { "epoch": 3.4962859254073813, "grad_norm": 2.545144557952881, "learning_rate": 3.258972060943771e-05, "loss": 1.3846, "step": 89900 }, { "epoch": 3.5001750087504373, "grad_norm": 2.6119747161865234, "learning_rate": 3.257023730662822e-05, "loss": 1.3873, "step": 90000 }, { "epoch": 3.5001750087504373, "eval_accuracy": 0.5314222222222222, "eval_f1": 0.5210512166787294, "eval_loss": 1.2380450963974, "eval_precision": 0.5436252778769791, "eval_recall": 0.5314222222222222, "eval_runtime": 6906.1393, "eval_samples_per_second": 26.064, "eval_steps_per_second": 0.407, "step": 90000 }, { "epoch": 3.5040640920934933, "grad_norm": 4.578128337860107, "learning_rate": 3.255075400381873e-05, "loss": 1.4386, "step": 90100 }, { "epoch": 3.5079531754365494, "grad_norm": 3.7066702842712402, "learning_rate": 3.2531270701009234e-05, "loss": 1.5053, "step": 90200 }, { "epoch": 3.5118422587796054, "grad_norm": 5.37288761138916, "learning_rate": 3.2511787398199743e-05, "loss": 1.3969, "step": 90300 }, { "epoch": 3.5157313421226615, "grad_norm": 1.5149224996566772, "learning_rate": 
3.249230409539025e-05, "loss": 1.4005, "step": 90400 }, { "epoch": 3.5196204254657175, "grad_norm": 5.852210998535156, "learning_rate": 3.247282079258076e-05, "loss": 1.5061, "step": 90500 }, { "epoch": 3.5235095088087736, "grad_norm": 7.044808387756348, "learning_rate": 3.2453337489771266e-05, "loss": 1.5304, "step": 90600 }, { "epoch": 3.5273985921518296, "grad_norm": 3.984828233718872, "learning_rate": 3.2433854186961776e-05, "loss": 1.3334, "step": 90700 }, { "epoch": 3.5312876754948856, "grad_norm": 3.2413089275360107, "learning_rate": 3.2414370884152286e-05, "loss": 1.4639, "step": 90800 }, { "epoch": 3.5351767588379417, "grad_norm": 3.3417320251464844, "learning_rate": 3.239488758134279e-05, "loss": 1.4769, "step": 90900 }, { "epoch": 3.5390658421809977, "grad_norm": 2.9251792430877686, "learning_rate": 3.23754042785333e-05, "loss": 1.425, "step": 91000 }, { "epoch": 3.5390658421809977, "eval_accuracy": 0.5370944444444444, "eval_f1": 0.524226127701542, "eval_loss": 1.2300379276275635, "eval_precision": 0.5420144247562839, "eval_recall": 0.5370944444444444, "eval_runtime": 6910.6281, "eval_samples_per_second": 26.047, "eval_steps_per_second": 0.407, "step": 91000 }, { "epoch": 3.5429549255240538, "grad_norm": 4.897426605224609, "learning_rate": 3.235592097572381e-05, "loss": 1.4667, "step": 91100 }, { "epoch": 3.54684400886711, "grad_norm": 4.216968536376953, "learning_rate": 3.233643767291432e-05, "loss": 1.4562, "step": 91200 }, { "epoch": 3.550733092210166, "grad_norm": 2.4964957237243652, "learning_rate": 3.231714920313292e-05, "loss": 1.6334, "step": 91300 }, { "epoch": 3.554622175553222, "grad_norm": 3.075579881668091, "learning_rate": 3.229766590032343e-05, "loss": 1.5005, "step": 91400 }, { "epoch": 3.558511258896278, "grad_norm": 1.1100494861602783, "learning_rate": 3.227818259751394e-05, "loss": 1.5379, "step": 91500 }, { "epoch": 3.562400342239334, "grad_norm": 2.181279420852661, "learning_rate": 3.225869929470444e-05, "loss": 1.5184, "step": 91600 
}, { "epoch": 3.56628942558239, "grad_norm": 2.5938663482666016, "learning_rate": 3.223921599189495e-05, "loss": 1.3902, "step": 91700 }, { "epoch": 3.570178508925446, "grad_norm": 5.668018341064453, "learning_rate": 3.221973268908546e-05, "loss": 1.5088, "step": 91800 }, { "epoch": 3.574067592268502, "grad_norm": 2.3485522270202637, "learning_rate": 3.220024938627596e-05, "loss": 1.4451, "step": 91900 }, { "epoch": 3.577956675611558, "grad_norm": 3.0312836170196533, "learning_rate": 3.2180766083466466e-05, "loss": 1.4202, "step": 92000 }, { "epoch": 3.577956675611558, "eval_accuracy": 0.5430333333333334, "eval_f1": 0.5281645726373084, "eval_loss": 1.2211241722106934, "eval_precision": 0.5475140447841081, "eval_recall": 0.5430333333333334, "eval_runtime": 6912.5648, "eval_samples_per_second": 26.04, "eval_steps_per_second": 0.407, "step": 92000 }, { "epoch": 3.581845758954614, "grad_norm": 2.030238151550293, "learning_rate": 3.2161282780656976e-05, "loss": 1.3589, "step": 92100 }, { "epoch": 3.5857348422976703, "grad_norm": 4.6265339851379395, "learning_rate": 3.2141799477847486e-05, "loss": 1.4529, "step": 92200 }, { "epoch": 3.5896239256407263, "grad_norm": 3.8593695163726807, "learning_rate": 3.212231617503799e-05, "loss": 1.5468, "step": 92300 }, { "epoch": 3.5935130089837823, "grad_norm": 4.27279806137085, "learning_rate": 3.21028328722285e-05, "loss": 1.4329, "step": 92400 }, { "epoch": 3.5974020923268384, "grad_norm": 1.7422428131103516, "learning_rate": 3.208334956941901e-05, "loss": 1.4766, "step": 92500 }, { "epoch": 3.6012911756698944, "grad_norm": 3.824481725692749, "learning_rate": 3.206386626660952e-05, "loss": 1.4972, "step": 92600 }, { "epoch": 3.6051802590129505, "grad_norm": 2.899637460708618, "learning_rate": 3.204438296380002e-05, "loss": 1.4455, "step": 92700 }, { "epoch": 3.6090693423560065, "grad_norm": 2.803008794784546, "learning_rate": 3.202489966099053e-05, "loss": 1.5941, "step": 92800 }, { "epoch": 3.6129584256990626, "grad_norm": 
9.752340316772461, "learning_rate": 3.200541635818104e-05, "loss": 1.5172, "step": 92900 }, { "epoch": 3.6168475090421186, "grad_norm": 1.9785783290863037, "learning_rate": 3.1985933055371545e-05, "loss": 1.4748, "step": 93000 }, { "epoch": 3.6168475090421186, "eval_accuracy": 0.5406611111111111, "eval_f1": 0.5273135707608471, "eval_loss": 1.2256098985671997, "eval_precision": 0.5421876713223109, "eval_recall": 0.5406611111111111, "eval_runtime": 6871.5398, "eval_samples_per_second": 26.195, "eval_steps_per_second": 0.409, "step": 93000 }, { "epoch": 3.6207365923851746, "grad_norm": 2.6909830570220947, "learning_rate": 3.1966449752562055e-05, "loss": 1.3939, "step": 93100 }, { "epoch": 3.6246256757282307, "grad_norm": 4.296261310577393, "learning_rate": 3.1946966449752565e-05, "loss": 1.6542, "step": 93200 }, { "epoch": 3.6285147590712867, "grad_norm": 2.1016464233398438, "learning_rate": 3.192748314694307e-05, "loss": 1.4961, "step": 93300 }, { "epoch": 3.6324038424143428, "grad_norm": 2.224618911743164, "learning_rate": 3.190799984413358e-05, "loss": 1.5326, "step": 93400 }, { "epoch": 3.636292925757399, "grad_norm": 2.056445837020874, "learning_rate": 3.188851654132409e-05, "loss": 1.4225, "step": 93500 }, { "epoch": 3.640182009100455, "grad_norm": 3.9805376529693604, "learning_rate": 3.18690332385146e-05, "loss": 1.3959, "step": 93600 }, { "epoch": 3.644071092443511, "grad_norm": 3.560811758041382, "learning_rate": 3.18495499357051e-05, "loss": 1.6185, "step": 93700 }, { "epoch": 3.647960175786567, "grad_norm": 2.4125289916992188, "learning_rate": 3.183006663289561e-05, "loss": 1.4547, "step": 93800 }, { "epoch": 3.651849259129623, "grad_norm": 1.1717995405197144, "learning_rate": 3.181058333008612e-05, "loss": 1.3667, "step": 93900 }, { "epoch": 3.655738342472679, "grad_norm": 3.3690996170043945, "learning_rate": 3.1791100027276624e-05, "loss": 1.4289, "step": 94000 }, { "epoch": 3.655738342472679, "eval_accuracy": 0.5351111111111111, "eval_f1": 
0.5229684918616838, "eval_loss": 1.2292935848236084, "eval_precision": 0.5426125512618215, "eval_recall": 0.5351111111111111, "eval_runtime": 6836.2329, "eval_samples_per_second": 26.33, "eval_steps_per_second": 0.411, "step": 94000 }, { "epoch": 3.659627425815735, "grad_norm": 1.971055507659912, "learning_rate": 3.1771616724467134e-05, "loss": 1.5459, "step": 94100 }, { "epoch": 3.663516509158791, "grad_norm": 4.219365119934082, "learning_rate": 3.1752133421657644e-05, "loss": 1.5644, "step": 94200 }, { "epoch": 3.667405592501847, "grad_norm": 5.449632167816162, "learning_rate": 3.1732650118848154e-05, "loss": 1.3922, "step": 94300 }, { "epoch": 3.671294675844903, "grad_norm": 3.65008282661438, "learning_rate": 3.171316681603866e-05, "loss": 1.5592, "step": 94400 }, { "epoch": 3.6751837591879593, "grad_norm": 2.6982064247131348, "learning_rate": 3.1693683513229167e-05, "loss": 1.4608, "step": 94500 }, { "epoch": 3.6790728425310153, "grad_norm": 2.473337411880493, "learning_rate": 3.1674200210419676e-05, "loss": 1.3511, "step": 94600 }, { "epoch": 3.6829619258740713, "grad_norm": 3.28353214263916, "learning_rate": 3.165471690761018e-05, "loss": 1.536, "step": 94700 }, { "epoch": 3.6868510092171274, "grad_norm": 2.562896728515625, "learning_rate": 3.163523360480068e-05, "loss": 1.3409, "step": 94800 }, { "epoch": 3.6907400925601834, "grad_norm": 2.9679698944091797, "learning_rate": 3.161575030199119e-05, "loss": 1.4758, "step": 94900 }, { "epoch": 3.6946291759032395, "grad_norm": 4.904481410980225, "learning_rate": 3.15962669991817e-05, "loss": 1.4312, "step": 95000 }, { "epoch": 3.6946291759032395, "eval_accuracy": 0.5405166666666666, "eval_f1": 0.531362227038674, "eval_loss": 1.218002200126648, "eval_precision": 0.5482963541551074, "eval_recall": 0.5405166666666666, "eval_runtime": 6846.9867, "eval_samples_per_second": 26.289, "eval_steps_per_second": 0.411, "step": 95000 }, { "epoch": 3.6985182592462955, "grad_norm": 4.831879615783691, "learning_rate": 
3.1576783696372206e-05, "loss": 1.498, "step": 95100 }, { "epoch": 3.7024073425893516, "grad_norm": 6.772608280181885, "learning_rate": 3.1557300393562716e-05, "loss": 1.5479, "step": 95200 }, { "epoch": 3.7062964259324076, "grad_norm": 5.110821723937988, "learning_rate": 3.153801192378132e-05, "loss": 1.4197, "step": 95300 }, { "epoch": 3.7101855092754636, "grad_norm": 3.7198827266693115, "learning_rate": 3.1518528620971824e-05, "loss": 1.3604, "step": 95400 }, { "epoch": 3.7140745926185197, "grad_norm": 2.8027944564819336, "learning_rate": 3.1499045318162334e-05, "loss": 1.3437, "step": 95500 }, { "epoch": 3.7179636759615757, "grad_norm": 4.930880069732666, "learning_rate": 3.1479562015352844e-05, "loss": 1.4653, "step": 95600 }, { "epoch": 3.7218527593046318, "grad_norm": 6.228787422180176, "learning_rate": 3.1460078712543353e-05, "loss": 1.5787, "step": 95700 }, { "epoch": 3.725741842647688, "grad_norm": 3.9334332942962646, "learning_rate": 3.1440595409733857e-05, "loss": 1.5889, "step": 95800 }, { "epoch": 3.729630925990744, "grad_norm": 3.9678547382354736, "learning_rate": 3.1421112106924366e-05, "loss": 1.6025, "step": 95900 }, { "epoch": 3.7335200093338, "grad_norm": 4.714648723602295, "learning_rate": 3.1401628804114876e-05, "loss": 1.4342, "step": 96000 }, { "epoch": 3.7335200093338, "eval_accuracy": 0.5255833333333333, "eval_f1": 0.5085488349227097, "eval_loss": 1.2435342073440552, "eval_precision": 0.542007457202552, "eval_recall": 0.5255833333333334, "eval_runtime": 6839.5168, "eval_samples_per_second": 26.318, "eval_steps_per_second": 0.411, "step": 96000 }, { "epoch": 3.737409092676856, "grad_norm": 3.004689931869507, "learning_rate": 3.138214550130538e-05, "loss": 1.4544, "step": 96100 }, { "epoch": 3.741298176019912, "grad_norm": 3.6549532413482666, "learning_rate": 3.136266219849589e-05, "loss": 1.4443, "step": 96200 }, { "epoch": 3.745187259362968, "grad_norm": 2.7034502029418945, "learning_rate": 3.13431788956864e-05, "loss": 1.455, "step": 
96300 }, { "epoch": 3.749076342706024, "grad_norm": 12.869380950927734, "learning_rate": 3.132369559287691e-05, "loss": 1.5586, "step": 96400 }, { "epoch": 3.75296542604908, "grad_norm": 4.254218578338623, "learning_rate": 3.130421229006741e-05, "loss": 1.4488, "step": 96500 }, { "epoch": 3.756854509392136, "grad_norm": 4.682438850402832, "learning_rate": 3.128472898725792e-05, "loss": 1.4627, "step": 96600 }, { "epoch": 3.760743592735192, "grad_norm": 2.730894088745117, "learning_rate": 3.126524568444843e-05, "loss": 1.6195, "step": 96700 }, { "epoch": 3.7646326760782483, "grad_norm": 13.36895751953125, "learning_rate": 3.1245762381638935e-05, "loss": 1.5486, "step": 96800 }, { "epoch": 3.7685217594213043, "grad_norm": 1.6608645915985107, "learning_rate": 3.1226279078829445e-05, "loss": 1.3975, "step": 96900 }, { "epoch": 3.7724108427643603, "grad_norm": 3.6878271102905273, "learning_rate": 3.1206795776019955e-05, "loss": 1.8241, "step": 97000 }, { "epoch": 3.7724108427643603, "eval_accuracy": 0.5335166666666666, "eval_f1": 0.5138022716295895, "eval_loss": 1.238901972770691, "eval_precision": 0.5383889232771816, "eval_recall": 0.5335166666666666, "eval_runtime": 6850.9082, "eval_samples_per_second": 26.274, "eval_steps_per_second": 0.411, "step": 97000 }, { "epoch": 3.7762999261074164, "grad_norm": 3.079059600830078, "learning_rate": 3.118731247321046e-05, "loss": 1.3467, "step": 97100 }, { "epoch": 3.7801890094504724, "grad_norm": 3.2475154399871826, "learning_rate": 3.116782917040097e-05, "loss": 1.5389, "step": 97200 }, { "epoch": 3.7840780927935285, "grad_norm": 3.0948684215545654, "learning_rate": 3.114834586759148e-05, "loss": 1.4863, "step": 97300 }, { "epoch": 3.7879671761365845, "grad_norm": 4.115257263183594, "learning_rate": 3.112905739781008e-05, "loss": 1.6748, "step": 97400 }, { "epoch": 3.7918562594796406, "grad_norm": 8.039111137390137, "learning_rate": 3.1109574095000586e-05, "loss": 1.4786, "step": 97500 }, { "epoch": 3.7957453428226966, 
"grad_norm": 2.388096809387207, "learning_rate": 3.1090090792191096e-05, "loss": 1.5358, "step": 97600 }, { "epoch": 3.7996344261657526, "grad_norm": 1.2469233274459839, "learning_rate": 3.1070607489381606e-05, "loss": 1.4466, "step": 97700 }, { "epoch": 3.8035235095088087, "grad_norm": 2.8977973461151123, "learning_rate": 3.105112418657211e-05, "loss": 1.4507, "step": 97800 }, { "epoch": 3.8074125928518647, "grad_norm": 2.833513021469116, "learning_rate": 3.103164088376262e-05, "loss": 1.4395, "step": 97900 }, { "epoch": 3.8113016761949208, "grad_norm": 5.450898170471191, "learning_rate": 3.101215758095313e-05, "loss": 1.4589, "step": 98000 }, { "epoch": 3.8113016761949208, "eval_accuracy": 0.5222277777777777, "eval_f1": 0.5069632218089345, "eval_loss": 1.248403787612915, "eval_precision": 0.5458329628101077, "eval_recall": 0.5222277777777778, "eval_runtime": 6841.1186, "eval_samples_per_second": 26.311, "eval_steps_per_second": 0.411, "step": 98000 }, { "epoch": 3.815190759537977, "grad_norm": 3.2640798091888428, "learning_rate": 3.099267427814363e-05, "loss": 1.432, "step": 98100 }, { "epoch": 3.819079842881033, "grad_norm": 4.337706089019775, "learning_rate": 3.0973190975334135e-05, "loss": 1.3439, "step": 98200 }, { "epoch": 3.822968926224089, "grad_norm": 6.150230407714844, "learning_rate": 3.0953707672524645e-05, "loss": 1.4173, "step": 98300 }, { "epoch": 3.826858009567145, "grad_norm": 2.4226467609405518, "learning_rate": 3.0934224369715155e-05, "loss": 1.5314, "step": 98400 }, { "epoch": 3.830747092910201, "grad_norm": 2.2430450916290283, "learning_rate": 3.091474106690566e-05, "loss": 1.5104, "step": 98500 }, { "epoch": 3.834636176253257, "grad_norm": 2.210641384124756, "learning_rate": 3.089525776409617e-05, "loss": 1.4336, "step": 98600 }, { "epoch": 3.838525259596313, "grad_norm": 4.93291711807251, "learning_rate": 3.087577446128668e-05, "loss": 1.4597, "step": 98700 }, { "epoch": 3.842414342939369, "grad_norm": 1.8021893501281738, "learning_rate": 
3.085629115847719e-05, "loss": 1.497, "step": 98800 }, { "epoch": 3.846303426282425, "grad_norm": 0.9007977843284607, "learning_rate": 3.083680785566769e-05, "loss": 1.5145, "step": 98900 }, { "epoch": 3.850192509625481, "grad_norm": 2.0668535232543945, "learning_rate": 3.08173245528582e-05, "loss": 1.4884, "step": 99000 }, { "epoch": 3.850192509625481, "eval_accuracy": 0.5230833333333333, "eval_f1": 0.49956756154354204, "eval_loss": 1.261033296585083, "eval_precision": 0.5310719205895091, "eval_recall": 0.5230833333333333, "eval_runtime": 6845.7879, "eval_samples_per_second": 26.294, "eval_steps_per_second": 0.411, "step": 99000 }, { "epoch": 3.8540815929685373, "grad_norm": 2.922607660293579, "learning_rate": 3.079784125004871e-05, "loss": 1.541, "step": 99100 }, { "epoch": 3.8579706763115933, "grad_norm": 5.37409782409668, "learning_rate": 3.0778357947239214e-05, "loss": 1.51, "step": 99200 }, { "epoch": 3.8618597596546493, "grad_norm": 3.6619250774383545, "learning_rate": 3.075906947745782e-05, "loss": 1.4972, "step": 99300 }, { "epoch": 3.8657488429977054, "grad_norm": 4.875216484069824, "learning_rate": 3.073958617464833e-05, "loss": 1.4652, "step": 99400 }, { "epoch": 3.8696379263407614, "grad_norm": 3.563903331756592, "learning_rate": 3.072010287183883e-05, "loss": 1.5334, "step": 99500 }, { "epoch": 3.8735270096838175, "grad_norm": 1.8776129484176636, "learning_rate": 3.070061956902934e-05, "loss": 1.5916, "step": 99600 }, { "epoch": 3.8774160930268735, "grad_norm": 3.1906237602233887, "learning_rate": 3.068113626621985e-05, "loss": 1.4902, "step": 99700 }, { "epoch": 3.8813051763699296, "grad_norm": 3.3250601291656494, "learning_rate": 3.066165296341036e-05, "loss": 1.5923, "step": 99800 }, { "epoch": 3.8851942597129856, "grad_norm": 1.4924834966659546, "learning_rate": 3.0642169660600865e-05, "loss": 1.3895, "step": 99900 }, { "epoch": 3.8890833430560416, "grad_norm": 3.7679309844970703, "learning_rate": 3.0622686357791375e-05, "loss": 1.5725, "step": 
100000 }, { "epoch": 3.8890833430560416, "eval_accuracy": 0.5468277777777778, "eval_f1": 0.5383463372491776, "eval_loss": 1.2073739767074585, "eval_precision": 0.5455507210739236, "eval_recall": 0.5468277777777778, "eval_runtime": 6842.9564, "eval_samples_per_second": 26.304, "eval_steps_per_second": 0.411, "step": 100000 }, { "epoch": 3.8929724263990977, "grad_norm": 2.7060225009918213, "learning_rate": 3.0603203054981885e-05, "loss": 1.3446, "step": 100100 }, { "epoch": 3.8968615097421537, "grad_norm": 9.772302627563477, "learning_rate": 3.058371975217239e-05, "loss": 1.4563, "step": 100200 }, { "epoch": 3.9007505930852098, "grad_norm": 3.157963752746582, "learning_rate": 3.05642364493629e-05, "loss": 1.4846, "step": 100300 }, { "epoch": 3.904639676428266, "grad_norm": 7.174985885620117, "learning_rate": 3.054475314655341e-05, "loss": 1.411, "step": 100400 }, { "epoch": 3.908528759771322, "grad_norm": 1.970221996307373, "learning_rate": 3.052526984374392e-05, "loss": 1.4157, "step": 100500 }, { "epoch": 3.912417843114378, "grad_norm": 13.166536331176758, "learning_rate": 3.0505786540934424e-05, "loss": 1.5182, "step": 100600 }, { "epoch": 3.916306926457434, "grad_norm": 0.7776059508323669, "learning_rate": 3.048630323812493e-05, "loss": 1.3543, "step": 100700 }, { "epoch": 3.92019600980049, "grad_norm": 5.25037145614624, "learning_rate": 3.0466819935315437e-05, "loss": 1.403, "step": 100800 }, { "epoch": 3.924085093143546, "grad_norm": 2.5896990299224854, "learning_rate": 3.0447336632505947e-05, "loss": 1.4005, "step": 100900 }, { "epoch": 3.927974176486602, "grad_norm": 2.696503162384033, "learning_rate": 3.0427853329696453e-05, "loss": 1.4603, "step": 101000 }, { "epoch": 3.927974176486602, "eval_accuracy": 0.5408833333333334, "eval_f1": 0.5260873021305368, "eval_loss": 1.2154476642608643, "eval_precision": 0.5470781921096964, "eval_recall": 0.5408833333333334, "eval_runtime": 6839.3443, "eval_samples_per_second": 26.318, "eval_steps_per_second": 0.411, "step": 
101000 }, { "epoch": 3.931863259829658, "grad_norm": 4.802069187164307, "learning_rate": 3.0408370026886963e-05, "loss": 1.475, "step": 101100 }, { "epoch": 3.935752343172714, "grad_norm": 3.790762424468994, "learning_rate": 3.0388886724077463e-05, "loss": 1.4561, "step": 101200 }, { "epoch": 3.93964142651577, "grad_norm": 11.385169982910156, "learning_rate": 3.0369403421267973e-05, "loss": 1.5322, "step": 101300 }, { "epoch": 3.9435305098588262, "grad_norm": 1.9238953590393066, "learning_rate": 3.034992011845848e-05, "loss": 1.4935, "step": 101400 }, { "epoch": 3.9474195932018823, "grad_norm": 4.184492588043213, "learning_rate": 3.033043681564899e-05, "loss": 1.4731, "step": 101500 }, { "epoch": 3.9513086765449383, "grad_norm": 2.118429183959961, "learning_rate": 3.031114834586759e-05, "loss": 1.4725, "step": 101600 }, { "epoch": 3.9551977598879944, "grad_norm": 2.096726655960083, "learning_rate": 3.0291665043058098e-05, "loss": 1.3433, "step": 101700 }, { "epoch": 3.9590868432310504, "grad_norm": 2.4613711833953857, "learning_rate": 3.0272181740248607e-05, "loss": 1.5071, "step": 101800 }, { "epoch": 3.9629759265741065, "grad_norm": 1.8555899858474731, "learning_rate": 3.0252698437439114e-05, "loss": 1.3879, "step": 101900 }, { "epoch": 3.9668650099171625, "grad_norm": 2.742436170578003, "learning_rate": 3.0233215134629624e-05, "loss": 1.4581, "step": 102000 }, { "epoch": 3.9668650099171625, "eval_accuracy": 0.5365444444444445, "eval_f1": 0.5221179751453194, "eval_loss": 1.2234210968017578, "eval_precision": 0.5352321854619643, "eval_recall": 0.5365444444444445, "eval_runtime": 6854.4981, "eval_samples_per_second": 26.26, "eval_steps_per_second": 0.41, "step": 102000 }, { "epoch": 3.9707540932602186, "grad_norm": 3.3799407482147217, "learning_rate": 3.021373183182013e-05, "loss": 1.4894, "step": 102100 }, { "epoch": 3.9746431766032746, "grad_norm": 3.8582425117492676, "learning_rate": 3.0194248529010637e-05, "loss": 1.4631, "step": 102200 }, { "epoch": 
3.9785322599463306, "grad_norm": 4.433928966522217, "learning_rate": 3.0174765226201147e-05, "loss": 1.4693, "step": 102300 }, { "epoch": 3.9824213432893867, "grad_norm": 2.170273542404175, "learning_rate": 3.0155281923391653e-05, "loss": 1.4232, "step": 102400 }, { "epoch": 3.9863104266324427, "grad_norm": 3.546382427215576, "learning_rate": 3.0135798620582163e-05, "loss": 1.3303, "step": 102500 }, { "epoch": 3.9901995099754988, "grad_norm": 7.057014465332031, "learning_rate": 3.011631531777267e-05, "loss": 1.5843, "step": 102600 }, { "epoch": 3.994088593318555, "grad_norm": 2.3445522785186768, "learning_rate": 3.0096832014963176e-05, "loss": 1.459, "step": 102700 }, { "epoch": 3.997977676661611, "grad_norm": 1.9798797369003296, "learning_rate": 3.0077348712153686e-05, "loss": 1.5475, "step": 102800 }, { "epoch": 4.001866760004667, "grad_norm": 4.743581295013428, "learning_rate": 3.0057865409344193e-05, "loss": 1.4123, "step": 102900 }, { "epoch": 4.005755843347723, "grad_norm": 2.8648383617401123, "learning_rate": 3.0038382106534703e-05, "loss": 1.5738, "step": 103000 }, { "epoch": 4.005755843347723, "eval_accuracy": 0.5339333333333334, "eval_f1": 0.5205015838934941, "eval_loss": 1.2247358560562134, "eval_precision": 0.5445449783940667, "eval_recall": 0.5339333333333333, "eval_runtime": 6857.2242, "eval_samples_per_second": 26.25, "eval_steps_per_second": 0.41, "step": 103000 }, { "epoch": 4.009644926690779, "grad_norm": 2.6690895557403564, "learning_rate": 3.001889880372521e-05, "loss": 1.3686, "step": 103100 }, { "epoch": 4.013534010033835, "grad_norm": 6.729251861572266, "learning_rate": 2.999941550091572e-05, "loss": 1.5015, "step": 103200 }, { "epoch": 4.017423093376891, "grad_norm": 4.156710624694824, "learning_rate": 2.9979932198106226e-05, "loss": 1.4908, "step": 103300 }, { "epoch": 4.021312176719947, "grad_norm": 2.230139970779419, "learning_rate": 2.9960448895296732e-05, "loss": 1.3837, "step": 103400 }, { "epoch": 4.025201260063003, "grad_norm": 
2.624476194381714, "learning_rate": 2.9940965592487242e-05, "loss": 1.5173, "step": 103500 }, { "epoch": 4.029090343406059, "grad_norm": 6.094375133514404, "learning_rate": 2.992148228967775e-05, "loss": 1.4096, "step": 103600 }, { "epoch": 4.032979426749115, "grad_norm": 2.4608802795410156, "learning_rate": 2.990199898686826e-05, "loss": 1.4072, "step": 103700 }, { "epoch": 4.036868510092171, "grad_norm": 2.691469430923462, "learning_rate": 2.9882515684058765e-05, "loss": 1.4108, "step": 103800 }, { "epoch": 4.040757593435227, "grad_norm": 3.353543758392334, "learning_rate": 2.986303238124927e-05, "loss": 1.3734, "step": 103900 }, { "epoch": 4.044646676778283, "grad_norm": 3.5121490955352783, "learning_rate": 2.984354907843978e-05, "loss": 1.593, "step": 104000 }, { "epoch": 4.044646676778283, "eval_accuracy": 0.5209888888888888, "eval_f1": 0.5066901472131443, "eval_loss": 1.2526583671569824, "eval_precision": 0.5370310659546432, "eval_recall": 0.5209888888888888, "eval_runtime": 6856.0894, "eval_samples_per_second": 26.254, "eval_steps_per_second": 0.41, "step": 104000 }, { "epoch": 4.048535760121339, "grad_norm": 7.335382461547852, "learning_rate": 2.9824065775630288e-05, "loss": 1.4853, "step": 104100 }, { "epoch": 4.0524248434643955, "grad_norm": 2.609539270401001, "learning_rate": 2.9804582472820798e-05, "loss": 1.5121, "step": 104200 }, { "epoch": 4.0563139268074515, "grad_norm": 2.9453988075256348, "learning_rate": 2.9785099170011298e-05, "loss": 1.4398, "step": 104300 }, { "epoch": 4.0602030101505076, "grad_norm": 5.097457408905029, "learning_rate": 2.9765615867201807e-05, "loss": 1.4111, "step": 104400 }, { "epoch": 4.064092093493564, "grad_norm": 2.3189709186553955, "learning_rate": 2.9746132564392314e-05, "loss": 1.4033, "step": 104500 }, { "epoch": 4.06798117683662, "grad_norm": 3.1011669635772705, "learning_rate": 2.9726649261582824e-05, "loss": 1.5425, "step": 104600 }, { "epoch": 4.071870260179676, "grad_norm": 7.870682716369629, "learning_rate": 
2.970716595877333e-05, "loss": 1.4959, "step": 104700 }, { "epoch": 4.075759343522732, "grad_norm": 4.668301582336426, "learning_rate": 2.968768265596384e-05, "loss": 1.4066, "step": 104800 }, { "epoch": 4.079648426865788, "grad_norm": 3.0361533164978027, "learning_rate": 2.9668199353154347e-05, "loss": 1.3578, "step": 104900 }, { "epoch": 4.083537510208844, "grad_norm": 1.6112866401672363, "learning_rate": 2.9648716050344853e-05, "loss": 1.4523, "step": 105000 }, { "epoch": 4.083537510208844, "eval_accuracy": 0.5456111111111112, "eval_f1": 0.5261141838523008, "eval_loss": 1.2101609706878662, "eval_precision": 0.5411154120293807, "eval_recall": 0.5456111111111112, "eval_runtime": 6848.7592, "eval_samples_per_second": 26.282, "eval_steps_per_second": 0.411, "step": 105000 }, { "epoch": 4.0874265935519, "grad_norm": 2.2382566928863525, "learning_rate": 2.9629232747535363e-05, "loss": 1.4892, "step": 105100 }, { "epoch": 4.091315676894956, "grad_norm": 2.8192970752716064, "learning_rate": 2.960974944472587e-05, "loss": 1.4598, "step": 105200 }, { "epoch": 4.095204760238012, "grad_norm": 3.3120696544647217, "learning_rate": 2.959026614191638e-05, "loss": 1.4963, "step": 105300 }, { "epoch": 4.099093843581068, "grad_norm": 3.5920298099517822, "learning_rate": 2.9570782839106886e-05, "loss": 1.4287, "step": 105400 }, { "epoch": 4.102982926924124, "grad_norm": 2.274451494216919, "learning_rate": 2.9551299536297393e-05, "loss": 1.3878, "step": 105500 }, { "epoch": 4.10687201026718, "grad_norm": 4.901716709136963, "learning_rate": 2.9532011066515998e-05, "loss": 1.4265, "step": 105600 }, { "epoch": 4.110761093610236, "grad_norm": 5.1479291915893555, "learning_rate": 2.9512527763706504e-05, "loss": 1.5042, "step": 105700 }, { "epoch": 4.114650176953292, "grad_norm": 1.3319998979568481, "learning_rate": 2.9493044460897014e-05, "loss": 1.4663, "step": 105800 }, { "epoch": 4.118539260296348, "grad_norm": 1.9814605712890625, "learning_rate": 2.947356115808752e-05, "loss": 
1.3812, "step": 105900 }, { "epoch": 4.122428343639404, "grad_norm": 4.893007755279541, "learning_rate": 2.9454077855278027e-05, "loss": 1.5537, "step": 106000 }, { "epoch": 4.122428343639404, "eval_accuracy": 0.53405, "eval_f1": 0.5154903579046625, "eval_loss": 1.233683466911316, "eval_precision": 0.53340054730498, "eval_recall": 0.53405, "eval_runtime": 6843.7868, "eval_samples_per_second": 26.301, "eval_steps_per_second": 0.411, "step": 106000 }, { "epoch": 4.12631742698246, "grad_norm": 4.03909158706665, "learning_rate": 2.9434594552468537e-05, "loss": 1.5686, "step": 106100 }, { "epoch": 4.130206510325516, "grad_norm": 1.3069279193878174, "learning_rate": 2.9415111249659044e-05, "loss": 1.4081, "step": 106200 }, { "epoch": 4.134095593668572, "grad_norm": 3.504511594772339, "learning_rate": 2.9395627946849553e-05, "loss": 1.4924, "step": 106300 }, { "epoch": 4.137984677011628, "grad_norm": 2.8455770015716553, "learning_rate": 2.937614464404006e-05, "loss": 1.4883, "step": 106400 }, { "epoch": 4.1418737603546845, "grad_norm": 4.498127460479736, "learning_rate": 2.9356661341230567e-05, "loss": 1.4954, "step": 106500 }, { "epoch": 4.1457628436977405, "grad_norm": 4.591662883758545, "learning_rate": 2.9337178038421076e-05, "loss": 1.4337, "step": 106600 }, { "epoch": 4.1496519270407966, "grad_norm": 2.4918456077575684, "learning_rate": 2.9317694735611583e-05, "loss": 1.4293, "step": 106700 }, { "epoch": 4.153541010383853, "grad_norm": 4.644388198852539, "learning_rate": 2.9298211432802093e-05, "loss": 1.6862, "step": 106800 }, { "epoch": 4.157430093726909, "grad_norm": 5.302671432495117, "learning_rate": 2.92787281299926e-05, "loss": 1.7203, "step": 106900 }, { "epoch": 4.161319177069965, "grad_norm": 5.130696773529053, "learning_rate": 2.925924482718311e-05, "loss": 1.4931, "step": 107000 }, { "epoch": 4.161319177069965, "eval_accuracy": 0.5437388888888889, "eval_f1": 0.5335999892308664, "eval_loss": 1.211417317390442, "eval_precision": 0.5460867749251973, 
"eval_recall": 0.5437388888888889, "eval_runtime": 6840.4744, "eval_samples_per_second": 26.314, "eval_steps_per_second": 0.411, "step": 107000 }, { "epoch": 4.165208260413021, "grad_norm": 2.487485647201538, "learning_rate": 2.9239761524373616e-05, "loss": 1.5706, "step": 107100 }, { "epoch": 4.169097343756077, "grad_norm": 2.9438815116882324, "learning_rate": 2.9220278221564122e-05, "loss": 1.4426, "step": 107200 }, { "epoch": 4.172986427099133, "grad_norm": 2.5352606773376465, "learning_rate": 2.9200794918754625e-05, "loss": 1.4776, "step": 107300 }, { "epoch": 4.176875510442189, "grad_norm": 3.6519277095794678, "learning_rate": 2.9181311615945135e-05, "loss": 1.4293, "step": 107400 }, { "epoch": 4.180764593785245, "grad_norm": 2.8033599853515625, "learning_rate": 2.9161828313135642e-05, "loss": 1.4494, "step": 107500 }, { "epoch": 4.184653677128301, "grad_norm": 2.1894683837890625, "learning_rate": 2.914234501032615e-05, "loss": 1.6714, "step": 107600 }, { "epoch": 4.188542760471357, "grad_norm": 2.435467481613159, "learning_rate": 2.9123056540544753e-05, "loss": 1.5395, "step": 107700 }, { "epoch": 4.192431843814413, "grad_norm": 4.135743141174316, "learning_rate": 2.910357323773526e-05, "loss": 1.5031, "step": 107800 }, { "epoch": 4.196320927157469, "grad_norm": 5.2851409912109375, "learning_rate": 2.9084089934925766e-05, "loss": 1.4058, "step": 107900 }, { "epoch": 4.200210010500525, "grad_norm": 2.8358099460601807, "learning_rate": 2.9064606632116276e-05, "loss": 1.4286, "step": 108000 }, { "epoch": 4.200210010500525, "eval_accuracy": 0.5153111111111112, "eval_f1": 0.4956221982905286, "eval_loss": 1.2610857486724854, "eval_precision": 0.5457733968181441, "eval_recall": 0.515311111111111, "eval_runtime": 6853.4112, "eval_samples_per_second": 26.264, "eval_steps_per_second": 0.41, "step": 108000 }, { "epoch": 4.204099093843581, "grad_norm": 8.997483253479004, "learning_rate": 2.9045123329306783e-05, "loss": 1.3902, "step": 108100 }, { "epoch": 
4.207988177186637, "grad_norm": 9.018051147460938, "learning_rate": 2.9025640026497293e-05, "loss": 1.5028, "step": 108200 }, { "epoch": 4.211877260529693, "grad_norm": 2.077026128768921, "learning_rate": 2.90061567236878e-05, "loss": 1.5944, "step": 108300 }, { "epoch": 4.215766343872749, "grad_norm": 2.227729082107544, "learning_rate": 2.898667342087831e-05, "loss": 1.4553, "step": 108400 }, { "epoch": 4.219655427215805, "grad_norm": 6.954668045043945, "learning_rate": 2.8967190118068816e-05, "loss": 1.518, "step": 108500 }, { "epoch": 4.223544510558861, "grad_norm": 3.081763505935669, "learning_rate": 2.8947706815259322e-05, "loss": 1.5179, "step": 108600 }, { "epoch": 4.227433593901917, "grad_norm": 4.905932426452637, "learning_rate": 2.8928223512449832e-05, "loss": 1.6271, "step": 108700 }, { "epoch": 4.2313226772449735, "grad_norm": 2.613628625869751, "learning_rate": 2.890874020964034e-05, "loss": 1.5318, "step": 108800 }, { "epoch": 4.2352117605880295, "grad_norm": 2.285127878189087, "learning_rate": 2.888925690683085e-05, "loss": 1.5492, "step": 108900 }, { "epoch": 4.2391008439310855, "grad_norm": 2.311089515686035, "learning_rate": 2.8869773604021355e-05, "loss": 1.3667, "step": 109000 }, { "epoch": 4.2391008439310855, "eval_accuracy": 0.5439333333333334, "eval_f1": 0.5301099784555464, "eval_loss": 1.21080482006073, "eval_precision": 0.546264658344764, "eval_recall": 0.5439333333333334, "eval_runtime": 6854.7605, "eval_samples_per_second": 26.259, "eval_steps_per_second": 0.41, "step": 109000 }, { "epoch": 4.242989927274142, "grad_norm": 3.201484441757202, "learning_rate": 2.885029030121186e-05, "loss": 1.4629, "step": 109100 }, { "epoch": 4.246879010617198, "grad_norm": 3.135488748550415, "learning_rate": 2.883080699840237e-05, "loss": 1.6118, "step": 109200 }, { "epoch": 4.250768093960254, "grad_norm": 1.9276597499847412, "learning_rate": 2.8811323695592878e-05, "loss": 1.4835, "step": 109300 }, { "epoch": 4.25465717730331, "grad_norm": 
4.615667819976807, "learning_rate": 2.8791840392783388e-05, "loss": 1.4446, "step": 109400 }, { "epoch": 4.258546260646366, "grad_norm": 1.472648024559021, "learning_rate": 2.8772357089973894e-05, "loss": 1.4261, "step": 109500 }, { "epoch": 4.262435343989422, "grad_norm": 3.189324140548706, "learning_rate": 2.8752873787164404e-05, "loss": 1.5095, "step": 109600 }, { "epoch": 4.266324427332478, "grad_norm": 3.238450050354004, "learning_rate": 2.8733585317383006e-05, "loss": 1.4104, "step": 109700 }, { "epoch": 4.270213510675534, "grad_norm": 4.5963897705078125, "learning_rate": 2.8714102014573512e-05, "loss": 1.4382, "step": 109800 }, { "epoch": 4.27410259401859, "grad_norm": 3.089970827102661, "learning_rate": 2.8694813544792114e-05, "loss": 1.643, "step": 109900 }, { "epoch": 4.277991677361646, "grad_norm": 2.585320234298706, "learning_rate": 2.8675330241982624e-05, "loss": 1.4723, "step": 110000 }, { "epoch": 4.277991677361646, "eval_accuracy": 0.5312333333333333, "eval_f1": 0.5212893131996393, "eval_loss": 1.2268587350845337, "eval_precision": 0.5496691939881158, "eval_recall": 0.5312333333333333, "eval_runtime": 6849.5189, "eval_samples_per_second": 26.279, "eval_steps_per_second": 0.411, "step": 110000 }, { "epoch": 4.281880760704702, "grad_norm": 1.4724042415618896, "learning_rate": 2.865584693917313e-05, "loss": 1.4316, "step": 110100 }, { "epoch": 4.285769844047758, "grad_norm": 24.27642822265625, "learning_rate": 2.863636363636364e-05, "loss": 1.6327, "step": 110200 }, { "epoch": 4.289658927390814, "grad_norm": 4.110645771026611, "learning_rate": 2.8616880333554147e-05, "loss": 1.3506, "step": 110300 }, { "epoch": 4.29354801073387, "grad_norm": 2.8673505783081055, "learning_rate": 2.8597397030744653e-05, "loss": 1.5769, "step": 110400 }, { "epoch": 4.297437094076926, "grad_norm": 7.560262680053711, "learning_rate": 2.8577913727935163e-05, "loss": 1.4662, "step": 110500 }, { "epoch": 4.301326177419982, "grad_norm": 3.584359884262085, "learning_rate": 
2.855843042512567e-05, "loss": 1.3512, "step": 110600 }, { "epoch": 4.305215260763038, "grad_norm": 2.7447874546051025, "learning_rate": 2.853894712231618e-05, "loss": 1.5587, "step": 110700 }, { "epoch": 4.309104344106094, "grad_norm": 7.231234073638916, "learning_rate": 2.8519463819506686e-05, "loss": 1.3882, "step": 110800 }, { "epoch": 4.31299342744915, "grad_norm": 2.5299837589263916, "learning_rate": 2.8499980516697196e-05, "loss": 1.5779, "step": 110900 }, { "epoch": 4.316882510792206, "grad_norm": 1.9336833953857422, "learning_rate": 2.8480497213887696e-05, "loss": 1.3852, "step": 111000 }, { "epoch": 4.316882510792206, "eval_accuracy": 0.5451666666666667, "eval_f1": 0.5289892199018348, "eval_loss": 1.2127546072006226, "eval_precision": 0.5487846348887978, "eval_recall": 0.5451666666666667, "eval_runtime": 6840.1, "eval_samples_per_second": 26.315, "eval_steps_per_second": 0.411, "step": 111000 }, { "epoch": 4.3207715941352625, "grad_norm": 2.978119134902954, "learning_rate": 2.8461013911078206e-05, "loss": 1.5052, "step": 111100 }, { "epoch": 4.3246606774783185, "grad_norm": 2.0322084426879883, "learning_rate": 2.8441530608268712e-05, "loss": 1.4775, "step": 111200 }, { "epoch": 4.3285497608213745, "grad_norm": 3.2550253868103027, "learning_rate": 2.8422047305459222e-05, "loss": 1.5106, "step": 111300 }, { "epoch": 4.332438844164431, "grad_norm": 2.117290735244751, "learning_rate": 2.840256400264973e-05, "loss": 1.3479, "step": 111400 }, { "epoch": 4.336327927507487, "grad_norm": 5.6786580085754395, "learning_rate": 2.8383080699840235e-05, "loss": 1.5364, "step": 111500 }, { "epoch": 4.340217010850543, "grad_norm": 7.668860912322998, "learning_rate": 2.8363597397030745e-05, "loss": 1.4549, "step": 111600 }, { "epoch": 4.344106094193599, "grad_norm": 3.191805124282837, "learning_rate": 2.834411409422125e-05, "loss": 1.4616, "step": 111700 }, { "epoch": 4.347995177536655, "grad_norm": 2.0247278213500977, "learning_rate": 2.832463079141176e-05, "loss": 
1.4097, "step": 111800 }, { "epoch": 4.351884260879711, "grad_norm": 3.860193967819214, "learning_rate": 2.8305147488602268e-05, "loss": 1.4603, "step": 111900 }, { "epoch": 4.355773344222767, "grad_norm": 3.035848617553711, "learning_rate": 2.8285664185792775e-05, "loss": 1.489, "step": 112000 }, { "epoch": 4.355773344222767, "eval_accuracy": 0.54185, "eval_f1": 0.5309544950028787, "eval_loss": 1.2094485759735107, "eval_precision": 0.5470994911207389, "eval_recall": 0.54185, "eval_runtime": 6859.6664, "eval_samples_per_second": 26.24, "eval_steps_per_second": 0.41, "step": 112000 }, { "epoch": 4.359662427565823, "grad_norm": 6.496464729309082, "learning_rate": 2.8266180882983285e-05, "loss": 1.3813, "step": 112100 }, { "epoch": 4.363551510908879, "grad_norm": 2.112854480743408, "learning_rate": 2.824669758017379e-05, "loss": 1.4899, "step": 112200 }, { "epoch": 4.367440594251935, "grad_norm": 1.6540718078613281, "learning_rate": 2.82272142773643e-05, "loss": 1.3683, "step": 112300 }, { "epoch": 4.371329677594991, "grad_norm": 3.333434581756592, "learning_rate": 2.8207730974554807e-05, "loss": 1.4023, "step": 112400 }, { "epoch": 4.375218760938047, "grad_norm": 4.344939708709717, "learning_rate": 2.8188247671745317e-05, "loss": 1.6041, "step": 112500 }, { "epoch": 4.379107844281103, "grad_norm": 2.4337613582611084, "learning_rate": 2.8168764368935824e-05, "loss": 1.5065, "step": 112600 }, { "epoch": 4.382996927624159, "grad_norm": 3.6722412109375, "learning_rate": 2.814928106612633e-05, "loss": 1.4551, "step": 112700 }, { "epoch": 4.386886010967215, "grad_norm": 2.823208808898926, "learning_rate": 2.812979776331684e-05, "loss": 1.4842, "step": 112800 }, { "epoch": 4.390775094310271, "grad_norm": 2.7576797008514404, "learning_rate": 2.8110314460507347e-05, "loss": 1.5035, "step": 112900 }, { "epoch": 4.394664177653327, "grad_norm": 2.6779181957244873, "learning_rate": 2.8090831157697857e-05, "loss": 1.4598, "step": 113000 }, { "epoch": 4.394664177653327, 
"eval_accuracy": 0.5356, "eval_f1": 0.5245949045245608, "eval_loss": 1.2183064222335815, "eval_precision": 0.5418341834308862, "eval_recall": 0.5356, "eval_runtime": 6857.4452, "eval_samples_per_second": 26.249, "eval_steps_per_second": 0.41, "step": 113000 }, { "epoch": 4.398553260996383, "grad_norm": 7.436748504638672, "learning_rate": 2.8071347854888363e-05, "loss": 1.4256, "step": 113100 }, { "epoch": 4.402442344339439, "grad_norm": 1.8180633783340454, "learning_rate": 2.805186455207887e-05, "loss": 1.4005, "step": 113200 }, { "epoch": 4.406331427682495, "grad_norm": 3.8570680618286133, "learning_rate": 2.803238124926938e-05, "loss": 1.4607, "step": 113300 }, { "epoch": 4.4102205110255515, "grad_norm": 2.6973893642425537, "learning_rate": 2.8012897946459886e-05, "loss": 1.5485, "step": 113400 }, { "epoch": 4.4141095943686075, "grad_norm": 4.777961730957031, "learning_rate": 2.7993414643650396e-05, "loss": 1.4438, "step": 113500 }, { "epoch": 4.4179986777116635, "grad_norm": 3.8277645111083984, "learning_rate": 2.7973931340840903e-05, "loss": 1.4434, "step": 113600 }, { "epoch": 4.42188776105472, "grad_norm": 1.5033832788467407, "learning_rate": 2.7954448038031413e-05, "loss": 1.4227, "step": 113700 }, { "epoch": 4.425776844397776, "grad_norm": 4.293521404266357, "learning_rate": 2.7934964735221912e-05, "loss": 1.4041, "step": 113800 }, { "epoch": 4.429665927740832, "grad_norm": 2.1052162647247314, "learning_rate": 2.791567626544052e-05, "loss": 1.3933, "step": 113900 }, { "epoch": 4.433555011083888, "grad_norm": 3.1535582542419434, "learning_rate": 2.789619296263103e-05, "loss": 1.5491, "step": 114000 }, { "epoch": 4.433555011083888, "eval_accuracy": 0.5438166666666666, "eval_f1": 0.5371539459797273, "eval_loss": 1.20622980594635, "eval_precision": 0.5534984486945551, "eval_recall": 0.5438166666666667, "eval_runtime": 6839.8465, "eval_samples_per_second": 26.316, "eval_steps_per_second": 0.411, "step": 114000 }, { "epoch": 4.437444094426944, "grad_norm": 
1.9128328561782837, "learning_rate": 2.787670965982153e-05, "loss": 1.5221, "step": 114100 }, { "epoch": 4.44133317777, "grad_norm": 3.4761343002319336, "learning_rate": 2.785722635701204e-05, "loss": 1.5202, "step": 114200 }, { "epoch": 4.445222261113056, "grad_norm": 3.8772671222686768, "learning_rate": 2.7837743054202547e-05, "loss": 1.4654, "step": 114300 }, { "epoch": 4.449111344456112, "grad_norm": 2.4887638092041016, "learning_rate": 2.7818259751393057e-05, "loss": 1.6731, "step": 114400 }, { "epoch": 4.453000427799168, "grad_norm": 2.539456844329834, "learning_rate": 2.7798776448583563e-05, "loss": 1.5351, "step": 114500 }, { "epoch": 4.456889511142224, "grad_norm": 5.914574146270752, "learning_rate": 2.777929314577407e-05, "loss": 1.415, "step": 114600 }, { "epoch": 4.46077859448528, "grad_norm": 4.319646835327148, "learning_rate": 2.775980984296458e-05, "loss": 1.4931, "step": 114700 }, { "epoch": 4.464667677828336, "grad_norm": 2.227620840072632, "learning_rate": 2.7740326540155086e-05, "loss": 1.5624, "step": 114800 }, { "epoch": 4.468556761171392, "grad_norm": 1.9009370803833008, "learning_rate": 2.7720843237345596e-05, "loss": 1.4301, "step": 114900 }, { "epoch": 4.472445844514448, "grad_norm": 2.3817644119262695, "learning_rate": 2.7701359934536103e-05, "loss": 1.3628, "step": 115000 }, { "epoch": 4.472445844514448, "eval_accuracy": 0.5381166666666667, "eval_f1": 0.5280535402319294, "eval_loss": 1.2204170227050781, "eval_precision": 0.5430036693289062, "eval_recall": 0.5381166666666667, "eval_runtime": 6837.0299, "eval_samples_per_second": 26.327, "eval_steps_per_second": 0.411, "step": 115000 }, { "epoch": 4.476334927857504, "grad_norm": 5.512262344360352, "learning_rate": 2.7681876631726612e-05, "loss": 1.4393, "step": 115100 }, { "epoch": 4.48022401120056, "grad_norm": 2.414970874786377, "learning_rate": 2.766239332891712e-05, "loss": 1.5349, "step": 115200 }, { "epoch": 4.484113094543616, "grad_norm": 2.0715749263763428, "learning_rate": 
2.7642910026107626e-05, "loss": 1.3359, "step": 115300 }, { "epoch": 4.488002177886672, "grad_norm": 3.4886839389801025, "learning_rate": 2.7623426723298135e-05, "loss": 1.437, "step": 115400 }, { "epoch": 4.491891261229728, "grad_norm": 1.9266588687896729, "learning_rate": 2.7603943420488642e-05, "loss": 1.5823, "step": 115500 }, { "epoch": 4.495780344572784, "grad_norm": 1.0120360851287842, "learning_rate": 2.7584460117679152e-05, "loss": 1.4245, "step": 115600 }, { "epoch": 4.4996694279158405, "grad_norm": 3.7969601154327393, "learning_rate": 2.756497681486966e-05, "loss": 1.489, "step": 115700 }, { "epoch": 4.5035585112588965, "grad_norm": 2.959249496459961, "learning_rate": 2.7545493512060165e-05, "loss": 1.5336, "step": 115800 }, { "epoch": 4.5074475946019525, "grad_norm": 1.357529640197754, "learning_rate": 2.752620504227877e-05, "loss": 1.6406, "step": 115900 }, { "epoch": 4.511336677945009, "grad_norm": 2.5849862098693848, "learning_rate": 2.7506721739469276e-05, "loss": 1.5225, "step": 116000 }, { "epoch": 4.511336677945009, "eval_accuracy": 0.5362388888888889, "eval_f1": 0.5288938574760201, "eval_loss": 1.2173670530319214, "eval_precision": 0.5573016527991282, "eval_recall": 0.5362388888888888, "eval_runtime": 6851.4692, "eval_samples_per_second": 26.272, "eval_steps_per_second": 0.411, "step": 116000 }, { "epoch": 4.515225761288065, "grad_norm": 2.7378785610198975, "learning_rate": 2.7487238436659786e-05, "loss": 1.5067, "step": 116100 }, { "epoch": 4.519114844631121, "grad_norm": 3.4167284965515137, "learning_rate": 2.7467755133850293e-05, "loss": 1.4517, "step": 116200 }, { "epoch": 4.523003927974177, "grad_norm": 1.3542879819869995, "learning_rate": 2.74482718310408e-05, "loss": 1.3941, "step": 116300 }, { "epoch": 4.526893011317233, "grad_norm": 5.507721900939941, "learning_rate": 2.742878852823131e-05, "loss": 1.5018, "step": 116400 }, { "epoch": 4.530782094660289, "grad_norm": 2.7522058486938477, "learning_rate": 2.7409305225421816e-05, "loss": 
1.4048, "step": 116500 }, { "epoch": 4.534671178003345, "grad_norm": 3.9861855506896973, "learning_rate": 2.7389821922612326e-05, "loss": 1.5402, "step": 116600 }, { "epoch": 4.538560261346401, "grad_norm": 2.7396936416625977, "learning_rate": 2.7370338619802832e-05, "loss": 1.6087, "step": 116700 }, { "epoch": 4.542449344689457, "grad_norm": 3.6984426975250244, "learning_rate": 2.735085531699334e-05, "loss": 1.4705, "step": 116800 }, { "epoch": 4.546338428032513, "grad_norm": 3.2038023471832275, "learning_rate": 2.733137201418385e-05, "loss": 1.5721, "step": 116900 }, { "epoch": 4.550227511375569, "grad_norm": 1.2675554752349854, "learning_rate": 2.7311888711374355e-05, "loss": 1.4036, "step": 117000 }, { "epoch": 4.550227511375569, "eval_accuracy": 0.5439944444444444, "eval_f1": 0.5315636406748231, "eval_loss": 1.2083544731140137, "eval_precision": 0.5547197812396377, "eval_recall": 0.5439944444444444, "eval_runtime": 6829.7652, "eval_samples_per_second": 26.355, "eval_steps_per_second": 0.412, "step": 117000 }, { "epoch": 4.554116594718625, "grad_norm": 3.5597774982452393, "learning_rate": 2.7292405408564865e-05, "loss": 1.6079, "step": 117100 }, { "epoch": 4.558005678061681, "grad_norm": 3.155365228652954, "learning_rate": 2.7272922105755365e-05, "loss": 1.5706, "step": 117200 }, { "epoch": 4.561894761404737, "grad_norm": 1.2961684465408325, "learning_rate": 2.7253438802945875e-05, "loss": 1.375, "step": 117300 }, { "epoch": 4.565783844747793, "grad_norm": 4.118727684020996, "learning_rate": 2.723395550013638e-05, "loss": 1.4899, "step": 117400 }, { "epoch": 4.569672928090849, "grad_norm": 1.1666998863220215, "learning_rate": 2.721447219732689e-05, "loss": 1.4095, "step": 117500 }, { "epoch": 4.573562011433905, "grad_norm": 5.171709060668945, "learning_rate": 2.7194988894517398e-05, "loss": 1.5086, "step": 117600 }, { "epoch": 4.577451094776961, "grad_norm": 2.7443690299987793, "learning_rate": 2.7175505591707908e-05, "loss": 1.584, "step": 117700 }, { "epoch": 
4.581340178120017, "grad_norm": 3.137174606323242, "learning_rate": 2.7156022288898414e-05, "loss": 1.3959, "step": 117800 }, { "epoch": 4.585229261463073, "grad_norm": 0.524573564529419, "learning_rate": 2.713653898608892e-05, "loss": 1.4524, "step": 117900 }, { "epoch": 4.5891183448061295, "grad_norm": 2.658357858657837, "learning_rate": 2.7117250516307526e-05, "loss": 1.4956, "step": 118000 }, { "epoch": 4.5891183448061295, "eval_accuracy": 0.53185, "eval_f1": 0.5173093422407827, "eval_loss": 1.2280298471450806, "eval_precision": 0.5358208599507156, "eval_recall": 0.5318499999999999, "eval_runtime": 6820.9497, "eval_samples_per_second": 26.389, "eval_steps_per_second": 0.412, "step": 118000 }, { "epoch": 4.5930074281491855, "grad_norm": 3.140775680541992, "learning_rate": 2.7097767213498032e-05, "loss": 1.4592, "step": 118100 }, { "epoch": 4.5968965114922415, "grad_norm": 4.925965309143066, "learning_rate": 2.707828391068854e-05, "loss": 1.4444, "step": 118200 }, { "epoch": 4.600785594835298, "grad_norm": 12.638775825500488, "learning_rate": 2.705880060787905e-05, "loss": 1.5162, "step": 118300 }, { "epoch": 4.604674678178354, "grad_norm": 2.6176629066467285, "learning_rate": 2.7039317305069555e-05, "loss": 1.361, "step": 118400 }, { "epoch": 4.60856376152141, "grad_norm": 3.8061952590942383, "learning_rate": 2.7019834002260065e-05, "loss": 1.635, "step": 118500 }, { "epoch": 4.612452844864466, "grad_norm": 3.8750967979431152, "learning_rate": 2.700035069945057e-05, "loss": 1.4213, "step": 118600 }, { "epoch": 4.616341928207522, "grad_norm": 4.93895149230957, "learning_rate": 2.698086739664108e-05, "loss": 1.3617, "step": 118700 }, { "epoch": 4.620231011550578, "grad_norm": 2.8518168926239014, "learning_rate": 2.6961384093831588e-05, "loss": 1.5757, "step": 118800 }, { "epoch": 4.624120094893634, "grad_norm": 1.5935434103012085, "learning_rate": 2.6941900791022094e-05, "loss": 1.5391, "step": 118900 }, { "epoch": 4.62800917823669, "grad_norm": 
1.4280357360839844, "learning_rate": 2.6922417488212604e-05, "loss": 1.3991, "step": 119000 }, { "epoch": 4.62800917823669, "eval_accuracy": 0.5453166666666667, "eval_f1": 0.5338344405644703, "eval_loss": 1.2046219110488892, "eval_precision": 0.549447303576673, "eval_recall": 0.5453166666666667, "eval_runtime": 6828.2871, "eval_samples_per_second": 26.361, "eval_steps_per_second": 0.412, "step": 119000 }, { "epoch": 4.631898261579746, "grad_norm": 2.262718915939331, "learning_rate": 2.690293418540311e-05, "loss": 1.554, "step": 119100 }, { "epoch": 4.635787344922802, "grad_norm": 3.788743257522583, "learning_rate": 2.688345088259362e-05, "loss": 1.4474, "step": 119200 }, { "epoch": 4.639676428265858, "grad_norm": 1.7821648120880127, "learning_rate": 2.6863967579784127e-05, "loss": 1.5131, "step": 119300 }, { "epoch": 4.643565511608914, "grad_norm": 14.625734329223633, "learning_rate": 2.6844484276974634e-05, "loss": 1.5465, "step": 119400 }, { "epoch": 4.64745459495197, "grad_norm": 4.33615779876709, "learning_rate": 2.6825000974165144e-05, "loss": 1.4043, "step": 119500 }, { "epoch": 4.651343678295026, "grad_norm": 2.63066029548645, "learning_rate": 2.680551767135565e-05, "loss": 1.5282, "step": 119600 }, { "epoch": 4.655232761638082, "grad_norm": 1.5687698125839233, "learning_rate": 2.678603436854616e-05, "loss": 1.3974, "step": 119700 }, { "epoch": 4.659121844981138, "grad_norm": 2.611372470855713, "learning_rate": 2.6766551065736667e-05, "loss": 1.4595, "step": 119800 }, { "epoch": 4.663010928324194, "grad_norm": 3.2560575008392334, "learning_rate": 2.6747067762927177e-05, "loss": 1.4662, "step": 119900 }, { "epoch": 4.66690001166725, "grad_norm": 3.631840229034424, "learning_rate": 2.6727779293145778e-05, "loss": 1.5407, "step": 120000 }, { "epoch": 4.66690001166725, "eval_accuracy": 0.5428055555555555, "eval_f1": 0.5288766850386778, "eval_loss": 1.2093476057052612, "eval_precision": 0.5503297317390046, "eval_recall": 0.5428055555555557, "eval_runtime": 
6816.1016, "eval_samples_per_second": 26.408, "eval_steps_per_second": 0.413, "step": 120000 }, { "epoch": 4.670789095010306, "grad_norm": 4.156496047973633, "learning_rate": 2.670849082336438e-05, "loss": 1.5818, "step": 120100 }, { "epoch": 4.674678178353362, "grad_norm": 2.609058380126953, "learning_rate": 2.6689007520554886e-05, "loss": 1.4801, "step": 120200 }, { "epoch": 4.6785672616964185, "grad_norm": 5.489620208740234, "learning_rate": 2.6669524217745396e-05, "loss": 1.4935, "step": 120300 }, { "epoch": 4.6824563450394745, "grad_norm": 3.6116583347320557, "learning_rate": 2.6650040914935903e-05, "loss": 1.4752, "step": 120400 }, { "epoch": 4.6863454283825305, "grad_norm": 3.0323333740234375, "learning_rate": 2.6630557612126412e-05, "loss": 1.4954, "step": 120500 }, { "epoch": 4.690234511725587, "grad_norm": 2.333742618560791, "learning_rate": 2.661107430931692e-05, "loss": 1.4704, "step": 120600 }, { "epoch": 4.694123595068643, "grad_norm": 2.6306982040405273, "learning_rate": 2.6591591006507426e-05, "loss": 1.3663, "step": 120700 }, { "epoch": 4.698012678411699, "grad_norm": 5.920077323913574, "learning_rate": 2.6572107703697935e-05, "loss": 1.467, "step": 120800 }, { "epoch": 4.701901761754755, "grad_norm": 3.0835249423980713, "learning_rate": 2.655262440088844e-05, "loss": 1.3666, "step": 120900 }, { "epoch": 4.705790845097811, "grad_norm": 2.40122127532959, "learning_rate": 2.6533141098078945e-05, "loss": 1.5033, "step": 121000 }, { "epoch": 4.705790845097811, "eval_accuracy": 0.5403444444444444, "eval_f1": 0.5236912494142391, "eval_loss": 1.2121702432632446, "eval_precision": 0.5630475121957609, "eval_recall": 0.5403444444444444, "eval_runtime": 6818.556, "eval_samples_per_second": 26.399, "eval_steps_per_second": 0.413, "step": 121000 }, { "epoch": 4.709679928440867, "grad_norm": 3.222153902053833, "learning_rate": 2.651365779526945e-05, "loss": 1.5022, "step": 121100 }, { "epoch": 4.713569011783923, "grad_norm": 2.956955671310425, "learning_rate": 
2.649417449245996e-05, "loss": 1.5036, "step": 121200 }, { "epoch": 4.717458095126979, "grad_norm": 2.049320936203003, "learning_rate": 2.6474691189650468e-05, "loss": 1.4956, "step": 121300 }, { "epoch": 4.721347178470035, "grad_norm": 3.215341091156006, "learning_rate": 2.6455207886840978e-05, "loss": 1.5784, "step": 121400 }, { "epoch": 4.725236261813091, "grad_norm": 2.699300527572632, "learning_rate": 2.6435724584031484e-05, "loss": 1.4535, "step": 121500 }, { "epoch": 4.729125345156147, "grad_norm": 6.858210563659668, "learning_rate": 2.6416241281221994e-05, "loss": 1.5119, "step": 121600 }, { "epoch": 4.733014428499203, "grad_norm": 5.201038360595703, "learning_rate": 2.63967579784125e-05, "loss": 1.5104, "step": 121700 }, { "epoch": 4.736903511842259, "grad_norm": 2.273268222808838, "learning_rate": 2.6377274675603007e-05, "loss": 1.5277, "step": 121800 }, { "epoch": 4.740792595185315, "grad_norm": 2.846250534057617, "learning_rate": 2.6357791372793517e-05, "loss": 1.4792, "step": 121900 }, { "epoch": 4.744681678528371, "grad_norm": 7.9509992599487305, "learning_rate": 2.6338308069984024e-05, "loss": 1.5966, "step": 122000 }, { "epoch": 4.744681678528371, "eval_accuracy": 0.5536055555555556, "eval_f1": 0.5462033679877261, "eval_loss": 1.1885603666305542, "eval_precision": 0.5590495645125001, "eval_recall": 0.5536055555555556, "eval_runtime": 6823.3277, "eval_samples_per_second": 26.38, "eval_steps_per_second": 0.412, "step": 122000 }, { "epoch": 2.374331550802139, "grad_norm": 7.746562480926514, "learning_rate": 3.817080291970803e-05, "loss": 1.3746, "step": 122100 }, { "epoch": 2.3762761302868256, "grad_norm": 5.002953052520752, "learning_rate": 3.816107055961071e-05, "loss": 1.3926, "step": 122200 }, { "epoch": 2.378220709771512, "grad_norm": 3.142582416534424, "learning_rate": 3.815133819951338e-05, "loss": 1.6379, "step": 122300 }, { "epoch": 2.3801652892561984, "grad_norm": 3.724693775177002, "learning_rate": 3.814160583941606e-05, "loss": 1.4129, 
"step": 122400 }, { "epoch": 2.382109868740885, "grad_norm": 4.866302013397217, "learning_rate": 3.813187347931874e-05, "loss": 1.5771, "step": 122500 }, { "epoch": 2.384054448225571, "grad_norm": 3.94940447807312, "learning_rate": 3.812214111922141e-05, "loss": 1.3963, "step": 122600 }, { "epoch": 2.3859990277102576, "grad_norm": 4.2556376457214355, "learning_rate": 3.811240875912409e-05, "loss": 1.6859, "step": 122700 }, { "epoch": 2.387943607194944, "grad_norm": 3.3731706142425537, "learning_rate": 3.8102676399026764e-05, "loss": 1.3596, "step": 122800 }, { "epoch": 2.3898881866796304, "grad_norm": 3.5068063735961914, "learning_rate": 3.809294403892944e-05, "loss": 1.6284, "step": 122900 }, { "epoch": 2.391832766164317, "grad_norm": 4.794012546539307, "learning_rate": 3.808321167883212e-05, "loss": 1.637, "step": 123000 }, { "epoch": 2.391832766164317, "eval_accuracy": 0.5389166666666667, "eval_f1": 0.5157412929475459, "eval_loss": 1.2281535863876343, "eval_precision": 0.5438073517181395, "eval_recall": 0.5389166666666667, "eval_runtime": 11720.6501, "eval_samples_per_second": 15.358, "eval_steps_per_second": 0.48, "step": 123000 }, { "epoch": 2.3937773456490032, "grad_norm": 1.9931375980377197, "learning_rate": 3.807357664233577e-05, "loss": 1.4318, "step": 123100 }, { "epoch": 2.3957219251336896, "grad_norm": 3.0255789756774902, "learning_rate": 3.806384428223844e-05, "loss": 1.5416, "step": 123200 }, { "epoch": 2.3976665046183765, "grad_norm": 5.752557277679443, "learning_rate": 3.805411192214112e-05, "loss": 1.3312, "step": 123300 }, { "epoch": 2.399611084103063, "grad_norm": 3.8919661045074463, "learning_rate": 3.80443795620438e-05, "loss": 1.5199, "step": 123400 }, { "epoch": 2.4015556635877493, "grad_norm": 2.60259747505188, "learning_rate": 3.803464720194647e-05, "loss": 1.4454, "step": 123500 }, { "epoch": 2.4035002430724357, "grad_norm": 6.541689872741699, "learning_rate": 3.8024914841849154e-05, "loss": 1.4203, "step": 123600 }, { "epoch": 
2.405444822557122, "grad_norm": 4.495509624481201, "learning_rate": 3.8015182481751824e-05, "loss": 1.5965, "step": 123700 }, { "epoch": 2.4073894020418085, "grad_norm": 9.207340240478516, "learning_rate": 3.80054501216545e-05, "loss": 1.4441, "step": 123800 }, { "epoch": 2.409333981526495, "grad_norm": 6.378660678863525, "learning_rate": 3.799571776155718e-05, "loss": 1.4572, "step": 123900 }, { "epoch": 2.4112785610111813, "grad_norm": 3.0215299129486084, "learning_rate": 3.7985985401459855e-05, "loss": 1.5217, "step": 124000 }, { "epoch": 2.4112785610111813, "eval_accuracy": 0.5487777777777778, "eval_f1": 0.5427135855886721, "eval_loss": 1.2010254859924316, "eval_precision": 0.5442191975458245, "eval_recall": 0.5487777777777778, "eval_runtime": 11720.4187, "eval_samples_per_second": 15.358, "eval_steps_per_second": 0.48, "step": 124000 }, { "epoch": 2.4132231404958677, "grad_norm": 4.452690601348877, "learning_rate": 3.797625304136253e-05, "loss": 1.4035, "step": 124100 }, { "epoch": 2.415167719980554, "grad_norm": 6.512496471405029, "learning_rate": 3.796652068126521e-05, "loss": 1.6997, "step": 124200 }, { "epoch": 2.4171122994652405, "grad_norm": 2.756286382675171, "learning_rate": 3.7956788321167886e-05, "loss": 1.3731, "step": 124300 }, { "epoch": 2.419056878949927, "grad_norm": 4.21470832824707, "learning_rate": 3.794705596107056e-05, "loss": 1.5207, "step": 124400 }, { "epoch": 2.4210014584346133, "grad_norm": 4.312173366546631, "learning_rate": 3.793732360097324e-05, "loss": 1.499, "step": 124500 }, { "epoch": 2.4229460379193, "grad_norm": 5.553069114685059, "learning_rate": 3.792759124087592e-05, "loss": 1.4157, "step": 124600 }, { "epoch": 2.424890617403986, "grad_norm": 5.254312515258789, "learning_rate": 3.791785888077859e-05, "loss": 1.4122, "step": 124700 }, { "epoch": 2.426835196888673, "grad_norm": 4.566195011138916, "learning_rate": 3.790812652068127e-05, "loss": 1.4653, "step": 124800 }, { "epoch": 2.4287797763733594, "grad_norm": 
3.54243540763855, "learning_rate": 3.789839416058394e-05, "loss": 1.4716, "step": 124900 }, { "epoch": 2.430724355858046, "grad_norm": 5.398393154144287, "learning_rate": 3.788866180048662e-05, "loss": 1.6031, "step": 125000 }, { "epoch": 2.430724355858046, "eval_accuracy": 0.5400277777777778, "eval_f1": 0.527655566535076, "eval_loss": 1.2236906290054321, "eval_precision": 0.5370936646901662, "eval_recall": 0.5400277777777777, "eval_runtime": 11704.9086, "eval_samples_per_second": 15.378, "eval_steps_per_second": 0.481, "step": 125000 }, { "epoch": 2.432668935342732, "grad_norm": 4.660057544708252, "learning_rate": 3.787902676399027e-05, "loss": 1.7364, "step": 125100 }, { "epoch": 2.4346135148274186, "grad_norm": 9.250042915344238, "learning_rate": 3.7869294403892947e-05, "loss": 1.5789, "step": 125200 }, { "epoch": 2.436558094312105, "grad_norm": 5.414535045623779, "learning_rate": 3.7859562043795624e-05, "loss": 1.5067, "step": 125300 }, { "epoch": 2.4385026737967914, "grad_norm": 6.6904296875, "learning_rate": 3.78498296836983e-05, "loss": 1.4454, "step": 125400 }, { "epoch": 2.440447253281478, "grad_norm": 3.3569560050964355, "learning_rate": 3.784009732360098e-05, "loss": 1.4772, "step": 125500 }, { "epoch": 2.4423918327661642, "grad_norm": 8.543791770935059, "learning_rate": 3.783036496350365e-05, "loss": 1.6411, "step": 125600 }, { "epoch": 2.4443364122508506, "grad_norm": 3.7477216720581055, "learning_rate": 3.7820632603406325e-05, "loss": 1.4197, "step": 125700 }, { "epoch": 2.446280991735537, "grad_norm": 6.28621768951416, "learning_rate": 3.781090024330901e-05, "loss": 1.3491, "step": 125800 }, { "epoch": 2.4482255712202234, "grad_norm": 16.032079696655273, "learning_rate": 3.780116788321168e-05, "loss": 1.4282, "step": 125900 }, { "epoch": 2.45017015070491, "grad_norm": 5.21914005279541, "learning_rate": 3.7791435523114356e-05, "loss": 1.4542, "step": 126000 }, { "epoch": 2.45017015070491, "eval_accuracy": 0.5433555555555556, "eval_f1": 
0.5307859227379652, "eval_loss": 1.2101101875305176, "eval_precision": 0.5585658552516469, "eval_recall": 0.5433555555555557, "eval_runtime": 11706.0447, "eval_samples_per_second": 15.377, "eval_steps_per_second": 0.481, "step": 126000 }, { "epoch": 2.4521147301895967, "grad_norm": 10.756732940673828, "learning_rate": 3.778170316301703e-05, "loss": 1.4114, "step": 126100 }, { "epoch": 2.454059309674283, "grad_norm": 3.7012839317321777, "learning_rate": 3.777197080291971e-05, "loss": 1.4735, "step": 126200 }, { "epoch": 2.4560038891589695, "grad_norm": 3.1096372604370117, "learning_rate": 3.776223844282239e-05, "loss": 1.59, "step": 126300 }, { "epoch": 2.457948468643656, "grad_norm": 1.901330828666687, "learning_rate": 3.7752506082725064e-05, "loss": 1.3609, "step": 126400 }, { "epoch": 2.4598930481283423, "grad_norm": 2.937941551208496, "learning_rate": 3.774277372262774e-05, "loss": 1.3508, "step": 126500 }, { "epoch": 2.4618376276130287, "grad_norm": 7.9245524406433105, "learning_rate": 3.773304136253041e-05, "loss": 1.4372, "step": 126600 }, { "epoch": 2.463782207097715, "grad_norm": 7.598072052001953, "learning_rate": 3.7723309002433095e-05, "loss": 1.4948, "step": 126700 }, { "epoch": 2.4657267865824015, "grad_norm": 3.2467031478881836, "learning_rate": 3.7713576642335765e-05, "loss": 1.3924, "step": 126800 }, { "epoch": 2.467671366067088, "grad_norm": 3.9926083087921143, "learning_rate": 3.770384428223844e-05, "loss": 1.4277, "step": 126900 }, { "epoch": 2.4696159455517743, "grad_norm": 4.7598042488098145, "learning_rate": 3.7694111922141126e-05, "loss": 1.5071, "step": 127000 }, { "epoch": 2.4696159455517743, "eval_accuracy": 0.5429055555555555, "eval_f1": 0.5279278987623637, "eval_loss": 1.2115564346313477, "eval_precision": 0.5500566480239266, "eval_recall": 0.5429055555555556, "eval_runtime": 11703.6215, "eval_samples_per_second": 15.38, "eval_steps_per_second": 0.481, "step": 127000 }, { "epoch": 2.4715605250364607, "grad_norm": 2.2148499488830566, 
"learning_rate": 3.768447688564477e-05, "loss": 1.5053, "step": 127100 }, { "epoch": 2.473505104521147, "grad_norm": 10.410021781921387, "learning_rate": 3.767474452554745e-05, "loss": 1.5668, "step": 127200 }, { "epoch": 2.4754496840058335, "grad_norm": 5.490994453430176, "learning_rate": 3.7665012165450125e-05, "loss": 1.4791, "step": 127300 }, { "epoch": 2.4773942634905204, "grad_norm": 5.222614288330078, "learning_rate": 3.76552798053528e-05, "loss": 1.3394, "step": 127400 }, { "epoch": 2.479338842975207, "grad_norm": 4.345531940460205, "learning_rate": 3.764554744525547e-05, "loss": 1.3787, "step": 127500 }, { "epoch": 2.481283422459893, "grad_norm": 4.245398998260498, "learning_rate": 3.7635815085158156e-05, "loss": 1.4776, "step": 127600 }, { "epoch": 2.4832280019445796, "grad_norm": 5.5448317527771, "learning_rate": 3.762608272506083e-05, "loss": 1.4099, "step": 127700 }, { "epoch": 2.485172581429266, "grad_norm": 3.4202358722686768, "learning_rate": 3.76163503649635e-05, "loss": 1.4584, "step": 127800 }, { "epoch": 2.4871171609139524, "grad_norm": 7.463212013244629, "learning_rate": 3.760661800486619e-05, "loss": 1.4607, "step": 127900 }, { "epoch": 2.489061740398639, "grad_norm": 8.97133731842041, "learning_rate": 3.759688564476886e-05, "loss": 1.5437, "step": 128000 }, { "epoch": 2.489061740398639, "eval_accuracy": 0.5382944444444444, "eval_f1": 0.5256181649078323, "eval_loss": 1.2150262594223022, "eval_precision": 0.5487142133559698, "eval_recall": 0.5382944444444444, "eval_runtime": 11708.7668, "eval_samples_per_second": 15.373, "eval_steps_per_second": 0.48, "step": 128000 }, { "epoch": 2.491006319883325, "grad_norm": 9.475485801696777, "learning_rate": 3.7587153284671534e-05, "loss": 1.5768, "step": 128100 }, { "epoch": 2.4929508993680116, "grad_norm": 4.767566680908203, "learning_rate": 3.757742092457421e-05, "loss": 1.4129, "step": 128200 }, { "epoch": 2.494895478852698, "grad_norm": 3.3636186122894287, "learning_rate": 3.756768856447689e-05, 
"loss": 1.4915, "step": 128300 }, { "epoch": 2.4968400583373844, "grad_norm": 5.493124485015869, "learning_rate": 3.7557956204379565e-05, "loss": 1.6841, "step": 128400 }, { "epoch": 2.498784637822071, "grad_norm": 5.522589206695557, "learning_rate": 3.7548223844282235e-05, "loss": 1.5026, "step": 128500 }, { "epoch": 2.5007292173067572, "grad_norm": 7.894842147827148, "learning_rate": 3.753849148418492e-05, "loss": 1.6033, "step": 128600 }, { "epoch": 2.502673796791444, "grad_norm": 4.346030235290527, "learning_rate": 3.752875912408759e-05, "loss": 1.5277, "step": 128700 }, { "epoch": 2.50461837627613, "grad_norm": 2.507094621658325, "learning_rate": 3.7519026763990266e-05, "loss": 1.4053, "step": 128800 }, { "epoch": 2.506562955760817, "grad_norm": 4.448509216308594, "learning_rate": 3.750929440389295e-05, "loss": 1.5293, "step": 128900 }, { "epoch": 2.5085075352455033, "grad_norm": 8.932188034057617, "learning_rate": 3.749956204379562e-05, "loss": 1.4489, "step": 129000 }, { "epoch": 2.5085075352455033, "eval_accuracy": 0.5128888888888888, "eval_f1": 0.5062814118374187, "eval_loss": 1.256624698638916, "eval_precision": 0.5527730038213342, "eval_recall": 0.5128888888888888, "eval_runtime": 11707.878, "eval_samples_per_second": 15.374, "eval_steps_per_second": 0.48, "step": 129000 }, { "epoch": 2.5104521147301897, "grad_norm": 2.7912261486053467, "learning_rate": 3.74898296836983e-05, "loss": 1.4556, "step": 129100 }, { "epoch": 2.512396694214876, "grad_norm": 6.169617652893066, "learning_rate": 3.748019464720195e-05, "loss": 1.5001, "step": 129200 }, { "epoch": 2.5143412736995625, "grad_norm": 3.286309242248535, "learning_rate": 3.7470462287104625e-05, "loss": 1.5675, "step": 129300 }, { "epoch": 2.516285853184249, "grad_norm": 3.781956911087036, "learning_rate": 3.7460729927007296e-05, "loss": 1.4544, "step": 129400 }, { "epoch": 2.5182304326689353, "grad_norm": 5.108039379119873, "learning_rate": 3.745099756690998e-05, "loss": 1.5292, "step": 129500 }, { 
"epoch": 2.5201750121536217, "grad_norm": 5.07665491104126, "learning_rate": 3.7441362530413624e-05, "loss": 1.5943, "step": 129600 }, { "epoch": 2.522119591638308, "grad_norm": 3.7710981369018555, "learning_rate": 3.74316301703163e-05, "loss": 1.6169, "step": 129700 }, { "epoch": 2.5240641711229945, "grad_norm": 3.2845406532287598, "learning_rate": 3.742189781021898e-05, "loss": 1.344, "step": 129800 }, { "epoch": 2.526008750607681, "grad_norm": 4.7335004806518555, "learning_rate": 3.7412165450121655e-05, "loss": 1.4337, "step": 129900 }, { "epoch": 2.527953330092368, "grad_norm": 1.224095344543457, "learning_rate": 3.740243309002433e-05, "loss": 1.5495, "step": 130000 }, { "epoch": 2.527953330092368, "eval_accuracy": 0.5532111111111111, "eval_f1": 0.5426730885293557, "eval_loss": 1.1922270059585571, "eval_precision": 0.5581329528089346, "eval_recall": 0.5532111111111111, "eval_runtime": 11701.2978, "eval_samples_per_second": 15.383, "eval_steps_per_second": 0.481, "step": 130000 }, { "epoch": 2.5298979095770537, "grad_norm": 5.766182899475098, "learning_rate": 3.739270072992701e-05, "loss": 1.5009, "step": 130100 }, { "epoch": 2.5318424890617406, "grad_norm": 3.3143343925476074, "learning_rate": 3.7382968369829686e-05, "loss": 1.4676, "step": 130200 }, { "epoch": 2.533787068546427, "grad_norm": 4.9459075927734375, "learning_rate": 3.737323600973236e-05, "loss": 1.5076, "step": 130300 }, { "epoch": 2.5357316480311134, "grad_norm": 10.157061576843262, "learning_rate": 3.736350364963504e-05, "loss": 1.5048, "step": 130400 }, { "epoch": 2.5376762275158, "grad_norm": 8.682615280151367, "learning_rate": 3.735377128953772e-05, "loss": 1.4709, "step": 130500 }, { "epoch": 2.539620807000486, "grad_norm": 7.750446796417236, "learning_rate": 3.734403892944039e-05, "loss": 1.4065, "step": 130600 }, { "epoch": 2.5415653864851726, "grad_norm": 10.846022605895996, "learning_rate": 3.733430656934307e-05, "loss": 1.5883, "step": 130700 }, { "epoch": 2.543509965969859, 
"grad_norm": 4.400326728820801, "learning_rate": 3.732457420924574e-05, "loss": 1.3877, "step": 130800 }, { "epoch": 2.5454545454545454, "grad_norm": 12.703445434570312, "learning_rate": 3.731484184914842e-05, "loss": 1.4824, "step": 130900 }, { "epoch": 2.547399124939232, "grad_norm": 2.299748182296753, "learning_rate": 3.73051094890511e-05, "loss": 1.4348, "step": 131000 }, { "epoch": 2.547399124939232, "eval_accuracy": 0.5432444444444444, "eval_f1": 0.5375353663926442, "eval_loss": 1.2032384872436523, "eval_precision": 0.551032287651369, "eval_recall": 0.5432444444444445, "eval_runtime": 11700.7356, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 131000 }, { "epoch": 2.5493437044239182, "grad_norm": 6.144115924835205, "learning_rate": 3.729537712895377e-05, "loss": 1.5616, "step": 131100 }, { "epoch": 2.5512882839086046, "grad_norm": 6.1861066818237305, "learning_rate": 3.728564476885645e-05, "loss": 1.4873, "step": 131200 }, { "epoch": 2.5532328633932915, "grad_norm": 6.012712478637695, "learning_rate": 3.7275912408759126e-05, "loss": 1.4701, "step": 131300 }, { "epoch": 2.5551774428779774, "grad_norm": 1.849448800086975, "learning_rate": 3.72661800486618e-05, "loss": 1.3992, "step": 131400 }, { "epoch": 2.5571220223626643, "grad_norm": 5.330036163330078, "learning_rate": 3.725644768856448e-05, "loss": 1.4454, "step": 131500 }, { "epoch": 2.5590666018473502, "grad_norm": 8.195008277893066, "learning_rate": 3.724671532846716e-05, "loss": 1.528, "step": 131600 }, { "epoch": 2.561011181332037, "grad_norm": 5.142409801483154, "learning_rate": 3.7236982968369834e-05, "loss": 1.4059, "step": 131700 }, { "epoch": 2.5629557608167235, "grad_norm": 2.0515873432159424, "learning_rate": 3.7227250608272505e-05, "loss": 1.54, "step": 131800 }, { "epoch": 2.56490034030141, "grad_norm": 5.879722595214844, "learning_rate": 3.721751824817518e-05, "loss": 1.4483, "step": 131900 }, { "epoch": 2.5668449197860963, "grad_norm": 4.814441204071045, 
"learning_rate": 3.720778588807786e-05, "loss": 1.4554, "step": 132000 }, { "epoch": 2.5668449197860963, "eval_accuracy": 0.5382666666666667, "eval_f1": 0.525228496651421, "eval_loss": 1.2144094705581665, "eval_precision": 0.5637702180305619, "eval_recall": 0.5382666666666667, "eval_runtime": 11695.5873, "eval_samples_per_second": 15.39, "eval_steps_per_second": 0.481, "step": 132000 }, { "epoch": 2.5687894992707827, "grad_norm": 11.15813159942627, "learning_rate": 3.7198053527980536e-05, "loss": 1.4249, "step": 132100 }, { "epoch": 2.570734078755469, "grad_norm": 7.833085536956787, "learning_rate": 3.718832116788321e-05, "loss": 1.5075, "step": 132200 }, { "epoch": 2.5726786582401555, "grad_norm": 7.286487579345703, "learning_rate": 3.717858880778589e-05, "loss": 1.4507, "step": 132300 }, { "epoch": 2.574623237724842, "grad_norm": 2.784047842025757, "learning_rate": 3.716885644768857e-05, "loss": 1.7649, "step": 132400 }, { "epoch": 2.5765678172095283, "grad_norm": 2.4435904026031494, "learning_rate": 3.7159124087591244e-05, "loss": 1.4992, "step": 132500 }, { "epoch": 2.5785123966942147, "grad_norm": 9.102299690246582, "learning_rate": 3.714939172749392e-05, "loss": 1.4061, "step": 132600 }, { "epoch": 2.580456976178901, "grad_norm": 7.262824535369873, "learning_rate": 3.71396593673966e-05, "loss": 1.513, "step": 132700 }, { "epoch": 2.582401555663588, "grad_norm": 2.617892265319824, "learning_rate": 3.712992700729927e-05, "loss": 1.5128, "step": 132800 }, { "epoch": 2.584346135148274, "grad_norm": 3.1354260444641113, "learning_rate": 3.712019464720195e-05, "loss": 1.4341, "step": 132900 }, { "epoch": 2.586290714632961, "grad_norm": 2.2353668212890625, "learning_rate": 3.711046228710462e-05, "loss": 1.4183, "step": 133000 }, { "epoch": 2.586290714632961, "eval_accuracy": 0.5334888888888889, "eval_f1": 0.5256485968775854, "eval_loss": 1.2201597690582275, "eval_precision": 0.5481931619939298, "eval_recall": 0.5334888888888888, "eval_runtime": 11702.1334, 
"eval_samples_per_second": 15.382, "eval_steps_per_second": 0.481, "step": 133000 }, { "epoch": 2.588235294117647, "grad_norm": 3.1103808879852295, "learning_rate": 3.71007299270073e-05, "loss": 1.5425, "step": 133100 }, { "epoch": 2.5901798736023336, "grad_norm": 4.470186233520508, "learning_rate": 3.709099756690998e-05, "loss": 1.507, "step": 133200 }, { "epoch": 2.59212445308702, "grad_norm": 3.179208993911743, "learning_rate": 3.708126520681265e-05, "loss": 1.4445, "step": 133300 }, { "epoch": 2.5940690325717064, "grad_norm": 2.8759262561798096, "learning_rate": 3.707153284671533e-05, "loss": 1.3972, "step": 133400 }, { "epoch": 2.596013612056393, "grad_norm": 2.0203115940093994, "learning_rate": 3.706180048661801e-05, "loss": 1.3792, "step": 133500 }, { "epoch": 2.597958191541079, "grad_norm": 5.717413902282715, "learning_rate": 3.705216545012166e-05, "loss": 1.6462, "step": 133600 }, { "epoch": 2.5999027710257656, "grad_norm": 9.80691909790039, "learning_rate": 3.704243309002433e-05, "loss": 1.4195, "step": 133700 }, { "epoch": 2.601847350510452, "grad_norm": 4.343341827392578, "learning_rate": 3.703270072992701e-05, "loss": 1.4413, "step": 133800 }, { "epoch": 2.6037919299951384, "grad_norm": 5.324920177459717, "learning_rate": 3.702296836982968e-05, "loss": 1.4181, "step": 133900 }, { "epoch": 2.605736509479825, "grad_norm": 6.214841365814209, "learning_rate": 3.701323600973236e-05, "loss": 1.4754, "step": 134000 }, { "epoch": 2.605736509479825, "eval_accuracy": 0.5470333333333334, "eval_f1": 0.5436880475298606, "eval_loss": 1.1987807750701904, "eval_precision": 0.5536887864650161, "eval_recall": 0.5470333333333334, "eval_runtime": 11702.7789, "eval_samples_per_second": 15.381, "eval_steps_per_second": 0.481, "step": 134000 }, { "epoch": 2.6076810889645117, "grad_norm": 1.8559759855270386, "learning_rate": 3.700350364963504e-05, "loss": 1.4516, "step": 134100 }, { "epoch": 2.6096256684491976, "grad_norm": 5.648890972137451, "learning_rate": 
3.6993771289537714e-05, "loss": 1.6505, "step": 134200 }, { "epoch": 2.6115702479338845, "grad_norm": 3.15840744972229, "learning_rate": 3.698403892944039e-05, "loss": 1.4685, "step": 134300 }, { "epoch": 2.613514827418571, "grad_norm": 4.5391154289245605, "learning_rate": 3.697430656934307e-05, "loss": 1.4959, "step": 134400 }, { "epoch": 2.6154594069032573, "grad_norm": 3.317028522491455, "learning_rate": 3.6964574209245745e-05, "loss": 1.4906, "step": 134500 }, { "epoch": 2.6174039863879437, "grad_norm": 7.499607086181641, "learning_rate": 3.695484184914842e-05, "loss": 1.4708, "step": 134600 }, { "epoch": 2.61934856587263, "grad_norm": 5.4760847091674805, "learning_rate": 3.694510948905109e-05, "loss": 1.5683, "step": 134700 }, { "epoch": 2.6212931453573165, "grad_norm": 2.6815028190612793, "learning_rate": 3.6935377128953776e-05, "loss": 1.3908, "step": 134800 }, { "epoch": 2.623237724842003, "grad_norm": 8.497615814208984, "learning_rate": 3.6925644768856446e-05, "loss": 1.4105, "step": 134900 }, { "epoch": 2.6251823043266893, "grad_norm": 5.217629909515381, "learning_rate": 3.691591240875912e-05, "loss": 1.5864, "step": 135000 }, { "epoch": 2.6251823043266893, "eval_accuracy": 0.5426277777777778, "eval_f1": 0.5348139238404958, "eval_loss": 1.2015126943588257, "eval_precision": 0.5414052733585322, "eval_recall": 0.5426277777777777, "eval_runtime": 11701.1944, "eval_samples_per_second": 15.383, "eval_steps_per_second": 0.481, "step": 135000 }, { "epoch": 2.6271268838113757, "grad_norm": 2.929307699203491, "learning_rate": 3.690618004866181e-05, "loss": 1.5411, "step": 135100 }, { "epoch": 2.629071463296062, "grad_norm": 3.506011486053467, "learning_rate": 3.689644768856448e-05, "loss": 1.4798, "step": 135200 }, { "epoch": 2.6310160427807485, "grad_norm": 2.526944398880005, "learning_rate": 3.6886715328467154e-05, "loss": 1.3965, "step": 135300 }, { "epoch": 2.6329606222654354, "grad_norm": 6.01577091217041, "learning_rate": 3.687698296836983e-05, "loss": 
1.3861, "step": 135400 }, { "epoch": 2.6349052017501213, "grad_norm": 3.171880006790161, "learning_rate": 3.686725060827251e-05, "loss": 1.5004, "step": 135500 }, { "epoch": 2.636849781234808, "grad_norm": 4.262874126434326, "learning_rate": 3.685761557177615e-05, "loss": 1.6797, "step": 135600 }, { "epoch": 2.638794360719494, "grad_norm": 10.61685562133789, "learning_rate": 3.6847883211678836e-05, "loss": 1.3224, "step": 135700 }, { "epoch": 2.640738940204181, "grad_norm": 6.7449631690979, "learning_rate": 3.683815085158151e-05, "loss": 1.6533, "step": 135800 }, { "epoch": 2.6426835196888674, "grad_norm": 7.8499603271484375, "learning_rate": 3.6828418491484183e-05, "loss": 1.4874, "step": 135900 }, { "epoch": 2.644628099173554, "grad_norm": 16.089895248413086, "learning_rate": 3.681868613138687e-05, "loss": 1.3715, "step": 136000 }, { "epoch": 2.644628099173554, "eval_accuracy": 0.5287166666666666, "eval_f1": 0.5135771611211362, "eval_loss": 1.2305988073349, "eval_precision": 0.5495002785365138, "eval_recall": 0.5287166666666667, "eval_runtime": 11700.3522, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 136000 }, { "epoch": 2.64657267865824, "grad_norm": 2.5036091804504395, "learning_rate": 3.680895377128954e-05, "loss": 1.5387, "step": 136100 }, { "epoch": 2.6485172581429266, "grad_norm": 5.292614459991455, "learning_rate": 3.6799221411192214e-05, "loss": 1.4633, "step": 136200 }, { "epoch": 2.650461837627613, "grad_norm": 6.591153144836426, "learning_rate": 3.678948905109489e-05, "loss": 1.4941, "step": 136300 }, { "epoch": 2.6524064171122994, "grad_norm": 3.784457206726074, "learning_rate": 3.677975669099757e-05, "loss": 1.4254, "step": 136400 }, { "epoch": 2.654350996596986, "grad_norm": 4.622765064239502, "learning_rate": 3.6770024330900246e-05, "loss": 1.4533, "step": 136500 }, { "epoch": 2.656295576081672, "grad_norm": 3.8145179748535156, "learning_rate": 3.676029197080292e-05, "loss": 1.4919, "step": 136600 }, { "epoch": 
2.6582401555663586, "grad_norm": 8.212794303894043, "learning_rate": 3.67505596107056e-05, "loss": 1.6828, "step": 136700 }, { "epoch": 2.660184735051045, "grad_norm": 4.446144104003906, "learning_rate": 3.674082725060827e-05, "loss": 1.5509, "step": 136800 }, { "epoch": 2.662129314535732, "grad_norm": 7.020377159118652, "learning_rate": 3.6731094890510954e-05, "loss": 1.4654, "step": 136900 }, { "epoch": 2.664073894020418, "grad_norm": 3.2034873962402344, "learning_rate": 3.672136253041363e-05, "loss": 1.3886, "step": 137000 }, { "epoch": 2.664073894020418, "eval_accuracy": 0.5445222222222222, "eval_f1": 0.5323332674595841, "eval_loss": 1.2043038606643677, "eval_precision": 0.5477855171564977, "eval_recall": 0.5445222222222222, "eval_runtime": 11697.106, "eval_samples_per_second": 15.388, "eval_steps_per_second": 0.481, "step": 137000 }, { "epoch": 2.6660184735051047, "grad_norm": 4.7920966148376465, "learning_rate": 3.67116301703163e-05, "loss": 1.469, "step": 137100 }, { "epoch": 2.667963052989791, "grad_norm": 5.560246467590332, "learning_rate": 3.6701897810218985e-05, "loss": 1.3374, "step": 137200 }, { "epoch": 2.6699076324744775, "grad_norm": 3.3148908615112305, "learning_rate": 3.6692165450121655e-05, "loss": 1.4299, "step": 137300 }, { "epoch": 2.671852211959164, "grad_norm": 5.148831367492676, "learning_rate": 3.668243309002433e-05, "loss": 1.5941, "step": 137400 }, { "epoch": 2.6737967914438503, "grad_norm": 3.1917150020599365, "learning_rate": 3.667270072992701e-05, "loss": 1.5404, "step": 137500 }, { "epoch": 2.6757413709285367, "grad_norm": 2.9254508018493652, "learning_rate": 3.666306569343066e-05, "loss": 1.4572, "step": 137600 }, { "epoch": 2.677685950413223, "grad_norm": 5.366604804992676, "learning_rate": 3.665333333333334e-05, "loss": 1.5752, "step": 137700 }, { "epoch": 2.6796305298979095, "grad_norm": 2.1068837642669678, "learning_rate": 3.6643600973236014e-05, "loss": 1.5003, "step": 137800 }, { "epoch": 2.681575109382596, "grad_norm": 
9.662635803222656, "learning_rate": 3.663386861313869e-05, "loss": 1.6439, "step": 137900 }, { "epoch": 2.6835196888672823, "grad_norm": 3.429116725921631, "learning_rate": 3.662413625304136e-05, "loss": 1.4509, "step": 138000 }, { "epoch": 2.6835196888672823, "eval_accuracy": 0.5438333333333333, "eval_f1": 0.5275600692423633, "eval_loss": 1.204463005065918, "eval_precision": 0.5601687440021733, "eval_recall": 0.5438333333333333, "eval_runtime": 11700.7249, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 138000 }, { "epoch": 2.6854642683519687, "grad_norm": 3.995939254760742, "learning_rate": 3.661440389294404e-05, "loss": 1.599, "step": 138100 }, { "epoch": 2.6874088478366556, "grad_norm": 4.259720802307129, "learning_rate": 3.6604671532846715e-05, "loss": 1.352, "step": 138200 }, { "epoch": 2.6893534273213415, "grad_norm": 1.5554279088974, "learning_rate": 3.659493917274939e-05, "loss": 1.5532, "step": 138300 }, { "epoch": 2.6912980068060284, "grad_norm": 3.6563358306884766, "learning_rate": 3.658520681265207e-05, "loss": 1.5603, "step": 138400 }, { "epoch": 2.693242586290715, "grad_norm": 3.4740819931030273, "learning_rate": 3.6575474452554746e-05, "loss": 1.5454, "step": 138500 }, { "epoch": 2.695187165775401, "grad_norm": 4.129930019378662, "learning_rate": 3.6565742092457423e-05, "loss": 1.5653, "step": 138600 }, { "epoch": 2.6971317452600876, "grad_norm": 1.7310948371887207, "learning_rate": 3.6556009732360094e-05, "loss": 1.4438, "step": 138700 }, { "epoch": 2.699076324744774, "grad_norm": 2.2803988456726074, "learning_rate": 3.654627737226278e-05, "loss": 1.4756, "step": 138800 }, { "epoch": 2.7010209042294604, "grad_norm": 3.062891960144043, "learning_rate": 3.6536545012165454e-05, "loss": 1.4377, "step": 138900 }, { "epoch": 2.702965483714147, "grad_norm": 5.409154415130615, "learning_rate": 3.6526812652068125e-05, "loss": 1.4868, "step": 139000 }, { "epoch": 2.702965483714147, "eval_accuracy": 0.5232777777777777, "eval_f1": 
0.5097137863886235, "eval_loss": 1.2468398809432983, "eval_precision": 0.5384642564815899, "eval_recall": 0.5232777777777778, "eval_runtime": 11700.1137, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 139000 }, { "epoch": 2.704910063198833, "grad_norm": 3.337860107421875, "learning_rate": 3.651708029197081e-05, "loss": 1.4856, "step": 139100 }, { "epoch": 2.7068546426835196, "grad_norm": 5.200771331787109, "learning_rate": 3.650734793187348e-05, "loss": 1.3706, "step": 139200 }, { "epoch": 2.708799222168206, "grad_norm": 11.96838665008545, "learning_rate": 3.6497615571776156e-05, "loss": 1.4798, "step": 139300 }, { "epoch": 2.7107438016528924, "grad_norm": 3.5028131008148193, "learning_rate": 3.648788321167883e-05, "loss": 1.4228, "step": 139400 }, { "epoch": 2.7126883811375793, "grad_norm": 7.03187370300293, "learning_rate": 3.647815085158151e-05, "loss": 1.3673, "step": 139500 }, { "epoch": 2.7146329606222652, "grad_norm": 3.8014962673187256, "learning_rate": 3.646841849148419e-05, "loss": 1.5277, "step": 139600 }, { "epoch": 2.716577540106952, "grad_norm": 4.487148761749268, "learning_rate": 3.645878345498784e-05, "loss": 1.4787, "step": 139700 }, { "epoch": 2.718522119591638, "grad_norm": 6.178878307342529, "learning_rate": 3.6449051094890515e-05, "loss": 1.4137, "step": 139800 }, { "epoch": 2.720466699076325, "grad_norm": 6.767302989959717, "learning_rate": 3.6439318734793185e-05, "loss": 1.4526, "step": 139900 }, { "epoch": 2.7224112785610113, "grad_norm": 9.1553373336792, "learning_rate": 3.642958637469587e-05, "loss": 1.4345, "step": 140000 }, { "epoch": 2.7224112785610113, "eval_accuracy": 0.5455666666666666, "eval_f1": 0.5311972088064522, "eval_loss": 1.2123345136642456, "eval_precision": 0.5404402667309468, "eval_recall": 0.5455666666666666, "eval_runtime": 11704.0442, "eval_samples_per_second": 15.379, "eval_steps_per_second": 0.481, "step": 140000 }, { "epoch": 2.7243558580456977, "grad_norm": 3.488968849182129, 
"learning_rate": 3.641985401459854e-05, "loss": 1.4216, "step": 140100 }, { "epoch": 2.726300437530384, "grad_norm": 5.2070136070251465, "learning_rate": 3.6410121654501216e-05, "loss": 1.4168, "step": 140200 }, { "epoch": 2.7282450170150705, "grad_norm": 3.7023682594299316, "learning_rate": 3.64003892944039e-05, "loss": 1.4298, "step": 140300 }, { "epoch": 2.730189596499757, "grad_norm": 9.44726276397705, "learning_rate": 3.639065693430657e-05, "loss": 1.5325, "step": 140400 }, { "epoch": 2.7321341759844433, "grad_norm": 3.566453456878662, "learning_rate": 3.638092457420925e-05, "loss": 1.5402, "step": 140500 }, { "epoch": 2.7340787554691297, "grad_norm": 4.19261360168457, "learning_rate": 3.6371192214111924e-05, "loss": 1.4644, "step": 140600 }, { "epoch": 2.736023334953816, "grad_norm": 1.518747329711914, "learning_rate": 3.63614598540146e-05, "loss": 1.5672, "step": 140700 }, { "epoch": 2.7379679144385025, "grad_norm": 6.056252956390381, "learning_rate": 3.635172749391728e-05, "loss": 1.447, "step": 140800 }, { "epoch": 2.739912493923189, "grad_norm": 5.74575138092041, "learning_rate": 3.634199513381995e-05, "loss": 1.6661, "step": 140900 }, { "epoch": 2.7418570734078758, "grad_norm": 9.669859886169434, "learning_rate": 3.633226277372263e-05, "loss": 1.3935, "step": 141000 }, { "epoch": 2.7418570734078758, "eval_accuracy": 0.5441111111111111, "eval_f1": 0.5320882291864693, "eval_loss": 1.206077218055725, "eval_precision": 0.5428303603844985, "eval_recall": 0.5441111111111111, "eval_runtime": 11696.1624, "eval_samples_per_second": 15.39, "eval_steps_per_second": 0.481, "step": 141000 }, { "epoch": 2.7438016528925617, "grad_norm": 2.5247066020965576, "learning_rate": 3.63225304136253e-05, "loss": 1.4546, "step": 141100 }, { "epoch": 2.7457462323772486, "grad_norm": 2.4642157554626465, "learning_rate": 3.631279805352798e-05, "loss": 1.4348, "step": 141200 }, { "epoch": 2.747690811861935, "grad_norm": 4.006363868713379, "learning_rate": 3.6303065693430663e-05, 
"loss": 1.5559, "step": 141300 }, { "epoch": 2.7496353913466214, "grad_norm": 5.4720778465271, "learning_rate": 3.6293333333333334e-05, "loss": 1.5275, "step": 141400 }, { "epoch": 2.751579970831308, "grad_norm": 11.877006530761719, "learning_rate": 3.628360097323601e-05, "loss": 1.4473, "step": 141500 }, { "epoch": 2.753524550315994, "grad_norm": 4.403212070465088, "learning_rate": 3.627386861313869e-05, "loss": 1.6769, "step": 141600 }, { "epoch": 2.7554691298006806, "grad_norm": 8.280721664428711, "learning_rate": 3.6264136253041365e-05, "loss": 1.4281, "step": 141700 }, { "epoch": 2.757413709285367, "grad_norm": 6.277519702911377, "learning_rate": 3.625450121654501e-05, "loss": 1.3993, "step": 141800 }, { "epoch": 2.7593582887700534, "grad_norm": 5.544741153717041, "learning_rate": 3.624476885644769e-05, "loss": 1.5105, "step": 141900 }, { "epoch": 2.76130286825474, "grad_norm": 5.83301305770874, "learning_rate": 3.623503649635036e-05, "loss": 1.5243, "step": 142000 }, { "epoch": 2.76130286825474, "eval_accuracy": 0.5529555555555555, "eval_f1": 0.5402327125900432, "eval_loss": 1.1958545446395874, "eval_precision": 0.5436921787246107, "eval_recall": 0.5529555555555555, "eval_runtime": 11700.5166, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 142000 }, { "epoch": 2.763247447739426, "grad_norm": Infinity, "learning_rate": 3.6225401459854014e-05, "loss": 1.3934, "step": 142100 }, { "epoch": 2.7651920272241126, "grad_norm": 3.052508592605591, "learning_rate": 3.621566909975669e-05, "loss": 1.484, "step": 142200 }, { "epoch": 2.7671366067087995, "grad_norm": 2.4593145847320557, "learning_rate": 3.620593673965937e-05, "loss": 1.4851, "step": 142300 }, { "epoch": 2.7690811861934854, "grad_norm": 2.0293965339660645, "learning_rate": 3.6196204379562045e-05, "loss": 1.3567, "step": 142400 }, { "epoch": 2.7710257656781723, "grad_norm": 3.5616836547851562, "learning_rate": 3.618647201946472e-05, "loss": 1.494, "step": 142500 }, { "epoch": 
2.7729703451628587, "grad_norm": 3.650461435317993, "learning_rate": 3.61767396593674e-05, "loss": 1.4864, "step": 142600 }, { "epoch": 2.774914924647545, "grad_norm": 10.196043014526367, "learning_rate": 3.616700729927007e-05, "loss": 1.4256, "step": 142700 }, { "epoch": 2.7768595041322315, "grad_norm": 5.651501655578613, "learning_rate": 3.615727493917275e-05, "loss": 1.3229, "step": 142800 }, { "epoch": 2.778804083616918, "grad_norm": 18.10386848449707, "learning_rate": 3.614754257907543e-05, "loss": 1.525, "step": 142900 }, { "epoch": 2.7807486631016043, "grad_norm": 3.1464109420776367, "learning_rate": 3.61378102189781e-05, "loss": 1.5899, "step": 143000 }, { "epoch": 2.7826932425862907, "grad_norm": 2.5966172218322754, "learning_rate": 3.6128077858880784e-05, "loss": 1.7428, "step": 143100 }, { "epoch": 2.784637822070977, "grad_norm": 4.665292263031006, "learning_rate": 3.6118345498783455e-05, "loss": 1.5366, "step": 143200 }, { "epoch": 2.7865824015556635, "grad_norm": 1.6635992527008057, "learning_rate": 3.610861313868613e-05, "loss": 1.4651, "step": 143300 }, { "epoch": 2.78852698104035, "grad_norm": 4.9245991706848145, "learning_rate": 3.609888077858881e-05, "loss": 1.435, "step": 143400 }, { "epoch": 2.7904715605250363, "grad_norm": 5.283934116363525, "learning_rate": 3.6089148418491486e-05, "loss": 1.4794, "step": 143500 }, { "epoch": 2.792416140009723, "grad_norm": 5.139278411865234, "learning_rate": 3.607941605839416e-05, "loss": 1.3865, "step": 143600 }, { "epoch": 2.794360719494409, "grad_norm": 4.906033515930176, "learning_rate": 3.606968369829684e-05, "loss": 1.356, "step": 143700 }, { "epoch": 2.796305298979096, "grad_norm": 5.087152481079102, "learning_rate": 3.605995133819952e-05, "loss": 1.4854, "step": 143800 }, { "epoch": 2.7982498784637824, "grad_norm": 3.3110742568969727, "learning_rate": 3.6050218978102194e-05, "loss": 1.4727, "step": 143900 }, { "epoch": 2.8001944579484688, "grad_norm": 6.21459436416626, "learning_rate": 
3.604048661800487e-05, "loss": 1.4657, "step": 144000 }, { "epoch": 2.802139037433155, "grad_norm": 2.726806879043579, "learning_rate": 3.603075425790755e-05, "loss": 1.3634, "step": 144100 }, { "epoch": 2.8040836169178416, "grad_norm": 11.711623191833496, "learning_rate": 3.602102189781022e-05, "loss": 1.3575, "step": 144200 }, { "epoch": 2.806028196402528, "grad_norm": 5.016298770904541, "learning_rate": 3.6011289537712895e-05, "loss": 1.412, "step": 144300 }, { "epoch": 2.8079727758872144, "grad_norm": 2.8743202686309814, "learning_rate": 3.6001654501216546e-05, "loss": 1.5285, "step": 144400 }, { "epoch": 2.809917355371901, "grad_norm": 3.309705972671509, "learning_rate": 3.59920194647202e-05, "loss": 1.5714, "step": 144500 }, { "epoch": 2.811861934856587, "grad_norm": 1.9416168928146362, "learning_rate": 3.5982287104622874e-05, "loss": 1.4345, "step": 144600 }, { "epoch": 2.8138065143412736, "grad_norm": 6.685002326965332, "learning_rate": 3.597255474452555e-05, "loss": 1.6005, "step": 144700 }, { "epoch": 2.81575109382596, "grad_norm": 6.736208438873291, "learning_rate": 3.596282238442822e-05, "loss": 1.3881, "step": 144800 }, { "epoch": 2.8176956733106464, "grad_norm": 2.892781972885132, "learning_rate": 3.5953090024330905e-05, "loss": 1.3421, "step": 144900 }, { "epoch": 2.819640252795333, "grad_norm": 4.821162223815918, "learning_rate": 3.594335766423358e-05, "loss": 1.5884, "step": 145000 }, { "epoch": 2.819640252795333, "eval_accuracy": 0.5518777777777778, "eval_f1": 0.5429859851309288, "eval_loss": 1.1935709714889526, "eval_precision": 0.5580801221344135, "eval_recall": 0.5518777777777778, "eval_runtime": 11690.9212, "eval_samples_per_second": 15.397, "eval_steps_per_second": 0.481, "step": 145000 }, { "epoch": 2.8215848322800197, "grad_norm": 3.5679290294647217, "learning_rate": 3.593362530413625e-05, "loss": 1.3989, "step": 145100 }, { "epoch": 2.8235294117647056, "grad_norm": 5.087439060211182, "learning_rate": 3.5923892944038936e-05, "loss": 1.5958, 
"step": 145200 }, { "epoch": 2.8254739912493925, "grad_norm": 4.048695087432861, "learning_rate": 3.5914160583941607e-05, "loss": 1.4007, "step": 145300 }, { "epoch": 2.827418570734079, "grad_norm": 2.38126277923584, "learning_rate": 3.5904428223844284e-05, "loss": 1.4921, "step": 145400 }, { "epoch": 2.8293631502187653, "grad_norm": 4.312170505523682, "learning_rate": 3.589469586374696e-05, "loss": 1.4821, "step": 145500 }, { "epoch": 2.8313077297034517, "grad_norm": 7.14309549331665, "learning_rate": 3.588496350364964e-05, "loss": 1.4587, "step": 145600 }, { "epoch": 2.833252309188138, "grad_norm": 3.706202745437622, "learning_rate": 3.5875231143552315e-05, "loss": 1.4545, "step": 145700 }, { "epoch": 2.8351968886728245, "grad_norm": 5.18079137802124, "learning_rate": 3.5865498783454985e-05, "loss": 1.4016, "step": 145800 }, { "epoch": 2.837141468157511, "grad_norm": 2.329741954803467, "learning_rate": 3.585576642335767e-05, "loss": 1.667, "step": 145900 }, { "epoch": 2.8390860476421973, "grad_norm": 4.140172004699707, "learning_rate": 3.584603406326034e-05, "loss": 1.4696, "step": 146000 }, { "epoch": 2.8410306271268837, "grad_norm": 3.9143996238708496, "learning_rate": 3.5836301703163016e-05, "loss": 1.3673, "step": 146100 }, { "epoch": 2.84297520661157, "grad_norm": 3.252748966217041, "learning_rate": 3.58265693430657e-05, "loss": 1.5011, "step": 146200 }, { "epoch": 2.8449197860962565, "grad_norm": 3.2772161960601807, "learning_rate": 3.581683698296837e-05, "loss": 1.4585, "step": 146300 }, { "epoch": 2.8468643655809434, "grad_norm": 1.7339909076690674, "learning_rate": 3.580710462287105e-05, "loss": 1.461, "step": 146400 }, { "epoch": 2.8488089450656293, "grad_norm": 9.189515113830566, "learning_rate": 3.5797372262773724e-05, "loss": 1.4629, "step": 146500 }, { "epoch": 2.850753524550316, "grad_norm": 3.819024085998535, "learning_rate": 3.57876399026764e-05, "loss": 1.4487, "step": 146600 }, { "epoch": 2.8526981040350026, "grad_norm": 4.7378082275390625, 
"learning_rate": 3.577790754257908e-05, "loss": 1.6832, "step": 146700 }, { "epoch": 2.854642683519689, "grad_norm": 3.6273341178894043, "learning_rate": 3.5768175182481755e-05, "loss": 1.4306, "step": 146800 }, { "epoch": 2.8565872630043754, "grad_norm": 2.7646450996398926, "learning_rate": 3.575844282238443e-05, "loss": 1.4624, "step": 146900 }, { "epoch": 2.858531842489062, "grad_norm": 5.538106918334961, "learning_rate": 3.57487104622871e-05, "loss": 1.4426, "step": 147000 }, { "epoch": 2.860476421973748, "grad_norm": 1.7823628187179565, "learning_rate": 3.5738978102189786e-05, "loss": 1.517, "step": 147100 }, { "epoch": 2.8624210014584346, "grad_norm": 3.5713796615600586, "learning_rate": 3.572924574209246e-05, "loss": 1.4357, "step": 147200 }, { "epoch": 2.864365580943121, "grad_norm": 3.744774580001831, "learning_rate": 3.571951338199513e-05, "loss": 1.4799, "step": 147300 }, { "epoch": 2.8663101604278074, "grad_norm": 8.418418884277344, "learning_rate": 3.570978102189782e-05, "loss": 1.5081, "step": 147400 }, { "epoch": 2.868254739912494, "grad_norm": 4.028896808624268, "learning_rate": 3.570014598540146e-05, "loss": 1.5946, "step": 147500 }, { "epoch": 2.87019931939718, "grad_norm": 6.257025241851807, "learning_rate": 3.569041362530414e-05, "loss": 1.4285, "step": 147600 }, { "epoch": 2.872143898881867, "grad_norm": 2.458538055419922, "learning_rate": 3.5680681265206816e-05, "loss": 1.5107, "step": 147700 }, { "epoch": 2.874088478366553, "grad_norm": 3.305389404296875, "learning_rate": 3.567094890510949e-05, "loss": 1.6983, "step": 147800 }, { "epoch": 2.87603305785124, "grad_norm": 1.3700670003890991, "learning_rate": 3.566121654501216e-05, "loss": 1.439, "step": 147900 }, { "epoch": 2.8779776373359263, "grad_norm": 1.5027657747268677, "learning_rate": 3.5651484184914847e-05, "loss": 1.4767, "step": 148000 }, { "epoch": 2.8799222168206127, "grad_norm": 3.323707103729248, "learning_rate": 3.5641751824817524e-05, "loss": 1.6011, "step": 148100 }, { "epoch": 
2.881866796305299, "grad_norm": 6.306870937347412, "learning_rate": 3.5632019464720194e-05, "loss": 1.5862, "step": 148200 }, { "epoch": 2.8838113757899855, "grad_norm": 5.070829391479492, "learning_rate": 3.562228710462287e-05, "loss": 1.492, "step": 148300 }, { "epoch": 2.885755955274672, "grad_norm": 5.8588666915893555, "learning_rate": 3.561255474452555e-05, "loss": 1.526, "step": 148400 }, { "epoch": 2.8877005347593583, "grad_norm": 2.569530487060547, "learning_rate": 3.5602822384428225e-05, "loss": 1.3728, "step": 148500 }, { "epoch": 2.8896451142440447, "grad_norm": 6.642461776733398, "learning_rate": 3.55930900243309e-05, "loss": 1.4902, "step": 148600 }, { "epoch": 2.891589693728731, "grad_norm": 3.161496877670288, "learning_rate": 3.558335766423358e-05, "loss": 1.4547, "step": 148700 }, { "epoch": 2.8935342732134175, "grad_norm": 6.484923362731934, "learning_rate": 3.5573625304136256e-05, "loss": 1.408, "step": 148800 }, { "epoch": 2.895478852698104, "grad_norm": 4.98514461517334, "learning_rate": 3.5563892944038926e-05, "loss": 1.4992, "step": 148900 }, { "epoch": 2.8974234321827907, "grad_norm": 4.038093090057373, "learning_rate": 3.555416058394161e-05, "loss": 1.4231, "step": 149000 }, { "epoch": 2.8993680116674767, "grad_norm": 1.4407713413238525, "learning_rate": 3.554442822384429e-05, "loss": 1.4453, "step": 149100 }, { "epoch": 2.9013125911521636, "grad_norm": 4.864222049713135, "learning_rate": 3.553469586374696e-05, "loss": 1.4952, "step": 149200 }, { "epoch": 2.9032571706368495, "grad_norm": 2.1119303703308105, "learning_rate": 3.552496350364964e-05, "loss": 1.5303, "step": 149300 }, { "epoch": 2.9052017501215364, "grad_norm": 5.2074103355407715, "learning_rate": 3.551523114355231e-05, "loss": 1.4492, "step": 149400 }, { "epoch": 2.9071463296062228, "grad_norm": 6.509381294250488, "learning_rate": 3.550549878345499e-05, "loss": 1.4543, "step": 149500 }, { "epoch": 2.909090909090909, "grad_norm": 3.349846363067627, "learning_rate": 
3.5495766423357665e-05, "loss": 1.6593, "step": 149600 }, { "epoch": 2.9110354885755956, "grad_norm": 3.037105083465576, "learning_rate": 3.548603406326034e-05, "loss": 1.4607, "step": 149700 }, { "epoch": 2.912980068060282, "grad_norm": 4.545512676239014, "learning_rate": 3.547630170316302e-05, "loss": 1.418, "step": 149800 }, { "epoch": 2.9149246475449684, "grad_norm": 3.920936346054077, "learning_rate": 3.5466569343065696e-05, "loss": 1.4248, "step": 149900 }, { "epoch": 2.916869227029655, "grad_norm": 5.607579708099365, "learning_rate": 3.5456836982968373e-05, "loss": 1.4449, "step": 150000 }, { "epoch": 2.916869227029655, "eval_accuracy": 0.5287611111111111, "eval_f1": 0.5164174569593832, "eval_loss": 1.2337738275527954, "eval_precision": 0.5527380014964488, "eval_recall": 0.5287611111111111, "eval_runtime": 11630.0995, "eval_samples_per_second": 15.477, "eval_steps_per_second": 0.484, "step": 150000 }, { "epoch": 2.918813806514341, "grad_norm": 4.287477016448975, "learning_rate": 3.5447104622871044e-05, "loss": 1.56, "step": 150100 }, { "epoch": 2.9207583859990276, "grad_norm": 4.131676197052002, "learning_rate": 3.543737226277373e-05, "loss": 1.4585, "step": 150200 }, { "epoch": 2.922702965483714, "grad_norm": 8.563018798828125, "learning_rate": 3.5427639902676404e-05, "loss": 1.4207, "step": 150300 }, { "epoch": 2.9246475449684004, "grad_norm": 4.976408004760742, "learning_rate": 3.5417907542579075e-05, "loss": 1.3587, "step": 150400 }, { "epoch": 2.9265921244530873, "grad_norm": 5.384346008300781, "learning_rate": 3.540817518248175e-05, "loss": 1.454, "step": 150500 }, { "epoch": 2.928536703937773, "grad_norm": 5.853832244873047, "learning_rate": 3.539844282238443e-05, "loss": 1.4909, "step": 150600 }, { "epoch": 2.93048128342246, "grad_norm": 3.914234161376953, "learning_rate": 3.5388710462287106e-05, "loss": 1.4092, "step": 150700 }, { "epoch": 2.9324258629071465, "grad_norm": 5.015030384063721, "learning_rate": 3.537897810218978e-05, "loss": 1.4489, 
"step": 150800 }, { "epoch": 2.934370442391833, "grad_norm": 7.263000011444092, "learning_rate": 3.536924574209246e-05, "loss": 1.4719, "step": 150900 }, { "epoch": 2.9363150218765193, "grad_norm": 4.237588882446289, "learning_rate": 3.535951338199514e-05, "loss": 1.408, "step": 151000 }, { "epoch": 2.9382596013612057, "grad_norm": 5.551347255706787, "learning_rate": 3.534978102189781e-05, "loss": 1.5267, "step": 151100 }, { "epoch": 2.940204180845892, "grad_norm": 5.720644950866699, "learning_rate": 3.534004866180049e-05, "loss": 1.3856, "step": 151200 }, { "epoch": 2.9421487603305785, "grad_norm": 4.472009658813477, "learning_rate": 3.533031630170317e-05, "loss": 1.4089, "step": 151300 }, { "epoch": 2.944093339815265, "grad_norm": 6.717526912689209, "learning_rate": 3.532058394160584e-05, "loss": 1.5086, "step": 151400 }, { "epoch": 2.9460379192999513, "grad_norm": 5.23137903213501, "learning_rate": 3.531094890510949e-05, "loss": 1.5534, "step": 151500 }, { "epoch": 2.9479824987846377, "grad_norm": 4.114367961883545, "learning_rate": 3.5301216545012166e-05, "loss": 1.5693, "step": 151600 }, { "epoch": 2.949927078269324, "grad_norm": 8.448725700378418, "learning_rate": 3.529148418491484e-05, "loss": 1.4834, "step": 151700 }, { "epoch": 2.951871657754011, "grad_norm": 4.006798267364502, "learning_rate": 3.528175182481752e-05, "loss": 1.4522, "step": 151800 }, { "epoch": 2.953816237238697, "grad_norm": 5.5401930809021, "learning_rate": 3.52720194647202e-05, "loss": 1.4255, "step": 151900 }, { "epoch": 2.9557608167233838, "grad_norm": 3.1997087001800537, "learning_rate": 3.526228710462287e-05, "loss": 1.4639, "step": 152000 }, { "epoch": 2.95770539620807, "grad_norm": 5.6073222160339355, "learning_rate": 3.525255474452555e-05, "loss": 1.4435, "step": 152100 }, { "epoch": 2.9596499756927566, "grad_norm": 3.8035967350006104, "learning_rate": 3.524282238442823e-05, "loss": 1.5237, "step": 152200 }, { "epoch": 2.961594555177443, "grad_norm": 3.051806926727295, 
"learning_rate": 3.52330900243309e-05, "loss": 1.5651, "step": 152300 }, { "epoch": 2.9635391346621294, "grad_norm": 10.054733276367188, "learning_rate": 3.522335766423358e-05, "loss": 1.3675, "step": 152400 }, { "epoch": 2.9654837141468158, "grad_norm": 2.688277244567871, "learning_rate": 3.521362530413625e-05, "loss": 1.469, "step": 152500 }, { "epoch": 2.967428293631502, "grad_norm": 2.040126085281372, "learning_rate": 3.520389294403893e-05, "loss": 1.5122, "step": 152600 }, { "epoch": 2.9693728731161886, "grad_norm": 3.8344414234161377, "learning_rate": 3.519416058394161e-05, "loss": 1.6178, "step": 152700 }, { "epoch": 2.971317452600875, "grad_norm": 3.7688181400299072, "learning_rate": 3.5184428223844284e-05, "loss": 1.579, "step": 152800 }, { "epoch": 2.9732620320855614, "grad_norm": 3.86669921875, "learning_rate": 3.517469586374696e-05, "loss": 1.3125, "step": 152900 }, { "epoch": 2.975206611570248, "grad_norm": 4.066821098327637, "learning_rate": 3.516496350364964e-05, "loss": 1.5732, "step": 153000 }, { "epoch": 2.9771511910549346, "grad_norm": 8.678412437438965, "learning_rate": 3.5155231143552315e-05, "loss": 1.4085, "step": 153100 }, { "epoch": 2.9790957705396206, "grad_norm": 5.11848258972168, "learning_rate": 3.514549878345499e-05, "loss": 1.7467, "step": 153200 }, { "epoch": 2.9810403500243075, "grad_norm": 6.370443820953369, "learning_rate": 3.513576642335766e-05, "loss": 1.3945, "step": 153300 }, { "epoch": 2.9829849295089934, "grad_norm": 6.093334674835205, "learning_rate": 3.5126034063260346e-05, "loss": 1.5956, "step": 153400 }, { "epoch": 2.9849295089936803, "grad_norm": 5.641905784606934, "learning_rate": 3.5116301703163016e-05, "loss": 1.4727, "step": 153500 }, { "epoch": 2.9868740884783667, "grad_norm": 5.287601470947266, "learning_rate": 3.5106666666666674e-05, "loss": 1.5838, "step": 153600 }, { "epoch": 2.988818667963053, "grad_norm": 2.474306583404541, "learning_rate": 3.5096934306569344e-05, "loss": 1.3566, "step": 153700 }, { "epoch": 
2.9907632474477395, "grad_norm": 5.46156120300293, "learning_rate": 3.508720194647202e-05, "loss": 1.516, "step": 153800 }, { "epoch": 2.992707826932426, "grad_norm": 4.142871379852295, "learning_rate": 3.50774695863747e-05, "loss": 1.6162, "step": 153900 }, { "epoch": 2.9946524064171123, "grad_norm": 5.105610370635986, "learning_rate": 3.5067737226277375e-05, "loss": 1.49, "step": 154000 }, { "epoch": 2.9965969859017987, "grad_norm": 3.518402099609375, "learning_rate": 3.505800486618005e-05, "loss": 1.3356, "step": 154100 }, { "epoch": 2.998541565386485, "grad_norm": 3.8503239154815674, "learning_rate": 3.504827250608272e-05, "loss": 1.6901, "step": 154200 }, { "epoch": 3.0004861448711715, "grad_norm": 4.418140888214111, "learning_rate": 3.5038540145985406e-05, "loss": 1.3597, "step": 154300 }, { "epoch": 3.002430724355858, "grad_norm": 9.23311710357666, "learning_rate": 3.5028807785888076e-05, "loss": 1.5096, "step": 154400 }, { "epoch": 3.0043753038405443, "grad_norm": 12.454386711120605, "learning_rate": 3.5019075425790753e-05, "loss": 1.3478, "step": 154500 }, { "epoch": 3.0063198833252307, "grad_norm": 5.823021411895752, "learning_rate": 3.500934306569344e-05, "loss": 1.4113, "step": 154600 }, { "epoch": 3.0082644628099175, "grad_norm": 3.560788631439209, "learning_rate": 3.499961070559611e-05, "loss": 1.4758, "step": 154700 }, { "epoch": 3.010209042294604, "grad_norm": 3.3216919898986816, "learning_rate": 3.4989878345498785e-05, "loss": 1.3481, "step": 154800 }, { "epoch": 3.0121536217792904, "grad_norm": 2.8766303062438965, "learning_rate": 3.498014598540146e-05, "loss": 1.3807, "step": 154900 }, { "epoch": 3.0140982012639768, "grad_norm": 3.900698661804199, "learning_rate": 3.497041362530414e-05, "loss": 1.4557, "step": 155000 }, { "epoch": 3.0140982012639768, "eval_accuracy": 0.5560833333333334, "eval_f1": 0.542822377446443, "eval_loss": 1.1909997463226318, "eval_precision": 0.5568220480564966, "eval_recall": 0.5560833333333334, "eval_runtime": 
11506.0655, "eval_samples_per_second": 15.644, "eval_steps_per_second": 0.489, "step": 155000 }, { "epoch": 3.016042780748663, "grad_norm": 4.378618240356445, "learning_rate": 3.4960681265206816e-05, "loss": 1.5551, "step": 155100 }, { "epoch": 3.0179873602333496, "grad_norm": 6.628927230834961, "learning_rate": 3.495094890510949e-05, "loss": 1.4091, "step": 155200 }, { "epoch": 3.019931939718036, "grad_norm": 5.191813945770264, "learning_rate": 3.494121654501217e-05, "loss": 1.4307, "step": 155300 }, { "epoch": 3.0218765192027224, "grad_norm": 8.69275188446045, "learning_rate": 3.493148418491484e-05, "loss": 1.547, "step": 155400 }, { "epoch": 3.023821098687409, "grad_norm": 5.704620361328125, "learning_rate": 3.4921751824817524e-05, "loss": 1.6806, "step": 155500 }, { "epoch": 3.025765678172095, "grad_norm": 9.204468727111816, "learning_rate": 3.491211678832117e-05, "loss": 1.4691, "step": 155600 }, { "epoch": 3.0277102576567816, "grad_norm": 6.161401271820068, "learning_rate": 3.4902384428223845e-05, "loss": 1.429, "step": 155700 }, { "epoch": 3.029654837141468, "grad_norm": 3.4619200229644775, "learning_rate": 3.489265206812652e-05, "loss": 1.465, "step": 155800 }, { "epoch": 3.0315994166261544, "grad_norm": 4.2969794273376465, "learning_rate": 3.48829197080292e-05, "loss": 1.4229, "step": 155900 }, { "epoch": 3.0335439961108412, "grad_norm": 1.9654381275177002, "learning_rate": 3.4873187347931876e-05, "loss": 1.4961, "step": 156000 }, { "epoch": 3.0354885755955276, "grad_norm": 3.964885711669922, "learning_rate": 3.486345498783455e-05, "loss": 1.4973, "step": 156100 }, { "epoch": 3.037433155080214, "grad_norm": 5.054129123687744, "learning_rate": 3.485372262773723e-05, "loss": 1.5377, "step": 156200 }, { "epoch": 3.0393777345649005, "grad_norm": 1.2099976539611816, "learning_rate": 3.48439902676399e-05, "loss": 1.4437, "step": 156300 }, { "epoch": 3.041322314049587, "grad_norm": 6.038549423217773, "learning_rate": 3.4834257907542584e-05, "loss": 1.4389, 
"step": 156400 }, { "epoch": 3.0432668935342733, "grad_norm": 4.338478088378906, "learning_rate": 3.482452554744526e-05, "loss": 1.7286, "step": 156500 }, { "epoch": 3.0452114730189597, "grad_norm": 2.845613718032837, "learning_rate": 3.481479318734793e-05, "loss": 1.3789, "step": 156600 }, { "epoch": 3.047156052503646, "grad_norm": 8.468988418579102, "learning_rate": 3.480506082725061e-05, "loss": 1.307, "step": 156700 }, { "epoch": 3.0491006319883325, "grad_norm": 5.9633049964904785, "learning_rate": 3.4795328467153285e-05, "loss": 1.5535, "step": 156800 }, { "epoch": 3.051045211473019, "grad_norm": 4.221526145935059, "learning_rate": 3.478559610705596e-05, "loss": 1.4783, "step": 156900 }, { "epoch": 3.0529897909577053, "grad_norm": 5.679263114929199, "learning_rate": 3.477586374695864e-05, "loss": 1.5655, "step": 157000 }, { "epoch": 3.0549343704423917, "grad_norm": 3.6308860778808594, "learning_rate": 3.4766131386861317e-05, "loss": 1.421, "step": 157100 }, { "epoch": 3.056878949927078, "grad_norm": 2.582277297973633, "learning_rate": 3.4756399026763994e-05, "loss": 1.5437, "step": 157200 }, { "epoch": 3.0588235294117645, "grad_norm": 7.978797912597656, "learning_rate": 3.4746666666666664e-05, "loss": 1.4867, "step": 157300 }, { "epoch": 3.0607681088964513, "grad_norm": 4.018340110778809, "learning_rate": 3.473693430656935e-05, "loss": 1.3225, "step": 157400 }, { "epoch": 3.0627126883811377, "grad_norm": 5.349169731140137, "learning_rate": 3.472720194647202e-05, "loss": 1.4587, "step": 157500 }, { "epoch": 3.064657267865824, "grad_norm": 5.826761245727539, "learning_rate": 3.471756690997567e-05, "loss": 1.4607, "step": 157600 }, { "epoch": 3.0666018473505106, "grad_norm": 2.31510066986084, "learning_rate": 3.4707834549878346e-05, "loss": 1.3496, "step": 157700 }, { "epoch": 3.068546426835197, "grad_norm": 2.92162823677063, "learning_rate": 3.469810218978102e-05, "loss": 1.4536, "step": 157800 }, { "epoch": 3.0704910063198834, "grad_norm": 7.338606834411621, 
"learning_rate": 3.46883698296837e-05, "loss": 1.5128, "step": 157900 }, { "epoch": 3.0724355858045698, "grad_norm": 3.310462236404419, "learning_rate": 3.467863746958638e-05, "loss": 1.4447, "step": 158000 }, { "epoch": 3.074380165289256, "grad_norm": 5.357006549835205, "learning_rate": 3.466900243309003e-05, "loss": 1.4909, "step": 158100 }, { "epoch": 3.0763247447739426, "grad_norm": 3.4434895515441895, "learning_rate": 3.46592700729927e-05, "loss": 1.4935, "step": 158200 }, { "epoch": 3.078269324258629, "grad_norm": 4.40606689453125, "learning_rate": 3.464953771289538e-05, "loss": 1.4705, "step": 158300 }, { "epoch": 3.0802139037433154, "grad_norm": 2.9572014808654785, "learning_rate": 3.463980535279805e-05, "loss": 1.4884, "step": 158400 }, { "epoch": 3.082158483228002, "grad_norm": 4.8454999923706055, "learning_rate": 3.463007299270073e-05, "loss": 1.6044, "step": 158500 }, { "epoch": 3.084103062712688, "grad_norm": 4.1979660987854, "learning_rate": 3.4620340632603406e-05, "loss": 1.5592, "step": 158600 }, { "epoch": 3.086047642197375, "grad_norm": 2.6623246669769287, "learning_rate": 3.461060827250608e-05, "loss": 1.4471, "step": 158700 }, { "epoch": 3.0879922216820614, "grad_norm": 5.711036205291748, "learning_rate": 3.460087591240876e-05, "loss": 1.4339, "step": 158800 }, { "epoch": 3.089936801166748, "grad_norm": 2.604954481124878, "learning_rate": 3.459114355231144e-05, "loss": 1.5779, "step": 158900 }, { "epoch": 3.0918813806514343, "grad_norm": 7.697136402130127, "learning_rate": 3.4581411192214114e-05, "loss": 1.5035, "step": 159000 }, { "epoch": 3.0938259601361207, "grad_norm": 3.4283740520477295, "learning_rate": 3.457167883211679e-05, "loss": 1.4234, "step": 159100 }, { "epoch": 3.095770539620807, "grad_norm": 2.1430537700653076, "learning_rate": 3.456194647201947e-05, "loss": 1.5259, "step": 159200 }, { "epoch": 3.0977151191054935, "grad_norm": 7.039016246795654, "learning_rate": 3.4552214111922145e-05, "loss": 1.4422, "step": 159300 }, { "epoch": 
3.09965969859018, "grad_norm": 2.817624568939209, "learning_rate": 3.4542481751824816e-05, "loss": 1.4462, "step": 159400 }, { "epoch": 3.1016042780748663, "grad_norm": 1.441921591758728, "learning_rate": 3.45327493917275e-05, "loss": 1.4992, "step": 159500 }, { "epoch": 3.1035488575595527, "grad_norm": 4.220325469970703, "learning_rate": 3.452301703163017e-05, "loss": 1.4053, "step": 159600 }, { "epoch": 3.105493437044239, "grad_norm": 5.407520771026611, "learning_rate": 3.451328467153285e-05, "loss": 1.594, "step": 159700 }, { "epoch": 3.1074380165289255, "grad_norm": 6.420644760131836, "learning_rate": 3.450355231143553e-05, "loss": 1.5897, "step": 159800 }, { "epoch": 3.109382596013612, "grad_norm": 4.587944507598877, "learning_rate": 3.44938199513382e-05, "loss": 1.5929, "step": 159900 }, { "epoch": 3.1113271754982983, "grad_norm": 4.9559502601623535, "learning_rate": 3.448408759124088e-05, "loss": 1.6852, "step": 160000 }, { "epoch": 3.1113271754982983, "eval_accuracy": 0.5563666666666667, "eval_f1": 0.5496830905877259, "eval_loss": 1.1851845979690552, "eval_precision": 0.5596602923545012, "eval_recall": 0.5563666666666667, "eval_runtime": 11499.8449, "eval_samples_per_second": 15.652, "eval_steps_per_second": 0.489, "step": 160000 }, { "epoch": 3.113271754982985, "grad_norm": 14.44343376159668, "learning_rate": 3.447435523114355e-05, "loss": 1.544, "step": 160100 }, { "epoch": 3.1152163344676715, "grad_norm": 2.753309488296509, "learning_rate": 3.446462287104623e-05, "loss": 1.496, "step": 160200 }, { "epoch": 3.117160913952358, "grad_norm": 6.377251148223877, "learning_rate": 3.445489051094891e-05, "loss": 1.4492, "step": 160300 }, { "epoch": 3.1191054934370444, "grad_norm": 3.3870558738708496, "learning_rate": 3.444515815085158e-05, "loss": 1.4977, "step": 160400 }, { "epoch": 3.1210500729217308, "grad_norm": 8.413570404052734, "learning_rate": 3.443542579075426e-05, "loss": 1.5978, "step": 160500 }, { "epoch": 3.122994652406417, "grad_norm": 
1.8220551013946533, "learning_rate": 3.442569343065693e-05, "loss": 1.5928, "step": 160600 }, { "epoch": 3.1249392318911036, "grad_norm": 4.113388538360596, "learning_rate": 3.441596107055961e-05, "loss": 1.5012, "step": 160700 }, { "epoch": 3.12688381137579, "grad_norm": 4.639334201812744, "learning_rate": 3.440622871046229e-05, "loss": 1.5625, "step": 160800 }, { "epoch": 3.1288283908604764, "grad_norm": 11.515008926391602, "learning_rate": 3.4396496350364964e-05, "loss": 1.4686, "step": 160900 }, { "epoch": 3.1307729703451628, "grad_norm": 2.0638904571533203, "learning_rate": 3.438676399026764e-05, "loss": 1.4161, "step": 161000 }, { "epoch": 3.132717549829849, "grad_norm": 5.804614067077637, "learning_rate": 3.437703163017032e-05, "loss": 1.4317, "step": 161100 }, { "epoch": 3.1346621293145356, "grad_norm": 3.682448625564575, "learning_rate": 3.4367299270072995e-05, "loss": 1.528, "step": 161200 }, { "epoch": 3.136606708799222, "grad_norm": 3.4133918285369873, "learning_rate": 3.435756690997567e-05, "loss": 1.4988, "step": 161300 }, { "epoch": 3.1385512882839084, "grad_norm": 5.198219299316406, "learning_rate": 3.434783454987835e-05, "loss": 1.4827, "step": 161400 }, { "epoch": 3.1404958677685952, "grad_norm": 2.2315680980682373, "learning_rate": 3.4338102189781026e-05, "loss": 1.4814, "step": 161500 }, { "epoch": 3.1424404472532816, "grad_norm": 7.005928039550781, "learning_rate": 3.4328369829683697e-05, "loss": 1.5579, "step": 161600 }, { "epoch": 3.144385026737968, "grad_norm": 2.3937325477600098, "learning_rate": 3.431863746958638e-05, "loss": 1.4017, "step": 161700 }, { "epoch": 3.1463296062226545, "grad_norm": 19.875730514526367, "learning_rate": 3.430890510948905e-05, "loss": 1.6664, "step": 161800 }, { "epoch": 3.148274185707341, "grad_norm": 3.6318655014038086, "learning_rate": 3.429917274939173e-05, "loss": 1.4259, "step": 161900 }, { "epoch": 3.1502187651920273, "grad_norm": 3.8171072006225586, "learning_rate": 3.428944038929441e-05, "loss": 1.4906, 
"step": 162000 }, { "epoch": 3.1521633446767137, "grad_norm": 8.396157264709473, "learning_rate": 3.4279805352798056e-05, "loss": 1.4535, "step": 162100 }, { "epoch": 3.1541079241614, "grad_norm": 5.0974578857421875, "learning_rate": 3.427007299270073e-05, "loss": 1.3289, "step": 162200 }, { "epoch": 3.1560525036460865, "grad_norm": 4.535772800445557, "learning_rate": 3.426034063260341e-05, "loss": 1.4448, "step": 162300 }, { "epoch": 3.157997083130773, "grad_norm": 4.318943500518799, "learning_rate": 3.425060827250609e-05, "loss": 1.4734, "step": 162400 }, { "epoch": 3.1599416626154593, "grad_norm": 4.175350666046143, "learning_rate": 3.424087591240876e-05, "loss": 1.4061, "step": 162500 }, { "epoch": 3.1618862421001457, "grad_norm": 4.043076038360596, "learning_rate": 3.423114355231144e-05, "loss": 1.4379, "step": 162600 }, { "epoch": 3.163830821584832, "grad_norm": 4.845165729522705, "learning_rate": 3.422141119221412e-05, "loss": 1.4215, "step": 162700 }, { "epoch": 3.165775401069519, "grad_norm": 6.767171382904053, "learning_rate": 3.421167883211679e-05, "loss": 1.4005, "step": 162800 }, { "epoch": 3.1677199805542053, "grad_norm": 10.41292667388916, "learning_rate": 3.4201946472019465e-05, "loss": 1.4402, "step": 162900 }, { "epoch": 3.1696645600388917, "grad_norm": 10.690726280212402, "learning_rate": 3.419221411192214e-05, "loss": 1.481, "step": 163000 }, { "epoch": 3.171609139523578, "grad_norm": 2.7384896278381348, "learning_rate": 3.418248175182482e-05, "loss": 1.3685, "step": 163100 }, { "epoch": 3.1735537190082646, "grad_norm": 4.212832450866699, "learning_rate": 3.4172749391727496e-05, "loss": 1.3641, "step": 163200 }, { "epoch": 3.175498298492951, "grad_norm": 4.693664073944092, "learning_rate": 3.416301703163017e-05, "loss": 1.4568, "step": 163300 }, { "epoch": 3.1774428779776374, "grad_norm": 12.574394226074219, "learning_rate": 3.415328467153285e-05, "loss": 1.4431, "step": 163400 }, { "epoch": 3.1793874574623238, "grad_norm": 6.062303066253662, 
"learning_rate": 3.414355231143552e-05, "loss": 1.6609, "step": 163500 }, { "epoch": 3.18133203694701, "grad_norm": 3.180542230606079, "learning_rate": 3.4133819951338204e-05, "loss": 1.3888, "step": 163600 }, { "epoch": 3.1832766164316966, "grad_norm": 3.9438633918762207, "learning_rate": 3.4124087591240875e-05, "loss": 1.3393, "step": 163700 }, { "epoch": 3.185221195916383, "grad_norm": 1.9487624168395996, "learning_rate": 3.411435523114355e-05, "loss": 1.502, "step": 163800 }, { "epoch": 3.1871657754010694, "grad_norm": 3.4765048027038574, "learning_rate": 3.4104622871046235e-05, "loss": 1.5404, "step": 163900 }, { "epoch": 3.189110354885756, "grad_norm": 2.445920705795288, "learning_rate": 3.4094890510948906e-05, "loss": 1.5198, "step": 164000 }, { "epoch": 3.191054934370442, "grad_norm": 2.5661659240722656, "learning_rate": 3.408515815085158e-05, "loss": 1.5561, "step": 164100 }, { "epoch": 3.192999513855129, "grad_norm": 3.865601062774658, "learning_rate": 3.407562043795621e-05, "loss": 1.7031, "step": 164200 }, { "epoch": 3.1949440933398154, "grad_norm": 4.287374973297119, "learning_rate": 3.4065888077858885e-05, "loss": 1.5207, "step": 164300 }, { "epoch": 3.196888672824502, "grad_norm": 4.0022807121276855, "learning_rate": 3.4056155717761555e-05, "loss": 1.5125, "step": 164400 }, { "epoch": 3.1988332523091882, "grad_norm": 7.201198101043701, "learning_rate": 3.404642335766424e-05, "loss": 1.4302, "step": 164500 }, { "epoch": 3.2007778317938747, "grad_norm": 2.977882146835327, "learning_rate": 3.403669099756691e-05, "loss": 1.5218, "step": 164600 }, { "epoch": 3.202722411278561, "grad_norm": 10.507155418395996, "learning_rate": 3.4026958637469586e-05, "loss": 1.5253, "step": 164700 }, { "epoch": 3.2046669907632475, "grad_norm": 16.870819091796875, "learning_rate": 3.401722627737226e-05, "loss": 1.4815, "step": 164800 }, { "epoch": 3.206611570247934, "grad_norm": 5.169261932373047, "learning_rate": 3.400749391727494e-05, "loss": 1.5295, "step": 164900 }, { 
"epoch": 3.2085561497326203, "grad_norm": 4.456422805786133, "learning_rate": 3.399776155717762e-05, "loss": 1.4623, "step": 165000 }, { "epoch": 3.2085561497326203, "eval_accuracy": 0.5597666666666666, "eval_f1": 0.5556707441373586, "eval_loss": 1.177709937095642, "eval_precision": 0.5622914215650292, "eval_recall": 0.5597666666666666, "eval_runtime": 11529.4461, "eval_samples_per_second": 15.612, "eval_steps_per_second": 0.488, "step": 165000 }, { "epoch": 3.2105007292173067, "grad_norm": 2.2461650371551514, "learning_rate": 3.398812652068127e-05, "loss": 1.4483, "step": 165100 }, { "epoch": 3.212445308701993, "grad_norm": 3.7702481746673584, "learning_rate": 3.3978394160583945e-05, "loss": 1.3639, "step": 165200 }, { "epoch": 3.2143898881866795, "grad_norm": 4.928601264953613, "learning_rate": 3.3968661800486615e-05, "loss": 1.4357, "step": 165300 }, { "epoch": 3.216334467671366, "grad_norm": 2.713656187057495, "learning_rate": 3.39589294403893e-05, "loss": 1.7239, "step": 165400 }, { "epoch": 3.2182790471560523, "grad_norm": 1.8045425415039062, "learning_rate": 3.394919708029197e-05, "loss": 1.3018, "step": 165500 }, { "epoch": 3.220223626640739, "grad_norm": 4.3961687088012695, "learning_rate": 3.3939464720194646e-05, "loss": 1.3347, "step": 165600 }, { "epoch": 3.2221682061254255, "grad_norm": 2.511216163635254, "learning_rate": 3.392973236009733e-05, "loss": 1.3846, "step": 165700 }, { "epoch": 3.224112785610112, "grad_norm": 5.548396587371826, "learning_rate": 3.392e-05, "loss": 1.4766, "step": 165800 }, { "epoch": 3.2260573650947983, "grad_norm": 3.566295862197876, "learning_rate": 3.391026763990268e-05, "loss": 1.4312, "step": 165900 }, { "epoch": 3.2280019445794847, "grad_norm": 5.330104827880859, "learning_rate": 3.3900535279805355e-05, "loss": 1.3534, "step": 166000 }, { "epoch": 3.229946524064171, "grad_norm": 3.618781805038452, "learning_rate": 3.389080291970803e-05, "loss": 1.6026, "step": 166100 }, { "epoch": 3.2318911035488576, "grad_norm": 
2.9892220497131348, "learning_rate": 3.3881167883211676e-05, "loss": 1.5931, "step": 166200 }, { "epoch": 3.233835683033544, "grad_norm": 2.2262182235717773, "learning_rate": 3.387143552311436e-05, "loss": 1.504, "step": 166300 }, { "epoch": 3.2357802625182304, "grad_norm": 3.7211337089538574, "learning_rate": 3.386170316301704e-05, "loss": 1.621, "step": 166400 }, { "epoch": 3.2377248420029168, "grad_norm": 4.043158531188965, "learning_rate": 3.385197080291971e-05, "loss": 1.3483, "step": 166500 }, { "epoch": 3.239669421487603, "grad_norm": 7.339958667755127, "learning_rate": 3.384223844282239e-05, "loss": 1.4835, "step": 166600 }, { "epoch": 3.2416140009722896, "grad_norm": 4.4084272384643555, "learning_rate": 3.383250608272506e-05, "loss": 1.4705, "step": 166700 }, { "epoch": 3.243558580456976, "grad_norm": 4.446264266967773, "learning_rate": 3.382277372262774e-05, "loss": 1.4706, "step": 166800 }, { "epoch": 3.245503159941663, "grad_norm": 4.914437770843506, "learning_rate": 3.3813041362530415e-05, "loss": 1.3834, "step": 166900 }, { "epoch": 3.2474477394263492, "grad_norm": 4.875402450561523, "learning_rate": 3.380330900243309e-05, "loss": 1.4027, "step": 167000 }, { "epoch": 3.2493923189110356, "grad_norm": 5.864405155181885, "learning_rate": 3.379357664233577e-05, "loss": 1.3768, "step": 167100 }, { "epoch": 3.251336898395722, "grad_norm": 3.7369678020477295, "learning_rate": 3.3783844282238446e-05, "loss": 1.4313, "step": 167200 }, { "epoch": 3.2532814778804084, "grad_norm": 3.3835067749023438, "learning_rate": 3.377411192214112e-05, "loss": 1.3769, "step": 167300 }, { "epoch": 3.255226057365095, "grad_norm": 3.489903211593628, "learning_rate": 3.376437956204379e-05, "loss": 1.5366, "step": 167400 }, { "epoch": 3.2571706368497813, "grad_norm": 7.645496845245361, "learning_rate": 3.375464720194647e-05, "loss": 1.47, "step": 167500 }, { "epoch": 3.2591152163344677, "grad_norm": 3.238957405090332, "learning_rate": 3.3744914841849154e-05, "loss": 1.4682, 
"step": 167600 }, { "epoch": 3.261059795819154, "grad_norm": 2.6915946006774902, "learning_rate": 3.3735182481751824e-05, "loss": 1.5108, "step": 167700 }, { "epoch": 3.2630043753038405, "grad_norm": 10.59205436706543, "learning_rate": 3.37254501216545e-05, "loss": 1.4021, "step": 167800 }, { "epoch": 3.264948954788527, "grad_norm": 4.084170341491699, "learning_rate": 3.371571776155718e-05, "loss": 1.5375, "step": 167900 }, { "epoch": 3.2668935342732133, "grad_norm": 3.372372627258301, "learning_rate": 3.3705985401459855e-05, "loss": 1.3653, "step": 168000 }, { "epoch": 3.2688381137578997, "grad_norm": 1.8437151908874512, "learning_rate": 3.369625304136253e-05, "loss": 1.4295, "step": 168100 }, { "epoch": 3.2707826932425865, "grad_norm": 3.7493937015533447, "learning_rate": 3.368652068126521e-05, "loss": 1.2969, "step": 168200 }, { "epoch": 3.2727272727272725, "grad_norm": 3.074734687805176, "learning_rate": 3.3676788321167886e-05, "loss": 1.3726, "step": 168300 }, { "epoch": 3.2746718522119593, "grad_norm": 1.738521695137024, "learning_rate": 3.366705596107056e-05, "loss": 1.6283, "step": 168400 }, { "epoch": 3.2766164316966457, "grad_norm": 6.238025188446045, "learning_rate": 3.365732360097324e-05, "loss": 1.4229, "step": 168500 }, { "epoch": 3.278561011181332, "grad_norm": 4.599395751953125, "learning_rate": 3.364759124087592e-05, "loss": 1.4658, "step": 168600 }, { "epoch": 3.2805055906660185, "grad_norm": 8.876984596252441, "learning_rate": 3.363785888077859e-05, "loss": 1.4276, "step": 168700 }, { "epoch": 3.282450170150705, "grad_norm": 23.71131134033203, "learning_rate": 3.362812652068127e-05, "loss": 1.6286, "step": 168800 }, { "epoch": 3.2843947496353914, "grad_norm": 3.630026340484619, "learning_rate": 3.361839416058394e-05, "loss": 1.4337, "step": 168900 }, { "epoch": 3.2863393291200778, "grad_norm": 5.31183385848999, "learning_rate": 3.360866180048662e-05, "loss": 1.3887, "step": 169000 }, { "epoch": 3.288283908604764, "grad_norm": 5.391042709350586, 
"learning_rate": 3.3598929440389296e-05, "loss": 1.5046, "step": 169100 }, { "epoch": 3.2902284880894506, "grad_norm": 4.481645107269287, "learning_rate": 3.358919708029197e-05, "loss": 1.5236, "step": 169200 }, { "epoch": 3.292173067574137, "grad_norm": 6.014301300048828, "learning_rate": 3.357946472019465e-05, "loss": 1.6536, "step": 169300 }, { "epoch": 3.2941176470588234, "grad_norm": 4.956286430358887, "learning_rate": 3.35698296836983e-05, "loss": 1.3984, "step": 169400 }, { "epoch": 3.2960622265435098, "grad_norm": 7.386185169219971, "learning_rate": 3.356009732360098e-05, "loss": 1.4376, "step": 169500 }, { "epoch": 3.298006806028196, "grad_norm": 7.998392105102539, "learning_rate": 3.355036496350365e-05, "loss": 1.4107, "step": 169600 }, { "epoch": 3.299951385512883, "grad_norm": 6.161044597625732, "learning_rate": 3.354063260340633e-05, "loss": 1.4325, "step": 169700 }, { "epoch": 3.3018959649975694, "grad_norm": 7.226409912109375, "learning_rate": 3.3530900243309e-05, "loss": 1.3626, "step": 169800 }, { "epoch": 3.303840544482256, "grad_norm": 2.9761719703674316, "learning_rate": 3.352116788321168e-05, "loss": 1.4696, "step": 169900 }, { "epoch": 3.3057851239669422, "grad_norm": 1.6315118074417114, "learning_rate": 3.3511435523114356e-05, "loss": 1.4993, "step": 170000 }, { "epoch": 3.3057851239669422, "eval_accuracy": 0.5299166666666667, "eval_f1": 0.5146131282616163, "eval_loss": 1.234226942062378, "eval_precision": 0.536210469960953, "eval_recall": 0.5299166666666667, "eval_runtime": 11533.6919, "eval_samples_per_second": 15.606, "eval_steps_per_second": 0.488, "step": 170000 }, { "epoch": 3.3077297034516286, "grad_norm": 5.031646728515625, "learning_rate": 3.350170316301703e-05, "loss": 1.4047, "step": 170100 }, { "epoch": 3.309674282936315, "grad_norm": 2.814578056335449, "learning_rate": 3.349197080291971e-05, "loss": 1.427, "step": 170200 }, { "epoch": 3.3116188624210015, "grad_norm": 2.387068748474121, "learning_rate": 3.348223844282238e-05, 
"loss": 1.6387, "step": 170300 }, { "epoch": 3.313563441905688, "grad_norm": 11.947486877441406, "learning_rate": 3.3472506082725064e-05, "loss": 1.4381, "step": 170400 }, { "epoch": 3.3155080213903743, "grad_norm": 4.583066940307617, "learning_rate": 3.346277372262774e-05, "loss": 1.5588, "step": 170500 }, { "epoch": 3.3174526008750607, "grad_norm": 2.9906303882598877, "learning_rate": 3.345304136253041e-05, "loss": 1.4087, "step": 170600 }, { "epoch": 3.319397180359747, "grad_norm": 5.771797180175781, "learning_rate": 3.3443309002433095e-05, "loss": 1.4943, "step": 170700 }, { "epoch": 3.3213417598444335, "grad_norm": 3.7230634689331055, "learning_rate": 3.3433576642335766e-05, "loss": 1.4924, "step": 170800 }, { "epoch": 3.32328633932912, "grad_norm": 3.157071590423584, "learning_rate": 3.342384428223844e-05, "loss": 1.6053, "step": 170900 }, { "epoch": 3.3252309188138067, "grad_norm": 4.748063087463379, "learning_rate": 3.341411192214112e-05, "loss": 1.6367, "step": 171000 }, { "epoch": 3.327175498298493, "grad_norm": 3.7639522552490234, "learning_rate": 3.34043795620438e-05, "loss": 1.3801, "step": 171100 }, { "epoch": 3.3291200777831795, "grad_norm": 5.422913551330566, "learning_rate": 3.3394647201946474e-05, "loss": 1.4491, "step": 171200 }, { "epoch": 3.331064657267866, "grad_norm": 2.4810068607330322, "learning_rate": 3.338491484184915e-05, "loss": 1.3559, "step": 171300 }, { "epoch": 3.3330092367525523, "grad_norm": 2.89337420463562, "learning_rate": 3.337518248175183e-05, "loss": 1.4599, "step": 171400 }, { "epoch": 3.3349538162372387, "grad_norm": 4.574434280395508, "learning_rate": 3.33654501216545e-05, "loss": 1.3643, "step": 171500 }, { "epoch": 3.336898395721925, "grad_norm": 7.136233329772949, "learning_rate": 3.335571776155718e-05, "loss": 1.5063, "step": 171600 }, { "epoch": 3.3388429752066116, "grad_norm": 3.0614969730377197, "learning_rate": 3.334598540145986e-05, "loss": 1.6077, "step": 171700 }, { "epoch": 3.340787554691298, "grad_norm": 
6.622594833374023, "learning_rate": 3.333625304136253e-05, "loss": 1.3718, "step": 171800 }, { "epoch": 3.3427321341759844, "grad_norm": 6.7776970863342285, "learning_rate": 3.332652068126521e-05, "loss": 1.4988, "step": 171900 }, { "epoch": 3.3446767136606708, "grad_norm": 5.847710609436035, "learning_rate": 3.331678832116788e-05, "loss": 1.3334, "step": 172000 }, { "epoch": 3.346621293145357, "grad_norm": 4.344485282897949, "learning_rate": 3.330705596107056e-05, "loss": 1.3505, "step": 172100 }, { "epoch": 3.3485658726300436, "grad_norm": 4.204843044281006, "learning_rate": 3.329732360097324e-05, "loss": 1.4989, "step": 172200 }, { "epoch": 3.3505104521147304, "grad_norm": 4.069840431213379, "learning_rate": 3.3287591240875914e-05, "loss": 1.3916, "step": 172300 }, { "epoch": 3.3524550315994164, "grad_norm": 3.776874542236328, "learning_rate": 3.327785888077859e-05, "loss": 1.5729, "step": 172400 }, { "epoch": 3.3543996110841032, "grad_norm": 1.9472843408584595, "learning_rate": 3.326812652068126e-05, "loss": 1.3645, "step": 172500 }, { "epoch": 3.3563441905687896, "grad_norm": 5.4129958152771, "learning_rate": 3.3258394160583945e-05, "loss": 1.4449, "step": 172600 }, { "epoch": 3.358288770053476, "grad_norm": 7.123589038848877, "learning_rate": 3.324866180048662e-05, "loss": 1.3588, "step": 172700 }, { "epoch": 3.3602333495381624, "grad_norm": 6.564899444580078, "learning_rate": 3.323892944038929e-05, "loss": 1.5603, "step": 172800 }, { "epoch": 3.362177929022849, "grad_norm": 3.124093532562256, "learning_rate": 3.3229197080291976e-05, "loss": 1.3811, "step": 172900 }, { "epoch": 3.3641225085075352, "grad_norm": 6.755166053771973, "learning_rate": 3.3219464720194647e-05, "loss": 1.4121, "step": 173000 }, { "epoch": 3.3660670879922217, "grad_norm": 3.786875009536743, "learning_rate": 3.3209732360097324e-05, "loss": 1.5575, "step": 173100 }, { "epoch": 3.368011667476908, "grad_norm": 6.653440475463867, "learning_rate": 3.32e-05, "loss": 1.4405, "step": 173200 }, 
{ "epoch": 3.3699562469615945, "grad_norm": 13.707387924194336, "learning_rate": 3.319026763990268e-05, "loss": 1.5202, "step": 173300 }, { "epoch": 3.371900826446281, "grad_norm": 5.876728534698486, "learning_rate": 3.3180535279805355e-05, "loss": 1.4319, "step": 173400 }, { "epoch": 3.3738454059309673, "grad_norm": 5.308914661407471, "learning_rate": 3.3170900243309006e-05, "loss": 1.5143, "step": 173500 }, { "epoch": 3.3757899854156537, "grad_norm": 5.827585220336914, "learning_rate": 3.316116788321168e-05, "loss": 1.5164, "step": 173600 }, { "epoch": 3.37773456490034, "grad_norm": 5.315308094024658, "learning_rate": 3.315143552311435e-05, "loss": 1.3876, "step": 173700 }, { "epoch": 3.379679144385027, "grad_norm": 5.446086406707764, "learning_rate": 3.314170316301704e-05, "loss": 1.3181, "step": 173800 }, { "epoch": 3.3816237238697133, "grad_norm": 8.003002166748047, "learning_rate": 3.313197080291971e-05, "loss": 1.4502, "step": 173900 }, { "epoch": 3.3835683033543997, "grad_norm": 7.244203567504883, "learning_rate": 3.3122238442822384e-05, "loss": 1.4512, "step": 174000 }, { "epoch": 3.385512882839086, "grad_norm": 3.1236069202423096, "learning_rate": 3.311250608272506e-05, "loss": 1.5623, "step": 174100 }, { "epoch": 3.3874574623237725, "grad_norm": 5.394275188446045, "learning_rate": 3.310277372262774e-05, "loss": 1.3266, "step": 174200 }, { "epoch": 3.389402041808459, "grad_norm": 4.687415599822998, "learning_rate": 3.3093041362530415e-05, "loss": 1.4847, "step": 174300 }, { "epoch": 3.3913466212931453, "grad_norm": 4.1448845863342285, "learning_rate": 3.308330900243309e-05, "loss": 1.4738, "step": 174400 }, { "epoch": 3.3932912007778318, "grad_norm": 3.6888837814331055, "learning_rate": 3.307357664233577e-05, "loss": 1.4491, "step": 174500 }, { "epoch": 3.395235780262518, "grad_norm": 6.243124008178711, "learning_rate": 3.3063844282238446e-05, "loss": 1.57, "step": 174600 }, { "epoch": 3.3971803597472046, "grad_norm": 4.707263469696045, "learning_rate": 
3.305411192214112e-05, "loss": 1.4584, "step": 174700 }, { "epoch": 3.399124939231891, "grad_norm": 2.5992958545684814, "learning_rate": 3.30443795620438e-05, "loss": 1.357, "step": 174800 }, { "epoch": 3.4010695187165774, "grad_norm": 5.3876776695251465, "learning_rate": 3.303464720194647e-05, "loss": 1.4959, "step": 174900 }, { "epoch": 3.4030140982012638, "grad_norm": 8.892577171325684, "learning_rate": 3.3024914841849154e-05, "loss": 1.466, "step": 175000 }, { "epoch": 3.4030140982012638, "eval_accuracy": 0.5499111111111111, "eval_f1": 0.5470765369550749, "eval_loss": 1.1877998113632202, "eval_precision": 0.5656013327750222, "eval_recall": 0.5499111111111111, "eval_runtime": 11529.2754, "eval_samples_per_second": 15.612, "eval_steps_per_second": 0.488, "step": 175000 }, { "epoch": 3.4049586776859506, "grad_norm": 7.597278118133545, "learning_rate": 3.3015182481751824e-05, "loss": 1.4357, "step": 175100 }, { "epoch": 3.406903257170637, "grad_norm": 4.423178195953369, "learning_rate": 3.30054501216545e-05, "loss": 1.5105, "step": 175200 }, { "epoch": 3.4088478366553234, "grad_norm": 2.857414960861206, "learning_rate": 3.299571776155718e-05, "loss": 1.4705, "step": 175300 }, { "epoch": 3.41079241614001, "grad_norm": 7.032068252563477, "learning_rate": 3.2985985401459856e-05, "loss": 1.4497, "step": 175400 }, { "epoch": 3.4127369956246962, "grad_norm": 3.0603277683258057, "learning_rate": 3.2976350364963507e-05, "loss": 1.3673, "step": 175500 }, { "epoch": 3.4146815751093826, "grad_norm": 3.9720852375030518, "learning_rate": 3.2966618004866184e-05, "loss": 1.5846, "step": 175600 }, { "epoch": 3.416626154594069, "grad_norm": 1.39033043384552, "learning_rate": 3.295688564476886e-05, "loss": 1.348, "step": 175700 }, { "epoch": 3.4185707340787554, "grad_norm": 3.129202365875244, "learning_rate": 3.294715328467153e-05, "loss": 1.5841, "step": 175800 }, { "epoch": 3.420515313563442, "grad_norm": 4.870450973510742, "learning_rate": 3.293742092457421e-05, "loss": 1.563, 
"step": 175900 }, { "epoch": 3.4224598930481283, "grad_norm": 2.476733922958374, "learning_rate": 3.292768856447689e-05, "loss": 1.2668, "step": 176000 }, { "epoch": 3.4244044725328147, "grad_norm": 2.042283535003662, "learning_rate": 3.291795620437956e-05, "loss": 1.6384, "step": 176100 }, { "epoch": 3.426349052017501, "grad_norm": 4.071489334106445, "learning_rate": 3.290832116788321e-05, "loss": 1.5092, "step": 176200 }, { "epoch": 3.4282936315021875, "grad_norm": 5.388479709625244, "learning_rate": 3.289858880778589e-05, "loss": 1.3886, "step": 176300 }, { "epoch": 3.4302382109868743, "grad_norm": 3.9132258892059326, "learning_rate": 3.288885644768857e-05, "loss": 1.3519, "step": 176400 }, { "epoch": 3.4321827904715607, "grad_norm": 7.3020148277282715, "learning_rate": 3.287912408759124e-05, "loss": 1.4874, "step": 176500 }, { "epoch": 3.434127369956247, "grad_norm": 5.011734485626221, "learning_rate": 3.286939172749392e-05, "loss": 1.5292, "step": 176600 }, { "epoch": 3.4360719494409335, "grad_norm": 4.154934406280518, "learning_rate": 3.285965936739659e-05, "loss": 1.409, "step": 176700 }, { "epoch": 3.43801652892562, "grad_norm": 4.644326210021973, "learning_rate": 3.284992700729927e-05, "loss": 1.6067, "step": 176800 }, { "epoch": 3.4399611084103063, "grad_norm": 6.442100524902344, "learning_rate": 3.284019464720195e-05, "loss": 1.6607, "step": 176900 }, { "epoch": 3.4419056878949927, "grad_norm": 9.617768287658691, "learning_rate": 3.283046228710462e-05, "loss": 1.5832, "step": 177000 }, { "epoch": 3.443850267379679, "grad_norm": 1.399074673652649, "learning_rate": 3.28207299270073e-05, "loss": 1.5807, "step": 177100 }, { "epoch": 3.4457948468643655, "grad_norm": 1.9660173654556274, "learning_rate": 3.2810997566909976e-05, "loss": 1.4164, "step": 177200 }, { "epoch": 3.447739426349052, "grad_norm": 4.822132587432861, "learning_rate": 3.2801265206812653e-05, "loss": 1.428, "step": 177300 }, { "epoch": 3.4496840058337384, "grad_norm": 6.631158828735352, 
"learning_rate": 3.279153284671533e-05, "loss": 1.665, "step": 177400 }, { "epoch": 3.4516285853184248, "grad_norm": 2.9860923290252686, "learning_rate": 3.278180048661801e-05, "loss": 1.4396, "step": 177500 }, { "epoch": 3.453573164803111, "grad_norm": 6.893071174621582, "learning_rate": 3.2772068126520685e-05, "loss": 1.3697, "step": 177600 }, { "epoch": 3.455517744287798, "grad_norm": 3.80025053024292, "learning_rate": 3.2762335766423355e-05, "loss": 1.4337, "step": 177700 }, { "epoch": 3.457462323772484, "grad_norm": 4.0970072746276855, "learning_rate": 3.275260340632604e-05, "loss": 1.4427, "step": 177800 }, { "epoch": 3.459406903257171, "grad_norm": 3.227292537689209, "learning_rate": 3.2742871046228716e-05, "loss": 1.6359, "step": 177900 }, { "epoch": 3.461351482741857, "grad_norm": 5.370667457580566, "learning_rate": 3.2733138686131386e-05, "loss": 1.3888, "step": 178000 }, { "epoch": 3.4632960622265436, "grad_norm": 2.51029634475708, "learning_rate": 3.272340632603407e-05, "loss": 1.361, "step": 178100 }, { "epoch": 3.46524064171123, "grad_norm": 7.730439186096191, "learning_rate": 3.271367396593674e-05, "loss": 1.4479, "step": 178200 }, { "epoch": 3.4671852211959164, "grad_norm": 19.0919246673584, "learning_rate": 3.270394160583942e-05, "loss": 1.4969, "step": 178300 }, { "epoch": 3.469129800680603, "grad_norm": 3.6248667240142822, "learning_rate": 3.2694209245742094e-05, "loss": 1.4414, "step": 178400 }, { "epoch": 3.4710743801652892, "grad_norm": 4.379124164581299, "learning_rate": 3.268447688564477e-05, "loss": 1.3791, "step": 178500 }, { "epoch": 3.4730189596499756, "grad_norm": 1.9421803951263428, "learning_rate": 3.267474452554745e-05, "loss": 1.4625, "step": 178600 }, { "epoch": 3.474963539134662, "grad_norm": 7.223816871643066, "learning_rate": 3.266501216545012e-05, "loss": 1.5157, "step": 178700 }, { "epoch": 3.4769081186193485, "grad_norm": 4.067922592163086, "learning_rate": 3.26552798053528e-05, "loss": 1.4229, "step": 178800 }, { "epoch": 
3.478852698104035, "grad_norm": 6.362250804901123, "learning_rate": 3.264554744525547e-05, "loss": 1.4436, "step": 178900 }, { "epoch": 3.4807972775887213, "grad_norm": 4.224020481109619, "learning_rate": 3.263581508515815e-05, "loss": 1.3677, "step": 179000 }, { "epoch": 3.4827418570734077, "grad_norm": 4.895819664001465, "learning_rate": 3.262608272506083e-05, "loss": 1.3986, "step": 179100 }, { "epoch": 3.4846864365580945, "grad_norm": 4.071390151977539, "learning_rate": 3.26163503649635e-05, "loss": 1.5726, "step": 179200 }, { "epoch": 3.486631016042781, "grad_norm": 3.670074224472046, "learning_rate": 3.260661800486618e-05, "loss": 1.7426, "step": 179300 }, { "epoch": 3.4885755955274673, "grad_norm": 3.2992849349975586, "learning_rate": 3.259688564476886e-05, "loss": 1.398, "step": 179400 }, { "epoch": 3.4905201750121537, "grad_norm": 5.6500983238220215, "learning_rate": 3.2587153284671534e-05, "loss": 1.4606, "step": 179500 }, { "epoch": 3.49246475449684, "grad_norm": 3.597337245941162, "learning_rate": 3.257742092457421e-05, "loss": 1.3629, "step": 179600 }, { "epoch": 3.4944093339815265, "grad_norm": 6.384410858154297, "learning_rate": 3.256768856447689e-05, "loss": 1.5447, "step": 179700 }, { "epoch": 3.496353913466213, "grad_norm": 6.201845645904541, "learning_rate": 3.2557956204379565e-05, "loss": 1.5245, "step": 179800 }, { "epoch": 3.4982984929508993, "grad_norm": 7.665905475616455, "learning_rate": 3.2548223844282236e-05, "loss": 1.3696, "step": 179900 }, { "epoch": 3.5002430724355857, "grad_norm": 3.762413740158081, "learning_rate": 3.253849148418492e-05, "loss": 1.409, "step": 180000 }, { "epoch": 3.5002430724355857, "eval_accuracy": 0.5332833333333333, "eval_f1": 0.5278272372453312, "eval_loss": 1.2196710109710693, "eval_precision": 0.5568285582755171, "eval_recall": 0.5332833333333333, "eval_runtime": 11546.2226, "eval_samples_per_second": 15.59, "eval_steps_per_second": 0.487, "step": 180000 }, { "epoch": 3.502187651920272, "grad_norm": 
8.001137733459473, "learning_rate": 3.2528759124087596e-05, "loss": 1.444, "step": 180100 }, { "epoch": 3.5041322314049586, "grad_norm": 2.7410006523132324, "learning_rate": 3.251912408759124e-05, "loss": 1.5404, "step": 180200 }, { "epoch": 3.506076810889645, "grad_norm": 2.8732011318206787, "learning_rate": 3.250939172749392e-05, "loss": 1.4512, "step": 180300 }, { "epoch": 3.5080213903743314, "grad_norm": 4.1830644607543945, "learning_rate": 3.2499659367396595e-05, "loss": 1.4315, "step": 180400 }, { "epoch": 3.509965969859018, "grad_norm": 3.508216142654419, "learning_rate": 3.248992700729927e-05, "loss": 1.5116, "step": 180500 }, { "epoch": 3.511910549343704, "grad_norm": 3.463304042816162, "learning_rate": 3.248019464720195e-05, "loss": 1.3003, "step": 180600 }, { "epoch": 3.513855128828391, "grad_norm": 4.018204212188721, "learning_rate": 3.2470462287104626e-05, "loss": 1.7626, "step": 180700 }, { "epoch": 3.5157997083130774, "grad_norm": 5.554513931274414, "learning_rate": 3.24607299270073e-05, "loss": 1.5042, "step": 180800 }, { "epoch": 3.517744287797764, "grad_norm": 2.4130241870880127, "learning_rate": 3.245099756690998e-05, "loss": 1.5344, "step": 180900 }, { "epoch": 3.5196888672824502, "grad_norm": 4.416082859039307, "learning_rate": 3.244126520681266e-05, "loss": 1.5507, "step": 181000 }, { "epoch": 3.5216334467671366, "grad_norm": 3.468419075012207, "learning_rate": 3.243153284671533e-05, "loss": 1.5483, "step": 181100 }, { "epoch": 3.523578026251823, "grad_norm": 3.727236747741699, "learning_rate": 3.242180048661801e-05, "loss": 1.5596, "step": 181200 }, { "epoch": 3.5255226057365094, "grad_norm": 4.559162616729736, "learning_rate": 3.241206812652068e-05, "loss": 1.4931, "step": 181300 }, { "epoch": 3.527467185221196, "grad_norm": 6.363912105560303, "learning_rate": 3.240233576642336e-05, "loss": 1.5076, "step": 181400 }, { "epoch": 3.5294117647058822, "grad_norm": 5.634368896484375, "learning_rate": 3.2392603406326035e-05, "loss": 1.3634, "step": 
181500 }, { "epoch": 3.5313563441905687, "grad_norm": 3.4705138206481934, "learning_rate": 3.238287104622871e-05, "loss": 1.4776, "step": 181600 }, { "epoch": 3.533300923675255, "grad_norm": 2.808687448501587, "learning_rate": 3.237323600973236e-05, "loss": 1.6395, "step": 181700 }, { "epoch": 3.535245503159942, "grad_norm": 5.326289653778076, "learning_rate": 3.236350364963504e-05, "loss": 1.5441, "step": 181800 }, { "epoch": 3.537190082644628, "grad_norm": 1.5275052785873413, "learning_rate": 3.235377128953772e-05, "loss": 1.6237, "step": 181900 }, { "epoch": 3.5391346621293147, "grad_norm": 9.659943580627441, "learning_rate": 3.234403892944039e-05, "loss": 1.4422, "step": 182000 }, { "epoch": 3.541079241614001, "grad_norm": 2.820641040802002, "learning_rate": 3.2334306569343065e-05, "loss": 1.5525, "step": 182100 }, { "epoch": 3.5430238210986875, "grad_norm": 6.381496429443359, "learning_rate": 3.232457420924574e-05, "loss": 1.4877, "step": 182200 }, { "epoch": 3.544968400583374, "grad_norm": 6.815134048461914, "learning_rate": 3.231484184914842e-05, "loss": 1.4446, "step": 182300 }, { "epoch": 3.5469129800680603, "grad_norm": 3.74178409576416, "learning_rate": 3.2305109489051096e-05, "loss": 1.4013, "step": 182400 }, { "epoch": 3.5488575595527467, "grad_norm": 2.3761579990386963, "learning_rate": 3.229537712895377e-05, "loss": 1.5308, "step": 182500 }, { "epoch": 3.550802139037433, "grad_norm": 3.5356297492980957, "learning_rate": 3.228564476885645e-05, "loss": 1.5281, "step": 182600 }, { "epoch": 3.5527467185221195, "grad_norm": 3.717634677886963, "learning_rate": 3.227591240875913e-05, "loss": 1.3176, "step": 182700 }, { "epoch": 3.554691298006806, "grad_norm": 3.1107587814331055, "learning_rate": 3.2266180048661804e-05, "loss": 1.6914, "step": 182800 }, { "epoch": 3.5566358774914923, "grad_norm": 3.048600435256958, "learning_rate": 3.225644768856448e-05, "loss": 1.577, "step": 182900 }, { "epoch": 3.5585804569761788, "grad_norm": 5.607958793640137, 
"learning_rate": 3.224671532846715e-05, "loss": 1.5514, "step": 183000 }, { "epoch": 3.5605250364608656, "grad_norm": 9.790914535522461, "learning_rate": 3.2236982968369835e-05, "loss": 1.4382, "step": 183100 }, { "epoch": 3.5624696159455516, "grad_norm": 1.6446797847747803, "learning_rate": 3.2227250608272505e-05, "loss": 1.6568, "step": 183200 }, { "epoch": 3.5644141954302384, "grad_norm": 5.964009761810303, "learning_rate": 3.221751824817518e-05, "loss": 1.519, "step": 183300 }, { "epoch": 3.566358774914925, "grad_norm": 1.4075846672058105, "learning_rate": 3.2207785888077866e-05, "loss": 1.4848, "step": 183400 }, { "epoch": 3.568303354399611, "grad_norm": 3.475112199783325, "learning_rate": 3.2198053527980536e-05, "loss": 1.4447, "step": 183500 }, { "epoch": 3.5702479338842976, "grad_norm": 2.216562032699585, "learning_rate": 3.218832116788321e-05, "loss": 1.4859, "step": 183600 }, { "epoch": 3.572192513368984, "grad_norm": 3.803002119064331, "learning_rate": 3.217858880778589e-05, "loss": 1.5867, "step": 183700 }, { "epoch": 3.5741370928536704, "grad_norm": 7.395280361175537, "learning_rate": 3.216885644768857e-05, "loss": 1.5737, "step": 183800 }, { "epoch": 3.576081672338357, "grad_norm": 2.6441633701324463, "learning_rate": 3.2159124087591244e-05, "loss": 1.7086, "step": 183900 }, { "epoch": 3.5780262518230432, "grad_norm": 4.6574177742004395, "learning_rate": 3.214939172749392e-05, "loss": 1.488, "step": 184000 }, { "epoch": 3.5799708313077296, "grad_norm": 4.747029781341553, "learning_rate": 3.21396593673966e-05, "loss": 1.5581, "step": 184100 }, { "epoch": 3.581915410792416, "grad_norm": 6.82007360458374, "learning_rate": 3.212992700729927e-05, "loss": 1.5391, "step": 184200 }, { "epoch": 3.5838599902771024, "grad_norm": 5.771962642669678, "learning_rate": 3.2120194647201945e-05, "loss": 1.4447, "step": 184300 }, { "epoch": 3.585804569761789, "grad_norm": 4.7183380126953125, "learning_rate": 3.211046228710462e-05, "loss": 1.4695, "step": 184400 }, { 
"epoch": 3.5877491492464753, "grad_norm": 3.577984094619751, "learning_rate": 3.21007299270073e-05, "loss": 1.5479, "step": 184500 }, { "epoch": 3.589693728731162, "grad_norm": 6.594232559204102, "learning_rate": 3.2090997566909977e-05, "loss": 1.4991, "step": 184600 }, { "epoch": 3.591638308215848, "grad_norm": 3.3493947982788086, "learning_rate": 3.2081265206812654e-05, "loss": 1.4374, "step": 184700 }, { "epoch": 3.593582887700535, "grad_norm": 5.002516746520996, "learning_rate": 3.207153284671533e-05, "loss": 1.5183, "step": 184800 }, { "epoch": 3.5955274671852213, "grad_norm": 2.8351712226867676, "learning_rate": 3.206180048661801e-05, "loss": 1.4143, "step": 184900 }, { "epoch": 3.5974720466699077, "grad_norm": 8.523571968078613, "learning_rate": 3.2052068126520685e-05, "loss": 1.4949, "step": 185000 }, { "epoch": 3.5974720466699077, "eval_accuracy": 0.56175, "eval_f1": 0.5407802344218462, "eval_loss": 1.1775490045547485, "eval_precision": 0.5610746401169078, "eval_recall": 0.5617500000000001, "eval_runtime": 11534.0618, "eval_samples_per_second": 15.606, "eval_steps_per_second": 0.488, "step": 185000 }, { "epoch": 3.599416626154594, "grad_norm": 5.557082653045654, "learning_rate": 3.204233576642336e-05, "loss": 1.4733, "step": 185100 }, { "epoch": 3.6013612056392805, "grad_norm": 8.791021347045898, "learning_rate": 3.203260340632603e-05, "loss": 1.3566, "step": 185200 }, { "epoch": 3.603305785123967, "grad_norm": 2.2691681385040283, "learning_rate": 3.2022871046228716e-05, "loss": 1.7001, "step": 185300 }, { "epoch": 3.6052503646086533, "grad_norm": 6.041215896606445, "learning_rate": 3.2013138686131386e-05, "loss": 1.3873, "step": 185400 }, { "epoch": 3.6071949440933397, "grad_norm": 5.008601188659668, "learning_rate": 3.200340632603406e-05, "loss": 1.435, "step": 185500 }, { "epoch": 3.609139523578026, "grad_norm": 3.423417091369629, "learning_rate": 3.199367396593675e-05, "loss": 1.5453, "step": 185600 }, { "epoch": 3.6110841030627125, "grad_norm": 
4.537726402282715, "learning_rate": 3.198394160583942e-05, "loss": 1.3272, "step": 185700 }, { "epoch": 3.613028682547399, "grad_norm": 3.5328736305236816, "learning_rate": 3.197430656934307e-05, "loss": 1.4138, "step": 185800 }, { "epoch": 3.614973262032086, "grad_norm": 4.090452194213867, "learning_rate": 3.1964574209245745e-05, "loss": 1.4993, "step": 185900 }, { "epoch": 3.6169178415167718, "grad_norm": 10.378958702087402, "learning_rate": 3.195484184914842e-05, "loss": 1.7672, "step": 186000 }, { "epoch": 3.6188624210014586, "grad_norm": 2.8812198638916016, "learning_rate": 3.194510948905109e-05, "loss": 1.4107, "step": 186100 }, { "epoch": 3.620807000486145, "grad_norm": 2.0235583782196045, "learning_rate": 3.1935377128953776e-05, "loss": 1.43, "step": 186200 }, { "epoch": 3.6227515799708314, "grad_norm": 2.4392011165618896, "learning_rate": 3.1925644768856446e-05, "loss": 1.4851, "step": 186300 }, { "epoch": 3.624696159455518, "grad_norm": 10.01359748840332, "learning_rate": 3.1915912408759123e-05, "loss": 1.5172, "step": 186400 }, { "epoch": 3.6266407389402042, "grad_norm": 3.26108717918396, "learning_rate": 3.190618004866181e-05, "loss": 1.4651, "step": 186500 }, { "epoch": 3.6285853184248906, "grad_norm": 8.13802433013916, "learning_rate": 3.189644768856448e-05, "loss": 1.3579, "step": 186600 }, { "epoch": 3.630529897909577, "grad_norm": 5.09847354888916, "learning_rate": 3.1886715328467154e-05, "loss": 1.438, "step": 186700 }, { "epoch": 3.6324744773942634, "grad_norm": 6.433923721313477, "learning_rate": 3.187698296836983e-05, "loss": 1.4695, "step": 186800 }, { "epoch": 3.63441905687895, "grad_norm": 4.507726192474365, "learning_rate": 3.186725060827251e-05, "loss": 1.4538, "step": 186900 }, { "epoch": 3.6363636363636362, "grad_norm": 7.314497947692871, "learning_rate": 3.1857518248175186e-05, "loss": 1.5372, "step": 187000 }, { "epoch": 3.6383082158483226, "grad_norm": 2.789271354675293, "learning_rate": 3.1847785888077856e-05, "loss": 1.5282, "step": 
187100 }, { "epoch": 3.6402527953330095, "grad_norm": 5.858973503112793, "learning_rate": 3.183805352798054e-05, "loss": 1.5359, "step": 187200 }, { "epoch": 3.6421973748176955, "grad_norm": 4.487477779388428, "learning_rate": 3.182832116788321e-05, "loss": 1.4569, "step": 187300 }, { "epoch": 3.6441419543023823, "grad_norm": 4.591485977172852, "learning_rate": 3.181858880778589e-05, "loss": 1.4444, "step": 187400 }, { "epoch": 3.6460865337870687, "grad_norm": 5.598405361175537, "learning_rate": 3.180885644768857e-05, "loss": 1.3974, "step": 187500 }, { "epoch": 3.648031113271755, "grad_norm": 2.89035964012146, "learning_rate": 3.179912408759124e-05, "loss": 1.4345, "step": 187600 }, { "epoch": 3.6499756927564415, "grad_norm": 2.1467511653900146, "learning_rate": 3.178939172749392e-05, "loss": 1.5351, "step": 187700 }, { "epoch": 3.651920272241128, "grad_norm": 5.807663440704346, "learning_rate": 3.177975669099757e-05, "loss": 1.3674, "step": 187800 }, { "epoch": 3.6538648517258143, "grad_norm": 3.251499652862549, "learning_rate": 3.1770024330900246e-05, "loss": 1.6247, "step": 187900 }, { "epoch": 3.6558094312105007, "grad_norm": 3.999516487121582, "learning_rate": 3.1760291970802916e-05, "loss": 1.4696, "step": 188000 }, { "epoch": 3.657754010695187, "grad_norm": 4.3747453689575195, "learning_rate": 3.17505596107056e-05, "loss": 1.654, "step": 188100 }, { "epoch": 3.6596985901798735, "grad_norm": 6.452905178070068, "learning_rate": 3.174082725060828e-05, "loss": 1.4055, "step": 188200 }, { "epoch": 3.66164316966456, "grad_norm": 2.412477970123291, "learning_rate": 3.173109489051095e-05, "loss": 1.4265, "step": 188300 }, { "epoch": 3.6635877491492463, "grad_norm": 5.277268409729004, "learning_rate": 3.172136253041363e-05, "loss": 1.4345, "step": 188400 }, { "epoch": 3.6655323286339327, "grad_norm": 1.838045597076416, "learning_rate": 3.17116301703163e-05, "loss": 1.3894, "step": 188500 }, { "epoch": 3.667476908118619, "grad_norm": 5.047424793243408, 
"learning_rate": 3.170189781021898e-05, "loss": 1.487, "step": 188600 }, { "epoch": 3.669421487603306, "grad_norm": 4.126437664031982, "learning_rate": 3.1692165450121655e-05, "loss": 1.4294, "step": 188700 }, { "epoch": 3.671366067087992, "grad_norm": 2.602114677429199, "learning_rate": 3.168243309002433e-05, "loss": 1.4147, "step": 188800 }, { "epoch": 3.673310646572679, "grad_norm": 4.961286544799805, "learning_rate": 3.167270072992701e-05, "loss": 1.3245, "step": 188900 }, { "epoch": 3.675255226057365, "grad_norm": 4.842992305755615, "learning_rate": 3.1662968369829686e-05, "loss": 1.4459, "step": 189000 }, { "epoch": 3.6771998055420516, "grad_norm": 1.4976671934127808, "learning_rate": 3.1653236009732363e-05, "loss": 1.4006, "step": 189100 }, { "epoch": 3.679144385026738, "grad_norm": 4.154075622558594, "learning_rate": 3.1643503649635034e-05, "loss": 1.4156, "step": 189200 }, { "epoch": 3.6810889645114244, "grad_norm": 4.200620651245117, "learning_rate": 3.163377128953772e-05, "loss": 1.5697, "step": 189300 }, { "epoch": 3.683033543996111, "grad_norm": 3.3522205352783203, "learning_rate": 3.1624038929440394e-05, "loss": 1.2757, "step": 189400 }, { "epoch": 3.6849781234807972, "grad_norm": 3.2776999473571777, "learning_rate": 3.1614306569343065e-05, "loss": 1.4184, "step": 189500 }, { "epoch": 3.6869227029654836, "grad_norm": 4.839968681335449, "learning_rate": 3.1604671532846716e-05, "loss": 1.425, "step": 189600 }, { "epoch": 3.68886728245017, "grad_norm": 7.682199001312256, "learning_rate": 3.159493917274939e-05, "loss": 1.7053, "step": 189700 }, { "epoch": 3.6908118619348564, "grad_norm": 12.51144027709961, "learning_rate": 3.158520681265207e-05, "loss": 1.5761, "step": 189800 }, { "epoch": 3.692756441419543, "grad_norm": 3.0463016033172607, "learning_rate": 3.157547445255475e-05, "loss": 1.4913, "step": 189900 }, { "epoch": 3.6947010209042297, "grad_norm": 7.6778740882873535, "learning_rate": 3.1565742092457424e-05, "loss": 1.4764, "step": 190000 }, { 
"epoch": 3.6947010209042297, "eval_accuracy": 0.5430388888888888, "eval_f1": 0.5266119052108879, "eval_loss": 1.2086833715438843, "eval_precision": 0.545535996719747, "eval_recall": 0.543038888888889, "eval_runtime": 11540.6593, "eval_samples_per_second": 15.597, "eval_steps_per_second": 0.487, "step": 190000 }, { "epoch": 3.6966456003889157, "grad_norm": 2.110671281814575, "learning_rate": 3.15560097323601e-05, "loss": 1.4803, "step": 190100 }, { "epoch": 3.6985901798736025, "grad_norm": 2.250070333480835, "learning_rate": 3.154627737226277e-05, "loss": 1.3916, "step": 190200 }, { "epoch": 3.700534759358289, "grad_norm": 3.487828254699707, "learning_rate": 3.1536545012165455e-05, "loss": 1.3857, "step": 190300 }, { "epoch": 3.7024793388429753, "grad_norm": 4.064331531524658, "learning_rate": 3.1526812652068125e-05, "loss": 1.3991, "step": 190400 }, { "epoch": 3.7044239183276617, "grad_norm": 3.300365447998047, "learning_rate": 3.15170802919708e-05, "loss": 1.3583, "step": 190500 }, { "epoch": 3.706368497812348, "grad_norm": 6.284401893615723, "learning_rate": 3.150734793187348e-05, "loss": 1.5071, "step": 190600 }, { "epoch": 3.7083130772970345, "grad_norm": 6.3874640464782715, "learning_rate": 3.1497615571776156e-05, "loss": 1.4261, "step": 190700 }, { "epoch": 3.710257656781721, "grad_norm": 1.9734315872192383, "learning_rate": 3.148788321167883e-05, "loss": 1.5707, "step": 190800 }, { "epoch": 3.7122022362664073, "grad_norm": 4.992983818054199, "learning_rate": 3.147815085158151e-05, "loss": 1.4766, "step": 190900 }, { "epoch": 3.7141468157510937, "grad_norm": 2.608898878097534, "learning_rate": 3.146841849148419e-05, "loss": 1.3864, "step": 191000 }, { "epoch": 3.71609139523578, "grad_norm": 4.5902838706970215, "learning_rate": 3.145868613138686e-05, "loss": 1.4177, "step": 191100 }, { "epoch": 3.7180359747204665, "grad_norm": 4.087220191955566, "learning_rate": 3.144895377128954e-05, "loss": 1.4718, "step": 191200 }, { "epoch": 3.7199805542051534, 
"grad_norm": 2.799799680709839, "learning_rate": 3.143922141119222e-05, "loss": 1.4577, "step": 191300 }, { "epoch": 3.7219251336898393, "grad_norm": 4.098569393157959, "learning_rate": 3.142948905109489e-05, "loss": 1.4405, "step": 191400 }, { "epoch": 3.723869713174526, "grad_norm": 5.171971797943115, "learning_rate": 3.141975669099757e-05, "loss": 1.378, "step": 191500 }, { "epoch": 3.7258142926592126, "grad_norm": 7.699488162994385, "learning_rate": 3.141002433090024e-05, "loss": 1.5024, "step": 191600 }, { "epoch": 3.727758872143899, "grad_norm": 9.79340934753418, "learning_rate": 3.140029197080292e-05, "loss": 1.4752, "step": 191700 }, { "epoch": 3.7297034516285854, "grad_norm": 6.984522342681885, "learning_rate": 3.13905596107056e-05, "loss": 1.5489, "step": 191800 }, { "epoch": 3.731648031113272, "grad_norm": 6.305161476135254, "learning_rate": 3.1380827250608274e-05, "loss": 1.4875, "step": 191900 }, { "epoch": 3.733592610597958, "grad_norm": 4.417160511016846, "learning_rate": 3.137109489051095e-05, "loss": 1.4271, "step": 192000 }, { "epoch": 3.7355371900826446, "grad_norm": 6.435615539550781, "learning_rate": 3.136136253041363e-05, "loss": 1.4433, "step": 192100 }, { "epoch": 3.737481769567331, "grad_norm": 5.378427505493164, "learning_rate": 3.1351630170316305e-05, "loss": 1.4812, "step": 192200 }, { "epoch": 3.7394263490520174, "grad_norm": 10.004172325134277, "learning_rate": 3.134189781021898e-05, "loss": 1.4531, "step": 192300 }, { "epoch": 3.741370928536704, "grad_norm": 7.18505859375, "learning_rate": 3.133216545012165e-05, "loss": 1.5939, "step": 192400 }, { "epoch": 3.7433155080213902, "grad_norm": 4.942299842834473, "learning_rate": 3.1322433090024336e-05, "loss": 1.6717, "step": 192500 }, { "epoch": 3.745260087506077, "grad_norm": 4.3931474685668945, "learning_rate": 3.1312700729927006e-05, "loss": 1.4883, "step": 192600 }, { "epoch": 3.747204666990763, "grad_norm": 3.2649576663970947, "learning_rate": 3.130296836982968e-05, "loss": 1.586, 
"step": 192700 }, { "epoch": 3.74914924647545, "grad_norm": 2.251145601272583, "learning_rate": 3.129323600973236e-05, "loss": 1.5174, "step": 192800 }, { "epoch": 3.751093825960136, "grad_norm": 6.209991455078125, "learning_rate": 3.128350364963504e-05, "loss": 1.4687, "step": 192900 }, { "epoch": 3.7530384054448227, "grad_norm": 4.989964485168457, "learning_rate": 3.1273771289537714e-05, "loss": 1.4298, "step": 193000 }, { "epoch": 3.754982984929509, "grad_norm": 6.581905841827393, "learning_rate": 3.126403892944039e-05, "loss": 1.7319, "step": 193100 }, { "epoch": 3.7569275644141955, "grad_norm": 3.1024155616760254, "learning_rate": 3.125430656934307e-05, "loss": 1.6635, "step": 193200 }, { "epoch": 3.758872143898882, "grad_norm": 8.236734390258789, "learning_rate": 3.124457420924574e-05, "loss": 1.5846, "step": 193300 }, { "epoch": 3.7608167233835683, "grad_norm": 2.6237785816192627, "learning_rate": 3.123484184914842e-05, "loss": 1.6321, "step": 193400 }, { "epoch": 3.7627613028682547, "grad_norm": 3.1225457191467285, "learning_rate": 3.12251094890511e-05, "loss": 1.4847, "step": 193500 }, { "epoch": 3.764705882352941, "grad_norm": 2.538477897644043, "learning_rate": 3.1215474452554744e-05, "loss": 1.4419, "step": 193600 }, { "epoch": 3.7666504618376275, "grad_norm": 3.771636962890625, "learning_rate": 3.120574209245743e-05, "loss": 1.6847, "step": 193700 }, { "epoch": 3.768595041322314, "grad_norm": 2.2320950031280518, "learning_rate": 3.11960097323601e-05, "loss": 1.4613, "step": 193800 }, { "epoch": 3.7705396208070003, "grad_norm": 3.8371126651763916, "learning_rate": 3.1186277372262775e-05, "loss": 1.4088, "step": 193900 }, { "epoch": 3.7724842002916867, "grad_norm": 4.3000335693359375, "learning_rate": 3.117654501216545e-05, "loss": 1.5245, "step": 194000 }, { "epoch": 3.7744287797763736, "grad_norm": 8.613393783569336, "learning_rate": 3.116681265206813e-05, "loss": 1.4221, "step": 194100 }, { "epoch": 3.7763733592610595, "grad_norm": 1.7842811346054077, 
"learning_rate": 3.1157080291970806e-05, "loss": 1.4087, "step": 194200 }, { "epoch": 3.7783179387457464, "grad_norm": 3.9233717918395996, "learning_rate": 3.114734793187348e-05, "loss": 1.525, "step": 194300 }, { "epoch": 3.780262518230433, "grad_norm": 5.06337308883667, "learning_rate": 3.113761557177616e-05, "loss": 1.6683, "step": 194400 }, { "epoch": 3.782207097715119, "grad_norm": 4.740768909454346, "learning_rate": 3.112788321167883e-05, "loss": 1.5812, "step": 194500 }, { "epoch": 3.7841516771998056, "grad_norm": 12.130316734313965, "learning_rate": 3.1118150851581514e-05, "loss": 1.3905, "step": 194600 }, { "epoch": 3.786096256684492, "grad_norm": 12.11811637878418, "learning_rate": 3.1108418491484184e-05, "loss": 1.3322, "step": 194700 }, { "epoch": 3.7880408361691784, "grad_norm": 2.428022861480713, "learning_rate": 3.109868613138686e-05, "loss": 1.6202, "step": 194800 }, { "epoch": 3.789985415653865, "grad_norm": 4.407482624053955, "learning_rate": 3.1088953771289545e-05, "loss": 1.4035, "step": 194900 }, { "epoch": 3.7919299951385512, "grad_norm": 3.655459403991699, "learning_rate": 3.1079221411192215e-05, "loss": 1.4751, "step": 195000 }, { "epoch": 3.7919299951385512, "eval_accuracy": 0.5462333333333333, "eval_f1": 0.5353169500565308, "eval_loss": 1.1999620199203491, "eval_precision": 0.5690372530840706, "eval_recall": 0.5462333333333333, "eval_runtime": 11528.0031, "eval_samples_per_second": 15.614, "eval_steps_per_second": 0.488, "step": 195000 }, { "epoch": 3.7938745746232376, "grad_norm": 3.551114797592163, "learning_rate": 3.106948905109489e-05, "loss": 1.4648, "step": 195100 }, { "epoch": 3.795819154107924, "grad_norm": 5.578760147094727, "learning_rate": 3.105975669099757e-05, "loss": 1.4939, "step": 195200 }, { "epoch": 3.7977637335926104, "grad_norm": 13.865647315979004, "learning_rate": 3.1050024330900246e-05, "loss": 1.3595, "step": 195300 }, { "epoch": 3.7997083130772973, "grad_norm": 23.888370513916016, "learning_rate": 
3.104029197080292e-05, "loss": 1.4558, "step": 195400 }, { "epoch": 3.8016528925619832, "grad_norm": 3.162950038909912, "learning_rate": 3.103055961070559e-05, "loss": 1.3366, "step": 195500 }, { "epoch": 3.80359747204667, "grad_norm": 3.679992437362671, "learning_rate": 3.102092457420925e-05, "loss": 1.4431, "step": 195600 }, { "epoch": 3.8055420515313565, "grad_norm": 2.5624911785125732, "learning_rate": 3.101119221411192e-05, "loss": 1.5306, "step": 195700 }, { "epoch": 3.807486631016043, "grad_norm": 3.488995313644409, "learning_rate": 3.10014598540146e-05, "loss": 1.4515, "step": 195800 }, { "epoch": 3.8094312105007293, "grad_norm": 4.104146480560303, "learning_rate": 3.0991727493917275e-05, "loss": 1.4275, "step": 195900 }, { "epoch": 3.8113757899854157, "grad_norm": 5.978058815002441, "learning_rate": 3.098199513381995e-05, "loss": 1.5413, "step": 196000 }, { "epoch": 3.813320369470102, "grad_norm": 8.901069641113281, "learning_rate": 3.097226277372263e-05, "loss": 1.5048, "step": 196100 }, { "epoch": 3.8152649489547885, "grad_norm": 3.3925082683563232, "learning_rate": 3.0962530413625307e-05, "loss": 1.4549, "step": 196200 }, { "epoch": 3.817209528439475, "grad_norm": 2.9221749305725098, "learning_rate": 3.0952798053527984e-05, "loss": 1.2774, "step": 196300 }, { "epoch": 3.8191541079241613, "grad_norm": 2.7063097953796387, "learning_rate": 3.0943065693430654e-05, "loss": 1.406, "step": 196400 }, { "epoch": 3.8210986874088477, "grad_norm": 8.829142570495605, "learning_rate": 3.093333333333334e-05, "loss": 1.4856, "step": 196500 }, { "epoch": 3.823043266893534, "grad_norm": 3.803915023803711, "learning_rate": 3.092360097323601e-05, "loss": 1.4273, "step": 196600 }, { "epoch": 3.824987846378221, "grad_norm": 6.843234539031982, "learning_rate": 3.0913868613138685e-05, "loss": 1.4425, "step": 196700 }, { "epoch": 3.826932425862907, "grad_norm": 3.689777135848999, "learning_rate": 3.090413625304137e-05, "loss": 1.5511, "step": 196800 }, { "epoch": 
3.828877005347594, "grad_norm": 7.560396671295166, "learning_rate": 3.089450121654501e-05, "loss": 1.5818, "step": 196900 }, { "epoch": 3.8308215848322797, "grad_norm": 4.473764896392822, "learning_rate": 3.088476885644769e-05, "loss": 1.4408, "step": 197000 }, { "epoch": 3.8327661643169666, "grad_norm": 1.9324196577072144, "learning_rate": 3.087503649635037e-05, "loss": 1.3407, "step": 197100 }, { "epoch": 3.834710743801653, "grad_norm": 5.763888835906982, "learning_rate": 3.0865304136253044e-05, "loss": 1.5002, "step": 197200 }, { "epoch": 3.8366553232863394, "grad_norm": 8.918253898620605, "learning_rate": 3.0855571776155714e-05, "loss": 1.4345, "step": 197300 }, { "epoch": 3.838599902771026, "grad_norm": 6.433380603790283, "learning_rate": 3.08458394160584e-05, "loss": 1.4885, "step": 197400 }, { "epoch": 3.840544482255712, "grad_norm": 5.575498580932617, "learning_rate": 3.0836107055961075e-05, "loss": 1.553, "step": 197500 }, { "epoch": 3.8424890617403986, "grad_norm": 2.248659610748291, "learning_rate": 3.0826374695863745e-05, "loss": 1.5643, "step": 197600 }, { "epoch": 3.844433641225085, "grad_norm": 11.892369270324707, "learning_rate": 3.081664233576643e-05, "loss": 1.5632, "step": 197700 }, { "epoch": 3.8463782207097714, "grad_norm": 3.311105251312256, "learning_rate": 3.08069099756691e-05, "loss": 1.396, "step": 197800 }, { "epoch": 3.848322800194458, "grad_norm": 4.0605854988098145, "learning_rate": 3.0797177615571776e-05, "loss": 1.4141, "step": 197900 }, { "epoch": 3.8502673796791442, "grad_norm": 6.731995582580566, "learning_rate": 3.078744525547445e-05, "loss": 1.5382, "step": 198000 }, { "epoch": 3.8522119591638306, "grad_norm": 3.982330322265625, "learning_rate": 3.077771289537713e-05, "loss": 1.4364, "step": 198100 }, { "epoch": 3.8541565386485175, "grad_norm": 4.332025527954102, "learning_rate": 3.076798053527981e-05, "loss": 1.5354, "step": 198200 }, { "epoch": 3.8561011181332034, "grad_norm": 9.724020004272461, "learning_rate": 
3.0758248175182484e-05, "loss": 1.4404, "step": 198300 }, { "epoch": 3.8580456976178903, "grad_norm": 3.796583890914917, "learning_rate": 3.074851581508516e-05, "loss": 1.4872, "step": 198400 }, { "epoch": 3.8599902771025767, "grad_norm": 13.49592399597168, "learning_rate": 3.073878345498783e-05, "loss": 1.4737, "step": 198500 }, { "epoch": 3.861934856587263, "grad_norm": 3.8751978874206543, "learning_rate": 3.072905109489051e-05, "loss": 1.4418, "step": 198600 }, { "epoch": 3.8638794360719495, "grad_norm": 4.171014308929443, "learning_rate": 3.071931873479319e-05, "loss": 1.4406, "step": 198700 }, { "epoch": 3.865824015556636, "grad_norm": 10.204885482788086, "learning_rate": 3.070958637469586e-05, "loss": 1.528, "step": 198800 }, { "epoch": 3.8677685950413223, "grad_norm": 4.919009208679199, "learning_rate": 3.069985401459854e-05, "loss": 1.4836, "step": 198900 }, { "epoch": 3.8697131745260087, "grad_norm": 4.531192779541016, "learning_rate": 3.069012165450122e-05, "loss": 1.5383, "step": 199000 }, { "epoch": 3.871657754010695, "grad_norm": 11.11091423034668, "learning_rate": 3.0680389294403894e-05, "loss": 1.5506, "step": 199100 }, { "epoch": 3.8736023334953815, "grad_norm": 3.6906845569610596, "learning_rate": 3.067065693430657e-05, "loss": 1.507, "step": 199200 }, { "epoch": 3.875546912980068, "grad_norm": 10.979689598083496, "learning_rate": 3.066092457420925e-05, "loss": 1.366, "step": 199300 }, { "epoch": 3.8774914924647543, "grad_norm": 2.7811200618743896, "learning_rate": 3.0651192214111925e-05, "loss": 1.5103, "step": 199400 }, { "epoch": 3.879436071949441, "grad_norm": 3.3904531002044678, "learning_rate": 3.0641459854014595e-05, "loss": 1.3732, "step": 199500 }, { "epoch": 3.881380651434127, "grad_norm": 2.1431047916412354, "learning_rate": 3.063172749391728e-05, "loss": 1.5101, "step": 199600 }, { "epoch": 3.883325230918814, "grad_norm": 6.82340669631958, "learning_rate": 3.0621995133819956e-05, "loss": 1.3883, "step": 199700 }, { "epoch": 
3.8852698104035004, "grad_norm": 4.5148773193359375, "learning_rate": 3.0612262773722626e-05, "loss": 1.4661, "step": 199800 }, { "epoch": 3.887214389888187, "grad_norm": 8.02463150024414, "learning_rate": 3.060253041362531e-05, "loss": 1.5796, "step": 199900 }, { "epoch": 3.889158969372873, "grad_norm": 5.109918117523193, "learning_rate": 3.059279805352798e-05, "loss": 1.5135, "step": 200000 }, { "epoch": 3.889158969372873, "eval_accuracy": 0.5467555555555556, "eval_f1": 0.5347814822512825, "eval_loss": 1.202825665473938, "eval_precision": 0.5499818144874312, "eval_recall": 0.5467555555555556, "eval_runtime": 11714.9715, "eval_samples_per_second": 15.365, "eval_steps_per_second": 0.48, "step": 200000 }, { "epoch": 3.8911035488575596, "grad_norm": 7.435001850128174, "learning_rate": 3.058306569343066e-05, "loss": 1.4766, "step": 200100 }, { "epoch": 3.893048128342246, "grad_norm": 2.620652198791504, "learning_rate": 3.057343065693431e-05, "loss": 1.3676, "step": 200200 }, { "epoch": 3.8949927078269324, "grad_norm": 1.9298584461212158, "learning_rate": 3.0563698296836985e-05, "loss": 1.3824, "step": 200300 }, { "epoch": 3.896937287311619, "grad_norm": 2.735536813735962, "learning_rate": 3.055396593673966e-05, "loss": 1.4897, "step": 200400 }, { "epoch": 3.898881866796305, "grad_norm": 14.050975799560547, "learning_rate": 3.054423357664234e-05, "loss": 1.5104, "step": 200500 }, { "epoch": 3.9008264462809916, "grad_norm": 1.3345144987106323, "learning_rate": 3.0534501216545016e-05, "loss": 1.4402, "step": 200600 }, { "epoch": 3.902771025765678, "grad_norm": 4.631580829620361, "learning_rate": 3.052476885644769e-05, "loss": 1.4588, "step": 200700 }, { "epoch": 3.904715605250365, "grad_norm": 9.534964561462402, "learning_rate": 3.051503649635037e-05, "loss": 1.446, "step": 200800 }, { "epoch": 3.906660184735051, "grad_norm": 2.7522237300872803, "learning_rate": 3.0505304136253044e-05, "loss": 1.5312, "step": 200900 }, { "epoch": 3.9086047642197377, "grad_norm": 
9.335278511047363, "learning_rate": 3.0495571776155718e-05, "loss": 1.4304, "step": 201000 }, { "epoch": 3.9105493437044236, "grad_norm": 4.1501994132995605, "learning_rate": 3.0485839416058398e-05, "loss": 1.6247, "step": 201100 }, { "epoch": 3.9124939231891105, "grad_norm": 3.661510944366455, "learning_rate": 3.0476107055961072e-05, "loss": 1.5621, "step": 201200 }, { "epoch": 3.914438502673797, "grad_norm": 7.155914306640625, "learning_rate": 3.046637469586375e-05, "loss": 1.4669, "step": 201300 }, { "epoch": 3.9163830821584833, "grad_norm": 4.3583598136901855, "learning_rate": 3.0456642335766422e-05, "loss": 1.5502, "step": 201400 }, { "epoch": 3.9183276616431697, "grad_norm": 3.6409451961517334, "learning_rate": 3.0446909975669103e-05, "loss": 1.4693, "step": 201500 }, { "epoch": 3.920272241127856, "grad_norm": 8.505546569824219, "learning_rate": 3.0437177615571776e-05, "loss": 1.4703, "step": 201600 }, { "epoch": 3.9222168206125425, "grad_norm": 3.877004861831665, "learning_rate": 3.042744525547445e-05, "loss": 1.4877, "step": 201700 }, { "epoch": 3.924161400097229, "grad_norm": 2.329568862915039, "learning_rate": 3.041771289537713e-05, "loss": 1.5028, "step": 201800 }, { "epoch": 3.9261059795819153, "grad_norm": 4.214216709136963, "learning_rate": 3.0407980535279807e-05, "loss": 1.4173, "step": 201900 }, { "epoch": 3.9280505590666017, "grad_norm": 2.3455569744110107, "learning_rate": 3.039824817518248e-05, "loss": 1.6345, "step": 202000 }, { "epoch": 3.929995138551288, "grad_norm": 5.648862838745117, "learning_rate": 3.038851581508516e-05, "loss": 1.4569, "step": 202100 }, { "epoch": 3.9319397180359745, "grad_norm": 5.62740421295166, "learning_rate": 3.0378783454987835e-05, "loss": 1.5181, "step": 202200 }, { "epoch": 3.9338842975206614, "grad_norm": 6.336248874664307, "learning_rate": 3.0369051094890512e-05, "loss": 1.4979, "step": 202300 }, { "epoch": 3.9358288770053473, "grad_norm": 3.7505223751068115, "learning_rate": 3.035931873479319e-05, "loss": 
1.4191, "step": 202400 }, { "epoch": 3.937773456490034, "grad_norm": 3.7401323318481445, "learning_rate": 3.0349586374695866e-05, "loss": 1.5233, "step": 202500 }, { "epoch": 3.9397180359747206, "grad_norm": 5.0043206214904785, "learning_rate": 3.033985401459854e-05, "loss": 1.4148, "step": 202600 }, { "epoch": 3.941662615459407, "grad_norm": 3.3851754665374756, "learning_rate": 3.033012165450122e-05, "loss": 1.4363, "step": 202700 }, { "epoch": 3.9436071949440934, "grad_norm": 3.129385471343994, "learning_rate": 3.0320389294403894e-05, "loss": 1.6245, "step": 202800 }, { "epoch": 3.94555177442878, "grad_norm": 4.530510425567627, "learning_rate": 3.031065693430657e-05, "loss": 1.4618, "step": 202900 }, { "epoch": 3.947496353913466, "grad_norm": 7.810832500457764, "learning_rate": 3.0300924574209248e-05, "loss": 1.6407, "step": 203000 }, { "epoch": 3.9494409333981526, "grad_norm": 4.693019866943359, "learning_rate": 3.0291192214111925e-05, "loss": 1.4038, "step": 203100 }, { "epoch": 3.951385512882839, "grad_norm": 6.30319881439209, "learning_rate": 3.02814598540146e-05, "loss": 1.5184, "step": 203200 }, { "epoch": 3.9533300923675254, "grad_norm": 4.500480651855469, "learning_rate": 3.027172749391728e-05, "loss": 1.4809, "step": 203300 }, { "epoch": 3.955274671852212, "grad_norm": 2.005037307739258, "learning_rate": 3.0261995133819953e-05, "loss": 1.4299, "step": 203400 }, { "epoch": 3.9572192513368982, "grad_norm": 3.8166632652282715, "learning_rate": 3.025226277372263e-05, "loss": 1.7648, "step": 203500 }, { "epoch": 3.959163830821585, "grad_norm": 2.1346452236175537, "learning_rate": 3.0242530413625303e-05, "loss": 1.412, "step": 203600 }, { "epoch": 3.961108410306271, "grad_norm": 2.8296024799346924, "learning_rate": 3.0232798053527984e-05, "loss": 1.4459, "step": 203700 }, { "epoch": 3.963052989790958, "grad_norm": 1.3936443328857422, "learning_rate": 3.0223065693430657e-05, "loss": 1.4667, "step": 203800 }, { "epoch": 3.9649975692756443, "grad_norm": 
5.017575263977051, "learning_rate": 3.021333333333333e-05, "loss": 1.4727, "step": 203900 }, { "epoch": 3.9669421487603307, "grad_norm": 3.879246473312378, "learning_rate": 3.020360097323601e-05, "loss": 1.47, "step": 204000 }, { "epoch": 3.968886728245017, "grad_norm": 1.759311318397522, "learning_rate": 3.019386861313869e-05, "loss": 1.4628, "step": 204100 }, { "epoch": 3.9708313077297035, "grad_norm": 1.7512152194976807, "learning_rate": 3.0184233576642336e-05, "loss": 1.3943, "step": 204200 }, { "epoch": 3.97277588721439, "grad_norm": 2.734156847000122, "learning_rate": 3.0174501216545013e-05, "loss": 1.4282, "step": 204300 }, { "epoch": 3.9747204666990763, "grad_norm": 4.581643104553223, "learning_rate": 3.016476885644769e-05, "loss": 1.6385, "step": 204400 }, { "epoch": 3.9766650461837627, "grad_norm": 4.626147747039795, "learning_rate": 3.0155036496350364e-05, "loss": 1.3755, "step": 204500 }, { "epoch": 3.978609625668449, "grad_norm": 3.827012300491333, "learning_rate": 3.0145304136253044e-05, "loss": 1.6414, "step": 204600 }, { "epoch": 3.9805542051531355, "grad_norm": 3.691756248474121, "learning_rate": 3.0135571776155718e-05, "loss": 1.4865, "step": 204700 }, { "epoch": 3.982498784637822, "grad_norm": 5.596826553344727, "learning_rate": 3.0125839416058395e-05, "loss": 1.5012, "step": 204800 }, { "epoch": 3.9844433641225088, "grad_norm": 6.9225077629089355, "learning_rate": 3.0116107055961075e-05, "loss": 1.4701, "step": 204900 }, { "epoch": 3.9863879436071947, "grad_norm": 4.349961280822754, "learning_rate": 3.010637469586375e-05, "loss": 1.3999, "step": 205000 }, { "epoch": 3.9863879436071947, "eval_accuracy": 0.5621833333333334, "eval_f1": 0.551672117285682, "eval_loss": 1.1762161254882812, "eval_precision": 0.5630058403545536, "eval_recall": 0.5621833333333334, "eval_runtime": 11710.4573, "eval_samples_per_second": 15.371, "eval_steps_per_second": 0.48, "step": 205000 }, { "epoch": 3.9883325230918816, "grad_norm": 5.901556015014648, "learning_rate": 
3.0096642335766422e-05, "loss": 1.5163, "step": 205100 }, { "epoch": 3.990277102576568, "grad_norm": 14.4878511428833, "learning_rate": 3.0086909975669103e-05, "loss": 1.5106, "step": 205200 }, { "epoch": 3.9922216820612544, "grad_norm": 2.0720691680908203, "learning_rate": 3.0077177615571776e-05, "loss": 1.3346, "step": 205300 }, { "epoch": 3.994166261545941, "grad_norm": 2.600416898727417, "learning_rate": 3.0067445255474453e-05, "loss": 1.5776, "step": 205400 }, { "epoch": 3.996110841030627, "grad_norm": 3.4828691482543945, "learning_rate": 3.0057712895377134e-05, "loss": 1.8538, "step": 205500 }, { "epoch": 3.9980554205153136, "grad_norm": 5.153822422027588, "learning_rate": 3.0047980535279808e-05, "loss": 1.7254, "step": 205600 }, { "epoch": 4.0, "grad_norm": 6.157608509063721, "learning_rate": 3.0038345498783455e-05, "loss": 1.5262, "step": 205700 }, { "epoch": 4.001944579484687, "grad_norm": 4.9126505851745605, "learning_rate": 3.0028613138686136e-05, "loss": 1.4877, "step": 205800 }, { "epoch": 4.003889158969373, "grad_norm": 8.761143684387207, "learning_rate": 3.001888077858881e-05, "loss": 1.4987, "step": 205900 }, { "epoch": 4.00583373845406, "grad_norm": 5.0765061378479, "learning_rate": 3.0009148418491483e-05, "loss": 1.6046, "step": 206000 }, { "epoch": 4.007778317938746, "grad_norm": 7.4184393882751465, "learning_rate": 2.9999416058394163e-05, "loss": 1.3508, "step": 206100 }, { "epoch": 4.0097228974234325, "grad_norm": 10.484474182128906, "learning_rate": 2.998968369829684e-05, "loss": 1.3205, "step": 206200 }, { "epoch": 4.011667476908118, "grad_norm": 3.3698458671569824, "learning_rate": 2.9979951338199514e-05, "loss": 1.3939, "step": 206300 }, { "epoch": 4.013612056392805, "grad_norm": 6.115997791290283, "learning_rate": 2.9970218978102194e-05, "loss": 1.5063, "step": 206400 }, { "epoch": 4.015556635877491, "grad_norm": 3.71136736869812, "learning_rate": 2.9960486618004868e-05, "loss": 1.4976, "step": 206500 }, { "epoch": 4.017501215362178, 
"grad_norm": 11.484439849853516, "learning_rate": 2.995075425790754e-05, "loss": 1.5476, "step": 206600 }, { "epoch": 4.019445794846864, "grad_norm": 4.317607402801514, "learning_rate": 2.9941021897810222e-05, "loss": 1.5989, "step": 206700 }, { "epoch": 4.021390374331551, "grad_norm": 5.571810245513916, "learning_rate": 2.99312895377129e-05, "loss": 1.6855, "step": 206800 }, { "epoch": 4.023334953816237, "grad_norm": 3.910285711288452, "learning_rate": 2.9921557177615573e-05, "loss": 1.4576, "step": 206900 }, { "epoch": 4.025279533300924, "grad_norm": 4.61018180847168, "learning_rate": 2.9911824817518246e-05, "loss": 1.4652, "step": 207000 }, { "epoch": 4.0272241127856105, "grad_norm": 2.7730190753936768, "learning_rate": 2.9902092457420927e-05, "loss": 1.4474, "step": 207100 }, { "epoch": 4.0291686922702965, "grad_norm": 4.421700954437256, "learning_rate": 2.98923600973236e-05, "loss": 1.4466, "step": 207200 }, { "epoch": 4.031113271754983, "grad_norm": 4.143917083740234, "learning_rate": 2.9882627737226277e-05, "loss": 1.4672, "step": 207300 }, { "epoch": 4.033057851239669, "grad_norm": 3.8173179626464844, "learning_rate": 2.9872895377128958e-05, "loss": 1.4518, "step": 207400 }, { "epoch": 4.035002430724356, "grad_norm": 7.493322849273682, "learning_rate": 2.986316301703163e-05, "loss": 1.621, "step": 207500 }, { "epoch": 4.036947010209042, "grad_norm": 4.163309574127197, "learning_rate": 2.9853430656934305e-05, "loss": 1.5082, "step": 207600 }, { "epoch": 4.038891589693729, "grad_norm": 7.825023174285889, "learning_rate": 2.9843698296836985e-05, "loss": 1.4892, "step": 207700 }, { "epoch": 4.040836169178415, "grad_norm": 4.129871368408203, "learning_rate": 2.983396593673966e-05, "loss": 1.3593, "step": 207800 }, { "epoch": 4.042780748663102, "grad_norm": 3.5260043144226074, "learning_rate": 2.9824233576642336e-05, "loss": 1.5168, "step": 207900 }, { "epoch": 4.044725328147788, "grad_norm": 11.280472755432129, "learning_rate": 2.9814501216545016e-05, "loss": 
1.4657, "step": 208000 }, { "epoch": 4.046669907632475, "grad_norm": 5.325314521789551, "learning_rate": 2.980476885644769e-05, "loss": 1.5675, "step": 208100 }, { "epoch": 4.0486144871171605, "grad_norm": 5.037012577056885, "learning_rate": 2.9795036496350364e-05, "loss": 1.4674, "step": 208200 }, { "epoch": 4.050559066601847, "grad_norm": 3.1802570819854736, "learning_rate": 2.9785401459854018e-05, "loss": 1.4478, "step": 208300 }, { "epoch": 4.052503646086533, "grad_norm": 12.015389442443848, "learning_rate": 2.9775669099756692e-05, "loss": 1.4989, "step": 208400 }, { "epoch": 4.05444822557122, "grad_norm": 11.241436958312988, "learning_rate": 2.9765936739659365e-05, "loss": 1.4438, "step": 208500 }, { "epoch": 4.056392805055907, "grad_norm": 4.597778797149658, "learning_rate": 2.9756204379562046e-05, "loss": 1.4071, "step": 208600 }, { "epoch": 4.058337384540593, "grad_norm": 5.6830573081970215, "learning_rate": 2.9746472019464723e-05, "loss": 1.5049, "step": 208700 }, { "epoch": 4.06028196402528, "grad_norm": 4.108875274658203, "learning_rate": 2.9736739659367397e-05, "loss": 1.3983, "step": 208800 }, { "epoch": 4.062226543509966, "grad_norm": 3.191267490386963, "learning_rate": 2.9727007299270077e-05, "loss": 1.5357, "step": 208900 }, { "epoch": 4.064171122994653, "grad_norm": 3.2781524658203125, "learning_rate": 2.971727493917275e-05, "loss": 1.4106, "step": 209000 }, { "epoch": 4.066115702479339, "grad_norm": 2.6663858890533447, "learning_rate": 2.9707542579075424e-05, "loss": 1.3947, "step": 209100 }, { "epoch": 4.0680602819640255, "grad_norm": 5.107334136962891, "learning_rate": 2.9697810218978105e-05, "loss": 1.5155, "step": 209200 }, { "epoch": 4.070004861448711, "grad_norm": 6.310003280639648, "learning_rate": 2.968807785888078e-05, "loss": 1.4944, "step": 209300 }, { "epoch": 4.071949440933398, "grad_norm": 5.124826431274414, "learning_rate": 2.9678345498783455e-05, "loss": 1.326, "step": 209400 }, { "epoch": 4.073894020418084, "grad_norm": 
2.873225212097168, "learning_rate": 2.9668613138686136e-05, "loss": 1.4606, "step": 209500 }, { "epoch": 4.075838599902771, "grad_norm": 19.395404815673828, "learning_rate": 2.965888077858881e-05, "loss": 1.4034, "step": 209600 }, { "epoch": 4.077783179387457, "grad_norm": 8.091475486755371, "learning_rate": 2.9649148418491486e-05, "loss": 1.4066, "step": 209700 }, { "epoch": 4.079727758872144, "grad_norm": 3.5593669414520264, "learning_rate": 2.963941605839416e-05, "loss": 1.363, "step": 209800 }, { "epoch": 4.081672338356831, "grad_norm": 4.246070384979248, "learning_rate": 2.962968369829684e-05, "loss": 1.4898, "step": 209900 }, { "epoch": 4.083616917841517, "grad_norm": 4.586206436157227, "learning_rate": 2.9619951338199514e-05, "loss": 1.4685, "step": 210000 }, { "epoch": 4.083616917841517, "eval_accuracy": 0.5549777777777778, "eval_f1": 0.5404715003770018, "eval_loss": 1.1819435358047485, "eval_precision": 0.5632286734629922, "eval_recall": 0.5549777777777778, "eval_runtime": 11711.2718, "eval_samples_per_second": 15.37, "eval_steps_per_second": 0.48, "step": 210000 }, { "epoch": 4.0855614973262036, "grad_norm": 11.225235939025879, "learning_rate": 2.9610218978102188e-05, "loss": 1.5932, "step": 210100 }, { "epoch": 4.0875060768108895, "grad_norm": 5.067518711090088, "learning_rate": 2.9600486618004868e-05, "loss": 1.4067, "step": 210200 }, { "epoch": 4.089450656295576, "grad_norm": 5.774002552032471, "learning_rate": 2.9590754257907545e-05, "loss": 1.418, "step": 210300 }, { "epoch": 4.091395235780262, "grad_norm": 7.605700492858887, "learning_rate": 2.958102189781022e-05, "loss": 1.4892, "step": 210400 }, { "epoch": 4.093339815264949, "grad_norm": 3.5147716999053955, "learning_rate": 2.957138686131387e-05, "loss": 1.615, "step": 210500 }, { "epoch": 4.095284394749635, "grad_norm": 11.144474029541016, "learning_rate": 2.9561654501216547e-05, "loss": 1.5829, "step": 210600 }, { "epoch": 4.097228974234322, "grad_norm": 5.678360939025879, "learning_rate": 
2.955192214111922e-05, "loss": 1.7002, "step": 210700 }, { "epoch": 4.099173553719008, "grad_norm": 11.661224365234375, "learning_rate": 2.95421897810219e-05, "loss": 1.4012, "step": 210800 }, { "epoch": 4.101118133203695, "grad_norm": 5.103845119476318, "learning_rate": 2.9532457420924574e-05, "loss": 1.4388, "step": 210900 }, { "epoch": 4.103062712688381, "grad_norm": 5.591563701629639, "learning_rate": 2.952272506082725e-05, "loss": 1.4263, "step": 211000 }, { "epoch": 4.105007292173068, "grad_norm": 3.369708776473999, "learning_rate": 2.951299270072993e-05, "loss": 1.5306, "step": 211100 }, { "epoch": 4.106951871657754, "grad_norm": 4.9416608810424805, "learning_rate": 2.9503260340632605e-05, "loss": 1.473, "step": 211200 }, { "epoch": 4.10889645114244, "grad_norm": 1.9450141191482544, "learning_rate": 2.949352798053528e-05, "loss": 1.4482, "step": 211300 }, { "epoch": 4.110841030627127, "grad_norm": 7.179958343505859, "learning_rate": 2.948379562043796e-05, "loss": 1.439, "step": 211400 }, { "epoch": 4.112785610111813, "grad_norm": 4.3169941902160645, "learning_rate": 2.9474063260340633e-05, "loss": 1.6022, "step": 211500 }, { "epoch": 4.1147301895965, "grad_norm": 5.509217739105225, "learning_rate": 2.946433090024331e-05, "loss": 1.557, "step": 211600 }, { "epoch": 4.116674769081186, "grad_norm": 6.516467571258545, "learning_rate": 2.945459854014599e-05, "loss": 1.3593, "step": 211700 }, { "epoch": 4.118619348565873, "grad_norm": 4.835402488708496, "learning_rate": 2.9444866180048664e-05, "loss": 1.4724, "step": 211800 }, { "epoch": 4.120563928050559, "grad_norm": 7.990752696990967, "learning_rate": 2.9435133819951338e-05, "loss": 1.475, "step": 211900 }, { "epoch": 4.122508507535246, "grad_norm": 3.009862184524536, "learning_rate": 2.9425401459854018e-05, "loss": 1.4023, "step": 212000 }, { "epoch": 4.124453087019932, "grad_norm": 10.794246673583984, "learning_rate": 2.9415669099756692e-05, "loss": 1.4918, "step": 212100 }, { "epoch": 4.1263976665046185, 
"grad_norm": 2.585702657699585, "learning_rate": 2.940593673965937e-05, "loss": 1.3331, "step": 212200 }, { "epoch": 4.128342245989304, "grad_norm": 3.705134153366089, "learning_rate": 2.939620437956205e-05, "loss": 1.4069, "step": 212300 }, { "epoch": 4.130286825473991, "grad_norm": 3.6611509323120117, "learning_rate": 2.9386472019464723e-05, "loss": 1.4666, "step": 212400 }, { "epoch": 4.132231404958677, "grad_norm": 2.833343505859375, "learning_rate": 2.9376739659367397e-05, "loss": 1.5034, "step": 212500 }, { "epoch": 4.134175984443364, "grad_norm": 4.457284927368164, "learning_rate": 2.936700729927007e-05, "loss": 1.4109, "step": 212600 }, { "epoch": 4.136120563928051, "grad_norm": 3.5081417560577393, "learning_rate": 2.935727493917275e-05, "loss": 1.6162, "step": 212700 }, { "epoch": 4.138065143412737, "grad_norm": 2.4355621337890625, "learning_rate": 2.9347542579075428e-05, "loss": 1.4762, "step": 212800 }, { "epoch": 4.140009722897424, "grad_norm": 8.89276123046875, "learning_rate": 2.93378102189781e-05, "loss": 1.4242, "step": 212900 }, { "epoch": 4.14195430238211, "grad_norm": 5.959532737731934, "learning_rate": 2.932807785888078e-05, "loss": 1.3987, "step": 213000 }, { "epoch": 4.143898881866797, "grad_norm": 4.318221092224121, "learning_rate": 2.9318345498783455e-05, "loss": 1.6271, "step": 213100 }, { "epoch": 4.1458434613514825, "grad_norm": 2.9466962814331055, "learning_rate": 2.9308613138686132e-05, "loss": 1.4988, "step": 213200 }, { "epoch": 4.147788040836169, "grad_norm": 1.4194525480270386, "learning_rate": 2.929888077858881e-05, "loss": 1.5469, "step": 213300 }, { "epoch": 4.149732620320855, "grad_norm": 12.166141510009766, "learning_rate": 2.9289148418491486e-05, "loss": 1.5467, "step": 213400 }, { "epoch": 4.151677199805542, "grad_norm": 3.010443687438965, "learning_rate": 2.927941605839416e-05, "loss": 1.4087, "step": 213500 }, { "epoch": 4.153621779290228, "grad_norm": 11.242581367492676, "learning_rate": 2.926968369829684e-05, "loss": 
1.4239, "step": 213600 }, { "epoch": 4.155566358774915, "grad_norm": 3.4278995990753174, "learning_rate": 2.9259951338199514e-05, "loss": 1.5424, "step": 213700 }, { "epoch": 4.157510938259601, "grad_norm": 3.2411458492279053, "learning_rate": 2.9250316301703162e-05, "loss": 1.5977, "step": 213800 }, { "epoch": 4.159455517744288, "grad_norm": 2.2193589210510254, "learning_rate": 2.9240583941605842e-05, "loss": 1.4356, "step": 213900 }, { "epoch": 4.161400097228975, "grad_norm": 4.4763665199279785, "learning_rate": 2.9230851581508516e-05, "loss": 1.5252, "step": 214000 }, { "epoch": 4.163344676713661, "grad_norm": 3.872483730316162, "learning_rate": 2.9221119221411193e-05, "loss": 1.5446, "step": 214100 }, { "epoch": 4.1652892561983474, "grad_norm": 5.309928894042969, "learning_rate": 2.9211386861313873e-05, "loss": 1.6133, "step": 214200 }, { "epoch": 4.167233835683033, "grad_norm": 2.5384926795959473, "learning_rate": 2.9201654501216547e-05, "loss": 1.6086, "step": 214300 }, { "epoch": 4.16917841516772, "grad_norm": 4.347989082336426, "learning_rate": 2.919192214111922e-05, "loss": 1.438, "step": 214400 }, { "epoch": 4.171122994652406, "grad_norm": 2.449493169784546, "learning_rate": 2.91821897810219e-05, "loss": 1.6726, "step": 214500 }, { "epoch": 4.173067574137093, "grad_norm": 7.456083297729492, "learning_rate": 2.9172457420924574e-05, "loss": 1.4674, "step": 214600 }, { "epoch": 4.175012153621779, "grad_norm": 4.811056137084961, "learning_rate": 2.916272506082725e-05, "loss": 1.6472, "step": 214700 }, { "epoch": 4.176956733106466, "grad_norm": 2.7009036540985107, "learning_rate": 2.9152992700729932e-05, "loss": 1.4251, "step": 214800 }, { "epoch": 4.178901312591152, "grad_norm": 4.145186901092529, "learning_rate": 2.9143260340632606e-05, "loss": 1.5294, "step": 214900 }, { "epoch": 4.180845892075839, "grad_norm": 1.4679800271987915, "learning_rate": 2.913352798053528e-05, "loss": 1.4338, "step": 215000 }, { "epoch": 4.180845892075839, "eval_accuracy": 
0.5497666666666666, "eval_f1": 0.5353935619182736, "eval_loss": 1.1992460489273071, "eval_precision": 0.5569057582355398, "eval_recall": 0.5497666666666667, "eval_runtime": 11708.8495, "eval_samples_per_second": 15.373, "eval_steps_per_second": 0.48, "step": 215000 }, { "epoch": 4.182790471560525, "grad_norm": 2.630699634552002, "learning_rate": 2.912379562043796e-05, "loss": 1.4503, "step": 215100 }, { "epoch": 4.1847350510452115, "grad_norm": 2.473196029663086, "learning_rate": 2.9114063260340633e-05, "loss": 1.4106, "step": 215200 }, { "epoch": 4.186679630529898, "grad_norm": 3.3414223194122314, "learning_rate": 2.910433090024331e-05, "loss": 1.4842, "step": 215300 }, { "epoch": 4.188624210014584, "grad_norm": 5.3889641761779785, "learning_rate": 2.9094598540145984e-05, "loss": 1.3676, "step": 215400 }, { "epoch": 4.190568789499271, "grad_norm": 1.8113781213760376, "learning_rate": 2.9084866180048664e-05, "loss": 1.5305, "step": 215500 }, { "epoch": 4.192513368983957, "grad_norm": 6.817519664764404, "learning_rate": 2.9075133819951338e-05, "loss": 1.571, "step": 215600 }, { "epoch": 4.194457948468644, "grad_norm": 4.310055732727051, "learning_rate": 2.9065401459854015e-05, "loss": 1.3665, "step": 215700 }, { "epoch": 4.19640252795333, "grad_norm": 11.185601234436035, "learning_rate": 2.9055669099756695e-05, "loss": 1.6921, "step": 215800 }, { "epoch": 4.198347107438017, "grad_norm": 5.038329124450684, "learning_rate": 2.904593673965937e-05, "loss": 1.577, "step": 215900 }, { "epoch": 4.200291686922703, "grad_norm": 4.857022285461426, "learning_rate": 2.9036204379562043e-05, "loss": 1.4495, "step": 216000 }, { "epoch": 4.20223626640739, "grad_norm": 2.6684703826904297, "learning_rate": 2.9026472019464723e-05, "loss": 1.3546, "step": 216100 }, { "epoch": 4.2041808458920755, "grad_norm": 6.696786403656006, "learning_rate": 2.9016739659367397e-05, "loss": 1.4349, "step": 216200 }, { "epoch": 4.206125425376762, "grad_norm": 4.981998920440674, "learning_rate": 
2.9007007299270074e-05, "loss": 1.3638, "step": 216300 }, { "epoch": 4.208070004861448, "grad_norm": 4.627358436584473, "learning_rate": 2.8997274939172754e-05, "loss": 1.4989, "step": 216400 }, { "epoch": 4.210014584346135, "grad_norm": 1.383352279663086, "learning_rate": 2.8987639902676402e-05, "loss": 1.4937, "step": 216500 }, { "epoch": 4.211959163830821, "grad_norm": 3.4067201614379883, "learning_rate": 2.8977907542579075e-05, "loss": 1.5916, "step": 216600 }, { "epoch": 4.213903743315508, "grad_norm": 4.3186445236206055, "learning_rate": 2.8968175182481756e-05, "loss": 1.5009, "step": 216700 }, { "epoch": 4.215848322800195, "grad_norm": 5.124577045440674, "learning_rate": 2.895844282238443e-05, "loss": 1.3581, "step": 216800 }, { "epoch": 4.217792902284881, "grad_norm": 3.418398857116699, "learning_rate": 2.8948710462287103e-05, "loss": 1.579, "step": 216900 }, { "epoch": 4.219737481769568, "grad_norm": 4.102955341339111, "learning_rate": 2.8938978102189783e-05, "loss": 1.4609, "step": 217000 }, { "epoch": 4.221682061254254, "grad_norm": 4.859246253967285, "learning_rate": 2.892924574209246e-05, "loss": 1.3269, "step": 217100 }, { "epoch": 4.2236266407389405, "grad_norm": 3.997636318206787, "learning_rate": 2.8919513381995134e-05, "loss": 1.6491, "step": 217200 }, { "epoch": 4.225571220223626, "grad_norm": 10.838839530944824, "learning_rate": 2.8909781021897815e-05, "loss": 1.3768, "step": 217300 }, { "epoch": 4.227515799708313, "grad_norm": 7.1575751304626465, "learning_rate": 2.8900048661800488e-05, "loss": 1.3539, "step": 217400 }, { "epoch": 4.229460379192999, "grad_norm": 13.8803071975708, "learning_rate": 2.8890316301703162e-05, "loss": 1.4124, "step": 217500 }, { "epoch": 4.231404958677686, "grad_norm": 6.409053325653076, "learning_rate": 2.8880583941605842e-05, "loss": 1.5201, "step": 217600 }, { "epoch": 4.233349538162372, "grad_norm": 4.4074602127075195, "learning_rate": 2.887085158150852e-05, "loss": 1.5353, "step": 217700 }, { "epoch": 
4.235294117647059, "grad_norm": 3.0405523777008057, "learning_rate": 2.8861119221411193e-05, "loss": 1.364, "step": 217800 }, { "epoch": 4.237238697131745, "grad_norm": 3.0542500019073486, "learning_rate": 2.8851386861313873e-05, "loss": 1.3661, "step": 217900 }, { "epoch": 4.239183276616432, "grad_norm": 5.2077250480651855, "learning_rate": 2.8841654501216547e-05, "loss": 1.5752, "step": 218000 }, { "epoch": 4.2411278561011185, "grad_norm": 3.2078843116760254, "learning_rate": 2.883192214111922e-05, "loss": 1.5137, "step": 218100 }, { "epoch": 4.2430724355858045, "grad_norm": 2.8832452297210693, "learning_rate": 2.8822189781021898e-05, "loss": 1.596, "step": 218200 }, { "epoch": 4.245017015070491, "grad_norm": 4.408921718597412, "learning_rate": 2.8812457420924578e-05, "loss": 1.4739, "step": 218300 }, { "epoch": 4.246961594555177, "grad_norm": 10.771284103393555, "learning_rate": 2.880272506082725e-05, "loss": 1.4319, "step": 218400 }, { "epoch": 4.248906174039864, "grad_norm": 3.4291462898254395, "learning_rate": 2.8792992700729925e-05, "loss": 1.4981, "step": 218500 }, { "epoch": 4.25085075352455, "grad_norm": 3.0889737606048584, "learning_rate": 2.8783260340632606e-05, "loss": 1.5472, "step": 218600 }, { "epoch": 4.252795333009237, "grad_norm": 6.767003536224365, "learning_rate": 2.877352798053528e-05, "loss": 1.4489, "step": 218700 }, { "epoch": 4.254739912493923, "grad_norm": 6.540287017822266, "learning_rate": 2.8763795620437956e-05, "loss": 1.3044, "step": 218800 }, { "epoch": 4.25668449197861, "grad_norm": 6.153176784515381, "learning_rate": 2.8754063260340637e-05, "loss": 1.5545, "step": 218900 }, { "epoch": 4.258629071463296, "grad_norm": 1.818819522857666, "learning_rate": 2.874433090024331e-05, "loss": 1.4785, "step": 219000 }, { "epoch": 4.260573650947983, "grad_norm": 6.435334205627441, "learning_rate": 2.8734598540145984e-05, "loss": 1.4558, "step": 219100 }, { "epoch": 4.2625182304326685, "grad_norm": 6.219513893127441, "learning_rate": 
2.872496350364964e-05, "loss": 1.4721, "step": 219200 }, { "epoch": 4.264462809917355, "grad_norm": 4.906131744384766, "learning_rate": 2.8715231143552312e-05, "loss": 1.4473, "step": 219300 }, { "epoch": 4.266407389402042, "grad_norm": 2.742398262023926, "learning_rate": 2.8705498783454986e-05, "loss": 1.5933, "step": 219400 }, { "epoch": 4.268351968886728, "grad_norm": 4.8509745597839355, "learning_rate": 2.8695766423357666e-05, "loss": 1.4486, "step": 219500 }, { "epoch": 4.270296548371415, "grad_norm": 11.306498527526855, "learning_rate": 2.8686034063260343e-05, "loss": 1.4653, "step": 219600 }, { "epoch": 4.272241127856101, "grad_norm": 7.931895732879639, "learning_rate": 2.8676301703163017e-05, "loss": 1.4105, "step": 219700 }, { "epoch": 4.274185707340788, "grad_norm": 4.235708236694336, "learning_rate": 2.8666569343065697e-05, "loss": 1.4066, "step": 219800 }, { "epoch": 4.276130286825474, "grad_norm": 6.182926654815674, "learning_rate": 2.865683698296837e-05, "loss": 1.6198, "step": 219900 }, { "epoch": 4.278074866310161, "grad_norm": 4.9571099281311035, "learning_rate": 2.8647104622871044e-05, "loss": 1.6445, "step": 220000 }, { "epoch": 4.278074866310161, "eval_accuracy": 0.5424277777777777, "eval_f1": 0.5296729989741076, "eval_loss": 1.2039426565170288, "eval_precision": 0.5603374024598902, "eval_recall": 0.5424277777777778, "eval_runtime": 11698.3325, "eval_samples_per_second": 15.387, "eval_steps_per_second": 0.481, "step": 220000 }, { "epoch": 4.280019445794847, "grad_norm": 3.587460517883301, "learning_rate": 2.8637372262773725e-05, "loss": 1.5633, "step": 220100 }, { "epoch": 4.2819640252795335, "grad_norm": 9.277827262878418, "learning_rate": 2.8627639902676402e-05, "loss": 1.4543, "step": 220200 }, { "epoch": 4.283908604764219, "grad_norm": 4.945542335510254, "learning_rate": 2.8617907542579075e-05, "loss": 1.678, "step": 220300 }, { "epoch": 4.285853184248906, "grad_norm": 2.417222738265991, "learning_rate": 2.8608175182481756e-05, "loss": 
1.4804, "step": 220400 }, { "epoch": 4.287797763733592, "grad_norm": 10.735498428344727, "learning_rate": 2.859844282238443e-05, "loss": 1.4412, "step": 220500 }, { "epoch": 4.289742343218279, "grad_norm": 8.217190742492676, "learning_rate": 2.8588710462287106e-05, "loss": 1.4425, "step": 220600 }, { "epoch": 4.291686922702965, "grad_norm": 4.995087146759033, "learning_rate": 2.8578978102189784e-05, "loss": 1.357, "step": 220700 }, { "epoch": 4.293631502187652, "grad_norm": 7.858221530914307, "learning_rate": 2.856924574209246e-05, "loss": 1.5152, "step": 220800 }, { "epoch": 4.295576081672339, "grad_norm": 7.12753438949585, "learning_rate": 2.8559513381995134e-05, "loss": 1.4003, "step": 220900 }, { "epoch": 4.297520661157025, "grad_norm": 2.7894303798675537, "learning_rate": 2.8549781021897808e-05, "loss": 1.4807, "step": 221000 }, { "epoch": 4.2994652406417115, "grad_norm": 2.4743566513061523, "learning_rate": 2.8540048661800488e-05, "loss": 1.4084, "step": 221100 }, { "epoch": 4.3014098201263975, "grad_norm": 8.443131446838379, "learning_rate": 2.8530316301703165e-05, "loss": 1.3962, "step": 221200 }, { "epoch": 4.303354399611084, "grad_norm": 11.557991981506348, "learning_rate": 2.852058394160584e-05, "loss": 1.5191, "step": 221300 }, { "epoch": 4.30529897909577, "grad_norm": 2.9692111015319824, "learning_rate": 2.851085158150852e-05, "loss": 1.5882, "step": 221400 }, { "epoch": 4.307243558580457, "grad_norm": 1.6326361894607544, "learning_rate": 2.8501119221411193e-05, "loss": 1.3015, "step": 221500 }, { "epoch": 4.309188138065143, "grad_norm": 2.5291266441345215, "learning_rate": 2.8491386861313867e-05, "loss": 1.4658, "step": 221600 }, { "epoch": 4.31113271754983, "grad_norm": 4.45059061050415, "learning_rate": 2.8481654501216547e-05, "loss": 1.4268, "step": 221700 }, { "epoch": 4.313077297034516, "grad_norm": 1.7893049716949463, "learning_rate": 2.8471922141119224e-05, "loss": 1.4679, "step": 221800 }, { "epoch": 4.315021876519203, "grad_norm": 
1.8975350856781006, "learning_rate": 2.8462189781021898e-05, "loss": 1.431, "step": 221900 }, { "epoch": 4.31696645600389, "grad_norm": 4.866546154022217, "learning_rate": 2.8452457420924578e-05, "loss": 1.4102, "step": 222000 }, { "epoch": 4.318911035488576, "grad_norm": 2.697139024734497, "learning_rate": 2.844272506082725e-05, "loss": 1.3688, "step": 222100 }, { "epoch": 4.320855614973262, "grad_norm": 2.3353185653686523, "learning_rate": 2.8432992700729925e-05, "loss": 1.4999, "step": 222200 }, { "epoch": 4.322800194457948, "grad_norm": 4.806412220001221, "learning_rate": 2.8423260340632606e-05, "loss": 1.5388, "step": 222300 }, { "epoch": 4.324744773942635, "grad_norm": 6.371898651123047, "learning_rate": 2.8413527980535283e-05, "loss": 1.5572, "step": 222400 }, { "epoch": 4.326689353427321, "grad_norm": 5.596405029296875, "learning_rate": 2.8403795620437956e-05, "loss": 1.4636, "step": 222500 }, { "epoch": 4.328633932912008, "grad_norm": 2.9418437480926514, "learning_rate": 2.8394063260340637e-05, "loss": 1.8017, "step": 222600 }, { "epoch": 4.330578512396694, "grad_norm": 2.7126858234405518, "learning_rate": 2.838433090024331e-05, "loss": 1.4283, "step": 222700 }, { "epoch": 4.332523091881381, "grad_norm": 5.173280715942383, "learning_rate": 2.8374598540145987e-05, "loss": 1.7602, "step": 222800 }, { "epoch": 4.334467671366067, "grad_norm": 4.0461106300354, "learning_rate": 2.8364866180048664e-05, "loss": 1.3995, "step": 222900 }, { "epoch": 4.336412250850754, "grad_norm": 9.008594512939453, "learning_rate": 2.835513381995134e-05, "loss": 1.4441, "step": 223000 }, { "epoch": 4.33835683033544, "grad_norm": 8.451848030090332, "learning_rate": 2.8345401459854015e-05, "loss": 1.3631, "step": 223100 }, { "epoch": 4.3403014098201265, "grad_norm": 9.3141508102417, "learning_rate": 2.833566909975669e-05, "loss": 1.393, "step": 223200 }, { "epoch": 4.342245989304812, "grad_norm": 15.53127670288086, "learning_rate": 2.8326034063260343e-05, "loss": 1.4353, "step": 
223300 }, { "epoch": 4.344190568789499, "grad_norm": 6.556751728057861, "learning_rate": 2.8316301703163017e-05, "loss": 1.5278, "step": 223400 }, { "epoch": 4.346135148274186, "grad_norm": 5.37595796585083, "learning_rate": 2.8306569343065697e-05, "loss": 1.4799, "step": 223500 }, { "epoch": 4.348079727758872, "grad_norm": 4.457785606384277, "learning_rate": 2.829683698296837e-05, "loss": 1.4277, "step": 223600 }, { "epoch": 4.350024307243559, "grad_norm": 2.862748861312866, "learning_rate": 2.8287104622871048e-05, "loss": 1.5136, "step": 223700 }, { "epoch": 4.351968886728245, "grad_norm": 3.4651355743408203, "learning_rate": 2.827737226277372e-05, "loss": 1.5113, "step": 223800 }, { "epoch": 4.353913466212932, "grad_norm": 4.878674507141113, "learning_rate": 2.8267639902676402e-05, "loss": 1.335, "step": 223900 }, { "epoch": 4.355858045697618, "grad_norm": 7.142143249511719, "learning_rate": 2.8257907542579075e-05, "loss": 1.4846, "step": 224000 }, { "epoch": 4.3578026251823045, "grad_norm": 4.304786682128906, "learning_rate": 2.8248175182481753e-05, "loss": 1.4511, "step": 224100 }, { "epoch": 4.3597472046669905, "grad_norm": 2.6109373569488525, "learning_rate": 2.823844282238443e-05, "loss": 1.5565, "step": 224200 }, { "epoch": 4.361691784151677, "grad_norm": 4.3871283531188965, "learning_rate": 2.8228710462287107e-05, "loss": 1.5529, "step": 224300 }, { "epoch": 4.363636363636363, "grad_norm": 12.119462013244629, "learning_rate": 2.821897810218978e-05, "loss": 1.4729, "step": 224400 }, { "epoch": 4.36558094312105, "grad_norm": 8.86164379119873, "learning_rate": 2.820924574209246e-05, "loss": 1.7682, "step": 224500 }, { "epoch": 4.367525522605736, "grad_norm": 3.5160510540008545, "learning_rate": 2.8199513381995134e-05, "loss": 1.4975, "step": 224600 }, { "epoch": 4.369470102090423, "grad_norm": 4.2378973960876465, "learning_rate": 2.818978102189781e-05, "loss": 1.4173, "step": 224700 }, { "epoch": 4.371414681575109, "grad_norm": 2.3085131645202637, 
"learning_rate": 2.818004866180049e-05, "loss": 1.6754, "step": 224800 }, { "epoch": 4.373359261059796, "grad_norm": 5.013606548309326, "learning_rate": 2.8170316301703165e-05, "loss": 1.5484, "step": 224900 }, { "epoch": 4.375303840544483, "grad_norm": 2.481166362762451, "learning_rate": 2.816058394160584e-05, "loss": 1.4788, "step": 225000 }, { "epoch": 4.375303840544483, "eval_accuracy": 0.5549, "eval_f1": 0.5457502493278646, "eval_loss": 1.1929957866668701, "eval_precision": 0.55248776632753, "eval_recall": 0.5549000000000001, "eval_runtime": 11708.3391, "eval_samples_per_second": 15.374, "eval_steps_per_second": 0.48, "step": 225000 }, { "epoch": 4.377248420029169, "grad_norm": 3.7072575092315674, "learning_rate": 2.815085158150852e-05, "loss": 1.5085, "step": 225100 }, { "epoch": 4.379192999513855, "grad_norm": 2.3585448265075684, "learning_rate": 2.8141216545012167e-05, "loss": 1.688, "step": 225200 }, { "epoch": 4.381137578998541, "grad_norm": 15.125020980834961, "learning_rate": 2.813148418491484e-05, "loss": 1.5394, "step": 225300 }, { "epoch": 4.383082158483228, "grad_norm": 3.5809311866760254, "learning_rate": 2.812175182481752e-05, "loss": 1.4549, "step": 225400 }, { "epoch": 4.385026737967914, "grad_norm": 5.1817307472229, "learning_rate": 2.8112019464720195e-05, "loss": 1.4017, "step": 225500 }, { "epoch": 4.386971317452601, "grad_norm": 2.811269998550415, "learning_rate": 2.810228710462287e-05, "loss": 1.3867, "step": 225600 }, { "epoch": 4.388915896937287, "grad_norm": 9.528701782226562, "learning_rate": 2.8092554744525552e-05, "loss": 1.4446, "step": 225700 }, { "epoch": 4.390860476421974, "grad_norm": 4.103501319885254, "learning_rate": 2.8082822384428226e-05, "loss": 1.3437, "step": 225800 }, { "epoch": 4.39280505590666, "grad_norm": 8.441765785217285, "learning_rate": 2.80730900243309e-05, "loss": 1.4912, "step": 225900 }, { "epoch": 4.394749635391347, "grad_norm": 2.1586854457855225, "learning_rate": 2.806335766423358e-05, "loss": 1.5486, 
"step": 226000 }, { "epoch": 4.3966942148760335, "grad_norm": 2.2364661693573, "learning_rate": 2.8053625304136257e-05, "loss": 1.4949, "step": 226100 }, { "epoch": 4.3986387943607195, "grad_norm": 7.96087646484375, "learning_rate": 2.804389294403893e-05, "loss": 1.4842, "step": 226200 }, { "epoch": 4.400583373845406, "grad_norm": 8.046324729919434, "learning_rate": 2.803416058394161e-05, "loss": 1.3678, "step": 226300 }, { "epoch": 4.402527953330092, "grad_norm": 42.326725006103516, "learning_rate": 2.8024428223844284e-05, "loss": 1.8025, "step": 226400 }, { "epoch": 4.404472532814779, "grad_norm": 8.747584342956543, "learning_rate": 2.8014695863746958e-05, "loss": 1.49, "step": 226500 }, { "epoch": 4.406417112299465, "grad_norm": 12.050472259521484, "learning_rate": 2.8004963503649635e-05, "loss": 1.4106, "step": 226600 }, { "epoch": 4.408361691784152, "grad_norm": 6.153656005859375, "learning_rate": 2.7995231143552316e-05, "loss": 1.4071, "step": 226700 }, { "epoch": 4.410306271268838, "grad_norm": 2.92917799949646, "learning_rate": 2.798549878345499e-05, "loss": 1.45, "step": 226800 }, { "epoch": 4.412250850753525, "grad_norm": 4.756988048553467, "learning_rate": 2.7975766423357663e-05, "loss": 1.3642, "step": 226900 }, { "epoch": 4.414195430238211, "grad_norm": 2.8766977787017822, "learning_rate": 2.7966034063260343e-05, "loss": 1.5103, "step": 227000 }, { "epoch": 4.4161400097228976, "grad_norm": 4.278975486755371, "learning_rate": 2.7956301703163017e-05, "loss": 1.5244, "step": 227100 }, { "epoch": 4.4180845892075835, "grad_norm": 7.701866626739502, "learning_rate": 2.7946569343065694e-05, "loss": 1.4175, "step": 227200 }, { "epoch": 4.42002916869227, "grad_norm": 8.059228897094727, "learning_rate": 2.7936836982968374e-05, "loss": 1.5479, "step": 227300 }, { "epoch": 4.421973748176956, "grad_norm": 6.637621879577637, "learning_rate": 2.7927201946472022e-05, "loss": 1.6123, "step": 227400 }, { "epoch": 4.423918327661643, "grad_norm": 7.665101528167725, 
"learning_rate": 2.7917469586374696e-05, "loss": 1.3963, "step": 227500 }, { "epoch": 4.42586290714633, "grad_norm": 8.01789379119873, "learning_rate": 2.7907737226277376e-05, "loss": 1.4247, "step": 227600 }, { "epoch": 4.427807486631016, "grad_norm": 8.087237358093262, "learning_rate": 2.789800486618005e-05, "loss": 1.4328, "step": 227700 }, { "epoch": 4.429752066115703, "grad_norm": 6.107560157775879, "learning_rate": 2.7888272506082723e-05, "loss": 1.4026, "step": 227800 }, { "epoch": 4.431696645600389, "grad_norm": 5.499512195587158, "learning_rate": 2.7878540145985404e-05, "loss": 1.44, "step": 227900 }, { "epoch": 4.433641225085076, "grad_norm": 9.793966293334961, "learning_rate": 2.786880778588808e-05, "loss": 1.5207, "step": 228000 }, { "epoch": 4.435585804569762, "grad_norm": 4.11014986038208, "learning_rate": 2.7859075425790754e-05, "loss": 1.589, "step": 228100 }, { "epoch": 4.437530384054448, "grad_norm": 3.5173964500427246, "learning_rate": 2.7849343065693435e-05, "loss": 1.5036, "step": 228200 }, { "epoch": 4.439474963539134, "grad_norm": 11.6688232421875, "learning_rate": 2.783961070559611e-05, "loss": 1.596, "step": 228300 }, { "epoch": 4.441419543023821, "grad_norm": 2.269023895263672, "learning_rate": 2.7829878345498782e-05, "loss": 1.5183, "step": 228400 }, { "epoch": 4.443364122508507, "grad_norm": 20.11492156982422, "learning_rate": 2.7820145985401462e-05, "loss": 1.3572, "step": 228500 }, { "epoch": 4.445308701993194, "grad_norm": 7.999027252197266, "learning_rate": 2.781041362530414e-05, "loss": 1.5635, "step": 228600 }, { "epoch": 4.44725328147788, "grad_norm": 2.3212976455688477, "learning_rate": 2.7800681265206813e-05, "loss": 1.4878, "step": 228700 }, { "epoch": 4.449197860962567, "grad_norm": 2.3799660205841064, "learning_rate": 2.7790948905109493e-05, "loss": 1.486, "step": 228800 }, { "epoch": 4.451142440447253, "grad_norm": 3.224071741104126, "learning_rate": 2.7781216545012167e-05, "loss": 1.4274, "step": 228900 }, { "epoch": 
4.45308701993194, "grad_norm": 8.089990615844727, "learning_rate": 2.777148418491484e-05, "loss": 1.4321, "step": 229000 }, { "epoch": 4.4550315994166265, "grad_norm": 3.6665737628936768, "learning_rate": 2.776175182481752e-05, "loss": 1.338, "step": 229100 }, { "epoch": 4.4569761789013125, "grad_norm": 2.314396381378174, "learning_rate": 2.7752019464720198e-05, "loss": 1.464, "step": 229200 }, { "epoch": 4.458920758385999, "grad_norm": 6.287067413330078, "learning_rate": 2.7742287104622872e-05, "loss": 1.6474, "step": 229300 }, { "epoch": 4.460865337870685, "grad_norm": 3.4992408752441406, "learning_rate": 2.7732554744525545e-05, "loss": 1.403, "step": 229400 }, { "epoch": 4.462809917355372, "grad_norm": 4.5358967781066895, "learning_rate": 2.7722822384428226e-05, "loss": 1.4945, "step": 229500 }, { "epoch": 4.464754496840058, "grad_norm": 4.7089619636535645, "learning_rate": 2.77130900243309e-05, "loss": 1.4716, "step": 229600 }, { "epoch": 4.466699076324745, "grad_norm": 8.982900619506836, "learning_rate": 2.7703357664233576e-05, "loss": 1.4935, "step": 229700 }, { "epoch": 4.468643655809431, "grad_norm": 4.99952507019043, "learning_rate": 2.7693625304136257e-05, "loss": 1.5664, "step": 229800 }, { "epoch": 4.470588235294118, "grad_norm": 3.28888201713562, "learning_rate": 2.768389294403893e-05, "loss": 1.3846, "step": 229900 }, { "epoch": 4.472532814778804, "grad_norm": 7.337288856506348, "learning_rate": 2.7674160583941604e-05, "loss": 1.3937, "step": 230000 }, { "epoch": 4.472532814778804, "eval_accuracy": 0.5571277777777778, "eval_f1": 0.5508906683248994, "eval_loss": 1.1761850118637085, "eval_precision": 0.5552028620548997, "eval_recall": 0.5571277777777778, "eval_runtime": 11713.606, "eval_samples_per_second": 15.367, "eval_steps_per_second": 0.48, "step": 230000 }, { "epoch": 4.474477394263491, "grad_norm": 3.6232433319091797, "learning_rate": 2.7664428223844285e-05, "loss": 1.6191, "step": 230100 }, { "epoch": 4.476421973748177, "grad_norm": 
2.555018186569214, "learning_rate": 2.7654793187347932e-05, "loss": 1.3834, "step": 230200 }, { "epoch": 4.478366553232863, "grad_norm": 8.976601600646973, "learning_rate": 2.7645060827250606e-05, "loss": 1.7334, "step": 230300 }, { "epoch": 4.48031113271755, "grad_norm": 5.64934778213501, "learning_rate": 2.7635328467153286e-05, "loss": 1.3608, "step": 230400 }, { "epoch": 4.482255712202236, "grad_norm": 6.074303150177002, "learning_rate": 2.7625596107055963e-05, "loss": 1.3909, "step": 230500 }, { "epoch": 4.484200291686923, "grad_norm": 2.9360601902008057, "learning_rate": 2.7615863746958637e-05, "loss": 1.3743, "step": 230600 }, { "epoch": 4.486144871171609, "grad_norm": 3.839054584503174, "learning_rate": 2.7606131386861317e-05, "loss": 1.4777, "step": 230700 }, { "epoch": 4.488089450656296, "grad_norm": 5.112955570220947, "learning_rate": 2.759639902676399e-05, "loss": 1.5182, "step": 230800 }, { "epoch": 4.490034030140982, "grad_norm": 6.615780353546143, "learning_rate": 2.7586666666666665e-05, "loss": 1.3993, "step": 230900 }, { "epoch": 4.491978609625669, "grad_norm": 2.8243637084960938, "learning_rate": 2.7576934306569345e-05, "loss": 1.4127, "step": 231000 }, { "epoch": 4.493923189110355, "grad_norm": 5.3762969970703125, "learning_rate": 2.7567201946472022e-05, "loss": 1.5092, "step": 231100 }, { "epoch": 4.4958677685950414, "grad_norm": 10.950200080871582, "learning_rate": 2.7557469586374696e-05, "loss": 1.5843, "step": 231200 }, { "epoch": 4.497812348079727, "grad_norm": 5.409755229949951, "learning_rate": 2.7547737226277376e-05, "loss": 1.4682, "step": 231300 }, { "epoch": 4.499756927564414, "grad_norm": 3.407945156097412, "learning_rate": 2.753800486618005e-05, "loss": 1.5015, "step": 231400 }, { "epoch": 4.5017015070491, "grad_norm": 3.6975908279418945, "learning_rate": 2.7528272506082727e-05, "loss": 1.3477, "step": 231500 }, { "epoch": 4.503646086533787, "grad_norm": 2.7111189365386963, "learning_rate": 2.7518540145985404e-05, "loss": 1.3562, 
"step": 231600 }, { "epoch": 4.505590666018474, "grad_norm": 7.641227722167969, "learning_rate": 2.750880778588808e-05, "loss": 1.4859, "step": 231700 }, { "epoch": 4.50753524550316, "grad_norm": 4.716830253601074, "learning_rate": 2.7499075425790754e-05, "loss": 1.4587, "step": 231800 }, { "epoch": 4.509479824987847, "grad_norm": 8.991113662719727, "learning_rate": 2.7489343065693428e-05, "loss": 1.4422, "step": 231900 }, { "epoch": 4.511424404472533, "grad_norm": 6.4693074226379395, "learning_rate": 2.747961070559611e-05, "loss": 1.4775, "step": 232000 }, { "epoch": 4.5133689839572195, "grad_norm": 3.618825674057007, "learning_rate": 2.7469878345498785e-05, "loss": 1.512, "step": 232100 }, { "epoch": 4.5153135634419055, "grad_norm": 5.487786769866943, "learning_rate": 2.746014598540146e-05, "loss": 1.6684, "step": 232200 }, { "epoch": 4.517258142926592, "grad_norm": 2.8960697650909424, "learning_rate": 2.745041362530414e-05, "loss": 1.4943, "step": 232300 }, { "epoch": 4.519202722411278, "grad_norm": 3.7568225860595703, "learning_rate": 2.7440681265206813e-05, "loss": 1.663, "step": 232400 }, { "epoch": 4.521147301895965, "grad_norm": 5.968195915222168, "learning_rate": 2.7430948905109487e-05, "loss": 1.4588, "step": 232500 }, { "epoch": 4.523091881380651, "grad_norm": 4.108310699462891, "learning_rate": 2.7421216545012167e-05, "loss": 1.4991, "step": 232600 }, { "epoch": 4.525036460865338, "grad_norm": 3.3823177814483643, "learning_rate": 2.7411484184914844e-05, "loss": 1.405, "step": 232700 }, { "epoch": 4.526981040350025, "grad_norm": 5.090798854827881, "learning_rate": 2.7401751824817518e-05, "loss": 1.4497, "step": 232800 }, { "epoch": 4.528925619834711, "grad_norm": 2.313173770904541, "learning_rate": 2.7392019464720198e-05, "loss": 1.3615, "step": 232900 }, { "epoch": 4.530870199319397, "grad_norm": 12.356549263000488, "learning_rate": 2.7382287104622872e-05, "loss": 1.4523, "step": 233000 }, { "epoch": 4.532814778804084, "grad_norm": 5.951916217803955, 
"learning_rate": 2.7372554744525545e-05, "loss": 1.5255, "step": 233100 }, { "epoch": 4.53475935828877, "grad_norm": 11.711676597595215, "learning_rate": 2.7362822384428226e-05, "loss": 1.5513, "step": 233200 }, { "epoch": 4.536703937773456, "grad_norm": 3.4288673400878906, "learning_rate": 2.7353090024330903e-05, "loss": 1.4888, "step": 233300 }, { "epoch": 4.538648517258143, "grad_norm": 7.190371990203857, "learning_rate": 2.7343357664233576e-05, "loss": 1.4254, "step": 233400 }, { "epoch": 4.540593096742829, "grad_norm": 3.912181854248047, "learning_rate": 2.7333625304136257e-05, "loss": 1.3581, "step": 233500 }, { "epoch": 4.542537676227516, "grad_norm": 4.37331485748291, "learning_rate": 2.732389294403893e-05, "loss": 1.3983, "step": 233600 }, { "epoch": 4.544482255712202, "grad_norm": 4.8684258460998535, "learning_rate": 2.7314160583941608e-05, "loss": 1.4261, "step": 233700 }, { "epoch": 4.546426835196889, "grad_norm": 6.72487735748291, "learning_rate": 2.7304428223844285e-05, "loss": 1.5451, "step": 233800 }, { "epoch": 4.548371414681575, "grad_norm": 4.833137512207031, "learning_rate": 2.729469586374696e-05, "loss": 1.5478, "step": 233900 }, { "epoch": 4.550315994166262, "grad_norm": 3.0412652492523193, "learning_rate": 2.7284963503649635e-05, "loss": 1.4292, "step": 234000 }, { "epoch": 4.552260573650948, "grad_norm": 3.4705922603607178, "learning_rate": 2.727523114355231e-05, "loss": 1.363, "step": 234100 }, { "epoch": 4.5542051531356345, "grad_norm": 5.614694595336914, "learning_rate": 2.7265596107055963e-05, "loss": 1.4031, "step": 234200 }, { "epoch": 4.556149732620321, "grad_norm": 5.231081485748291, "learning_rate": 2.7255863746958637e-05, "loss": 1.4842, "step": 234300 }, { "epoch": 4.558094312105007, "grad_norm": 2.2846333980560303, "learning_rate": 2.7246131386861317e-05, "loss": 1.4437, "step": 234400 }, { "epoch": 4.560038891589694, "grad_norm": 7.130106449127197, "learning_rate": 2.723639902676399e-05, "loss": 1.4431, "step": 234500 }, { 
"epoch": 4.56198347107438, "grad_norm": 4.8099870681762695, "learning_rate": 2.7226666666666668e-05, "loss": 1.5549, "step": 234600 }, { "epoch": 4.563928050559067, "grad_norm": 14.975275993347168, "learning_rate": 2.721693430656934e-05, "loss": 1.5331, "step": 234700 }, { "epoch": 4.565872630043753, "grad_norm": 2.2198843955993652, "learning_rate": 2.7207201946472022e-05, "loss": 1.3555, "step": 234800 }, { "epoch": 4.56781720952844, "grad_norm": 6.29705810546875, "learning_rate": 2.7197469586374696e-05, "loss": 1.4456, "step": 234900 }, { "epoch": 4.569761789013126, "grad_norm": 4.8266072273254395, "learning_rate": 2.718783454987835e-05, "loss": 1.3932, "step": 235000 }, { "epoch": 4.569761789013126, "eval_accuracy": 0.5471222222222222, "eval_f1": 0.5338366418887914, "eval_loss": 1.2015811204910278, "eval_precision": 0.5522906756297901, "eval_recall": 0.5471222222222222, "eval_runtime": 11714.0833, "eval_samples_per_second": 15.366, "eval_steps_per_second": 0.48, "step": 235000 }, { "epoch": 4.5717063684978125, "grad_norm": 6.40280818939209, "learning_rate": 2.7178102189781024e-05, "loss": 1.5187, "step": 235100 }, { "epoch": 4.5736509479824985, "grad_norm": 2.3679280281066895, "learning_rate": 2.7168369829683697e-05, "loss": 1.5496, "step": 235200 }, { "epoch": 4.575595527467185, "grad_norm": 5.676403999328613, "learning_rate": 2.7158637469586374e-05, "loss": 1.4345, "step": 235300 }, { "epoch": 4.577540106951871, "grad_norm": 2.772041082382202, "learning_rate": 2.7148905109489055e-05, "loss": 1.5532, "step": 235400 }, { "epoch": 4.579484686436558, "grad_norm": 3.710331916809082, "learning_rate": 2.713917274939173e-05, "loss": 1.2826, "step": 235500 }, { "epoch": 4.581429265921244, "grad_norm": 4.785013198852539, "learning_rate": 2.7129440389294402e-05, "loss": 1.4137, "step": 235600 }, { "epoch": 4.583373845405931, "grad_norm": 2.954562187194824, "learning_rate": 2.7119708029197082e-05, "loss": 1.4616, "step": 235700 }, { "epoch": 4.585318424890618, 
"grad_norm": 3.643867015838623, "learning_rate": 2.7109975669099756e-05, "loss": 1.4429, "step": 235800 }, { "epoch": 4.587263004375304, "grad_norm": 4.291001796722412, "learning_rate": 2.7100243309002433e-05, "loss": 1.5353, "step": 235900 }, { "epoch": 4.589207583859991, "grad_norm": 2.342255115509033, "learning_rate": 2.7090510948905114e-05, "loss": 1.3723, "step": 236000 }, { "epoch": 4.591152163344677, "grad_norm": 2.4709248542785645, "learning_rate": 2.7080778588807787e-05, "loss": 1.512, "step": 236100 }, { "epoch": 4.593096742829363, "grad_norm": 7.077822208404541, "learning_rate": 2.707104622871046e-05, "loss": 1.4896, "step": 236200 }, { "epoch": 4.595041322314049, "grad_norm": 2.669619560241699, "learning_rate": 2.706131386861314e-05, "loss": 1.4851, "step": 236300 }, { "epoch": 4.596985901798736, "grad_norm": 3.309354782104492, "learning_rate": 2.7051581508515815e-05, "loss": 1.5101, "step": 236400 }, { "epoch": 4.598930481283422, "grad_norm": 3.01458740234375, "learning_rate": 2.7041849148418492e-05, "loss": 1.3648, "step": 236500 }, { "epoch": 4.600875060768109, "grad_norm": 5.464343070983887, "learning_rate": 2.7032116788321172e-05, "loss": 1.6152, "step": 236600 }, { "epoch": 4.602819640252795, "grad_norm": 4.087784290313721, "learning_rate": 2.7022384428223846e-05, "loss": 1.4791, "step": 236700 }, { "epoch": 4.604764219737482, "grad_norm": 4.71937894821167, "learning_rate": 2.701265206812652e-05, "loss": 1.4505, "step": 236800 }, { "epoch": 4.606708799222169, "grad_norm": 7.360558986663818, "learning_rate": 2.70029197080292e-05, "loss": 1.5649, "step": 236900 }, { "epoch": 4.608653378706855, "grad_norm": 5.757205963134766, "learning_rate": 2.6993187347931877e-05, "loss": 1.4734, "step": 237000 }, { "epoch": 4.610597958191541, "grad_norm": 4.226054668426514, "learning_rate": 2.698345498783455e-05, "loss": 1.5006, "step": 237100 }, { "epoch": 4.6125425376762275, "grad_norm": 12.99587631225586, "learning_rate": 2.697372262773723e-05, "loss": 1.5057, 
"step": 237200 }, { "epoch": 4.614487117160914, "grad_norm": 5.095037937164307, "learning_rate": 2.6963990267639905e-05, "loss": 1.5026, "step": 237300 }, { "epoch": 4.6164316966456, "grad_norm": 5.279080390930176, "learning_rate": 2.6954257907542578e-05, "loss": 1.3931, "step": 237400 }, { "epoch": 4.618376276130287, "grad_norm": 3.2744061946868896, "learning_rate": 2.6944525547445255e-05, "loss": 1.4866, "step": 237500 }, { "epoch": 4.620320855614973, "grad_norm": 5.751636028289795, "learning_rate": 2.6934793187347936e-05, "loss": 1.4066, "step": 237600 }, { "epoch": 4.62226543509966, "grad_norm": 2.9270834922790527, "learning_rate": 2.692506082725061e-05, "loss": 1.4959, "step": 237700 }, { "epoch": 4.624210014584346, "grad_norm": 5.389616966247559, "learning_rate": 2.6915328467153283e-05, "loss": 1.4604, "step": 237800 }, { "epoch": 4.626154594069033, "grad_norm": 3.4464709758758545, "learning_rate": 2.6905596107055963e-05, "loss": 1.6537, "step": 237900 }, { "epoch": 4.628099173553719, "grad_norm": 3.417886972427368, "learning_rate": 2.6895863746958637e-05, "loss": 1.5253, "step": 238000 }, { "epoch": 4.6300437530384055, "grad_norm": 6.820400238037109, "learning_rate": 2.6886131386861314e-05, "loss": 1.3836, "step": 238100 }, { "epoch": 4.6319883325230915, "grad_norm": 6.657670974731445, "learning_rate": 2.6876399026763994e-05, "loss": 1.3866, "step": 238200 }, { "epoch": 4.633932912007778, "grad_norm": 10.50017261505127, "learning_rate": 2.6866666666666668e-05, "loss": 1.4642, "step": 238300 }, { "epoch": 4.635877491492465, "grad_norm": 6.541896820068359, "learning_rate": 2.6856934306569342e-05, "loss": 1.3331, "step": 238400 }, { "epoch": 4.637822070977151, "grad_norm": 3.170313835144043, "learning_rate": 2.6847201946472022e-05, "loss": 1.5044, "step": 238500 }, { "epoch": 4.639766650461838, "grad_norm": 13.83486270904541, "learning_rate": 2.6837469586374696e-05, "loss": 1.4219, "step": 238600 }, { "epoch": 4.641711229946524, "grad_norm": 3.8292407989501953, 
"learning_rate": 2.6827737226277373e-05, "loss": 1.4575, "step": 238700 }, { "epoch": 4.643655809431211, "grad_norm": 5.290345668792725, "learning_rate": 2.6818004866180053e-05, "loss": 1.492, "step": 238800 }, { "epoch": 4.645600388915897, "grad_norm": 3.1484975814819336, "learning_rate": 2.6808272506082727e-05, "loss": 1.4739, "step": 238900 }, { "epoch": 4.647544968400584, "grad_norm": 5.140705108642578, "learning_rate": 2.6798637469586374e-05, "loss": 1.4461, "step": 239000 }, { "epoch": 4.64948954788527, "grad_norm": 8.56822395324707, "learning_rate": 2.6788905109489055e-05, "loss": 1.4997, "step": 239100 }, { "epoch": 4.651434127369956, "grad_norm": 3.5415382385253906, "learning_rate": 2.677917274939173e-05, "loss": 1.6089, "step": 239200 }, { "epoch": 4.653378706854642, "grad_norm": 11.624340057373047, "learning_rate": 2.6769440389294402e-05, "loss": 1.4626, "step": 239300 }, { "epoch": 4.655323286339329, "grad_norm": 2.458104372024536, "learning_rate": 2.6759708029197083e-05, "loss": 1.4357, "step": 239400 }, { "epoch": 4.657267865824015, "grad_norm": 7.871074676513672, "learning_rate": 2.674997566909976e-05, "loss": 1.5079, "step": 239500 }, { "epoch": 4.659212445308702, "grad_norm": 3.0604541301727295, "learning_rate": 2.6740243309002433e-05, "loss": 1.3862, "step": 239600 }, { "epoch": 4.661157024793388, "grad_norm": 3.324070692062378, "learning_rate": 2.6730510948905114e-05, "loss": 1.3992, "step": 239700 }, { "epoch": 4.663101604278075, "grad_norm": 5.404443740844727, "learning_rate": 2.6720778588807787e-05, "loss": 1.4592, "step": 239800 }, { "epoch": 4.665046183762762, "grad_norm": 1.7681089639663696, "learning_rate": 2.671104622871046e-05, "loss": 1.3657, "step": 239900 }, { "epoch": 4.666990763247448, "grad_norm": 5.547670364379883, "learning_rate": 2.670131386861314e-05, "loss": 1.5177, "step": 240000 }, { "epoch": 4.666990763247448, "eval_accuracy": 0.5576944444444445, "eval_f1": 0.5449438486575827, "eval_loss": 1.1786311864852905, 
"eval_precision": 0.5665888971491637, "eval_recall": 0.5576944444444444, "eval_runtime": 11712.9685, "eval_samples_per_second": 15.368, "eval_steps_per_second": 0.48, "step": 240000 }, { "epoch": 4.6689353427321345, "grad_norm": 2.8503894805908203, "learning_rate": 2.6691581508515818e-05, "loss": 1.4892, "step": 240100 }, { "epoch": 4.6708799222168205, "grad_norm": 2.3800458908081055, "learning_rate": 2.6681849148418492e-05, "loss": 1.4774, "step": 240200 }, { "epoch": 4.672824501701507, "grad_norm": 3.9526116847991943, "learning_rate": 2.6672116788321166e-05, "loss": 1.467, "step": 240300 }, { "epoch": 4.674769081186193, "grad_norm": 3.0676004886627197, "learning_rate": 2.6662384428223846e-05, "loss": 1.5104, "step": 240400 }, { "epoch": 4.67671366067088, "grad_norm": 4.533634662628174, "learning_rate": 2.665265206812652e-05, "loss": 1.5331, "step": 240500 }, { "epoch": 4.678658240155566, "grad_norm": 3.693882942199707, "learning_rate": 2.6642919708029197e-05, "loss": 1.4877, "step": 240600 }, { "epoch": 4.680602819640253, "grad_norm": 25.726377487182617, "learning_rate": 2.6633187347931877e-05, "loss": 1.4384, "step": 240700 }, { "epoch": 4.682547399124939, "grad_norm": 15.462388038635254, "learning_rate": 2.662345498783455e-05, "loss": 1.4788, "step": 240800 }, { "epoch": 4.684491978609626, "grad_norm": 5.781219959259033, "learning_rate": 2.6613722627737224e-05, "loss": 1.3839, "step": 240900 }, { "epoch": 4.686436558094313, "grad_norm": 4.054870128631592, "learning_rate": 2.660408759124088e-05, "loss": 1.3523, "step": 241000 }, { "epoch": 4.6883811375789985, "grad_norm": 4.884646892547607, "learning_rate": 2.6594355231143552e-05, "loss": 1.5321, "step": 241100 }, { "epoch": 4.6903257170636845, "grad_norm": 8.619452476501465, "learning_rate": 2.6584622871046226e-05, "loss": 1.5687, "step": 241200 }, { "epoch": 4.692270296548371, "grad_norm": 9.62576961517334, "learning_rate": 2.6574890510948906e-05, "loss": 1.5858, "step": 241300 }, { "epoch": 4.694214876033058, 
"grad_norm": 5.952028274536133, "learning_rate": 2.6565158150851583e-05, "loss": 1.3569, "step": 241400 }, { "epoch": 4.696159455517744, "grad_norm": 5.10033655166626, "learning_rate": 2.6555425790754257e-05, "loss": 1.611, "step": 241500 }, { "epoch": 4.698104035002431, "grad_norm": 4.018094539642334, "learning_rate": 2.6545693430656937e-05, "loss": 1.5503, "step": 241600 }, { "epoch": 4.700048614487117, "grad_norm": 4.961721420288086, "learning_rate": 2.653596107055961e-05, "loss": 1.4409, "step": 241700 }, { "epoch": 4.701993193971804, "grad_norm": 6.249082565307617, "learning_rate": 2.6526228710462285e-05, "loss": 1.5186, "step": 241800 }, { "epoch": 4.70393777345649, "grad_norm": 3.082526683807373, "learning_rate": 2.6516496350364965e-05, "loss": 1.4359, "step": 241900 }, { "epoch": 4.705882352941177, "grad_norm": 4.089860916137695, "learning_rate": 2.6506763990267642e-05, "loss": 1.443, "step": 242000 }, { "epoch": 4.707826932425863, "grad_norm": 3.7910940647125244, "learning_rate": 2.6497031630170316e-05, "loss": 1.3707, "step": 242100 }, { "epoch": 4.709771511910549, "grad_norm": 2.6887528896331787, "learning_rate": 2.6487299270072996e-05, "loss": 1.4039, "step": 242200 }, { "epoch": 4.711716091395235, "grad_norm": 3.91471266746521, "learning_rate": 2.647756690997567e-05, "loss": 1.6141, "step": 242300 }, { "epoch": 4.713660670879922, "grad_norm": 2.9684178829193115, "learning_rate": 2.6467834549878347e-05, "loss": 1.3437, "step": 242400 }, { "epoch": 4.715605250364609, "grad_norm": 2.5829975605010986, "learning_rate": 2.6458102189781024e-05, "loss": 1.5536, "step": 242500 }, { "epoch": 4.717549829849295, "grad_norm": 9.728389739990234, "learning_rate": 2.64483698296837e-05, "loss": 1.4815, "step": 242600 }, { "epoch": 4.719494409333982, "grad_norm": 9.416900634765625, "learning_rate": 2.6438637469586375e-05, "loss": 1.5379, "step": 242700 }, { "epoch": 4.721438988818668, "grad_norm": 2.65329909324646, "learning_rate": 2.6428905109489055e-05, "loss": 
1.4172, "step": 242800 }, { "epoch": 4.723383568303355, "grad_norm": 8.600390434265137, "learning_rate": 2.641917274939173e-05, "loss": 1.4829, "step": 242900 }, { "epoch": 4.725328147788041, "grad_norm": 8.547297477722168, "learning_rate": 2.6409440389294406e-05, "loss": 1.3964, "step": 243000 }, { "epoch": 4.7272727272727275, "grad_norm": 9.632619857788086, "learning_rate": 2.6399805352798057e-05, "loss": 1.5101, "step": 243100 }, { "epoch": 4.7292173067574135, "grad_norm": 5.757220268249512, "learning_rate": 2.639007299270073e-05, "loss": 1.475, "step": 243200 }, { "epoch": 4.7311618862421, "grad_norm": 6.191256999969482, "learning_rate": 2.6380340632603407e-05, "loss": 1.5029, "step": 243300 }, { "epoch": 4.733106465726786, "grad_norm": 4.132471561431885, "learning_rate": 2.6370608272506088e-05, "loss": 1.6582, "step": 243400 }, { "epoch": 4.735051045211473, "grad_norm": 4.092419147491455, "learning_rate": 2.636087591240876e-05, "loss": 1.3814, "step": 243500 }, { "epoch": 4.736995624696159, "grad_norm": 8.791953086853027, "learning_rate": 2.6351143552311435e-05, "loss": 1.4239, "step": 243600 }, { "epoch": 4.738940204180846, "grad_norm": 3.6262059211730957, "learning_rate": 2.6341411192214112e-05, "loss": 1.4975, "step": 243700 }, { "epoch": 4.740884783665532, "grad_norm": 8.331620216369629, "learning_rate": 2.633167883211679e-05, "loss": 1.5299, "step": 243800 }, { "epoch": 4.742829363150219, "grad_norm": 4.869324684143066, "learning_rate": 2.6321946472019466e-05, "loss": 1.5315, "step": 243900 }, { "epoch": 4.744773942634906, "grad_norm": 3.131413698196411, "learning_rate": 2.631221411192214e-05, "loss": 1.5601, "step": 244000 }, { "epoch": 4.7467185221195916, "grad_norm": 3.556523561477661, "learning_rate": 2.630248175182482e-05, "loss": 1.5311, "step": 244100 }, { "epoch": 4.748663101604278, "grad_norm": 10.839997291564941, "learning_rate": 2.6292749391727494e-05, "loss": 1.5046, "step": 244200 }, { "epoch": 4.750607681088964, "grad_norm": 
6.0826215744018555, "learning_rate": 2.628301703163017e-05, "loss": 1.3923, "step": 244300 }, { "epoch": 4.752552260573651, "grad_norm": 3.4690515995025635, "learning_rate": 2.627328467153285e-05, "loss": 1.4011, "step": 244400 }, { "epoch": 4.754496840058337, "grad_norm": 6.069045543670654, "learning_rate": 2.6263552311435525e-05, "loss": 1.499, "step": 244500 }, { "epoch": 4.756441419543024, "grad_norm": 4.443669319152832, "learning_rate": 2.62538199513382e-05, "loss": 1.4707, "step": 244600 }, { "epoch": 4.75838599902771, "grad_norm": 4.9564433097839355, "learning_rate": 2.624408759124088e-05, "loss": 1.4524, "step": 244700 }, { "epoch": 4.760330578512397, "grad_norm": 6.776137351989746, "learning_rate": 2.6234355231143552e-05, "loss": 1.5894, "step": 244800 }, { "epoch": 4.762275157997083, "grad_norm": 3.361548900604248, "learning_rate": 2.622462287104623e-05, "loss": 1.4195, "step": 244900 }, { "epoch": 4.76421973748177, "grad_norm": 4.041417598724365, "learning_rate": 2.621489051094891e-05, "loss": 1.5508, "step": 245000 }, { "epoch": 4.76421973748177, "eval_accuracy": 0.5540388888888889, "eval_f1": 0.552116460941403, "eval_loss": 1.1771687269210815, "eval_precision": 0.5825573882705116, "eval_recall": 0.5540388888888889, "eval_runtime": 11717.1599, "eval_samples_per_second": 15.362, "eval_steps_per_second": 0.48, "step": 245000 }, { "epoch": 4.7661643169664565, "grad_norm": 5.422684192657471, "learning_rate": 2.6205255474452554e-05, "loss": 1.6842, "step": 245100 }, { "epoch": 4.768108896451142, "grad_norm": 4.991487979888916, "learning_rate": 2.619552311435523e-05, "loss": 1.5474, "step": 245200 }, { "epoch": 4.770053475935828, "grad_norm": 1.6011195182800293, "learning_rate": 2.618579075425791e-05, "loss": 1.4573, "step": 245300 }, { "epoch": 4.771998055420515, "grad_norm": 2.7792036533355713, "learning_rate": 2.6176058394160585e-05, "loss": 1.3838, "step": 245400 }, { "epoch": 4.773942634905202, "grad_norm": 1.9600093364715576, "learning_rate": 
2.616632603406326e-05, "loss": 1.4597, "step": 245500 }, { "epoch": 4.775887214389888, "grad_norm": 6.187428951263428, "learning_rate": 2.615659367396594e-05, "loss": 1.4962, "step": 245600 }, { "epoch": 4.777831793874575, "grad_norm": 2.2613677978515625, "learning_rate": 2.6146861313868616e-05, "loss": 1.3246, "step": 245700 }, { "epoch": 4.779776373359261, "grad_norm": 6.302009105682373, "learning_rate": 2.613712895377129e-05, "loss": 1.5978, "step": 245800 }, { "epoch": 4.781720952843948, "grad_norm": 4.898573398590088, "learning_rate": 2.612739659367397e-05, "loss": 1.4493, "step": 245900 }, { "epoch": 4.783665532328634, "grad_norm": 2.8169052600860596, "learning_rate": 2.6117664233576644e-05, "loss": 1.4659, "step": 246000 }, { "epoch": 4.7856101118133205, "grad_norm": 2.385591983795166, "learning_rate": 2.6107931873479318e-05, "loss": 1.4595, "step": 246100 }, { "epoch": 4.7875546912980065, "grad_norm": 2.6297430992126465, "learning_rate": 2.6098199513381998e-05, "loss": 1.5334, "step": 246200 }, { "epoch": 4.789499270782693, "grad_norm": 8.23507022857666, "learning_rate": 2.6088467153284675e-05, "loss": 1.4353, "step": 246300 }, { "epoch": 4.791443850267379, "grad_norm": 4.439421653747559, "learning_rate": 2.607873479318735e-05, "loss": 1.3681, "step": 246400 }, { "epoch": 4.793388429752066, "grad_norm": 5.702634811401367, "learning_rate": 2.6069002433090022e-05, "loss": 1.5022, "step": 246500 }, { "epoch": 4.795333009236753, "grad_norm": 4.622781276702881, "learning_rate": 2.6059270072992703e-05, "loss": 1.4029, "step": 246600 }, { "epoch": 4.797277588721439, "grad_norm": 8.818670272827148, "learning_rate": 2.6049537712895376e-05, "loss": 1.5441, "step": 246700 }, { "epoch": 4.799222168206126, "grad_norm": 3.0306105613708496, "learning_rate": 2.6039805352798053e-05, "loss": 1.576, "step": 246800 }, { "epoch": 4.801166747690812, "grad_norm": 2.900210380554199, "learning_rate": 2.6030072992700734e-05, "loss": 1.5086, "step": 246900 }, { "epoch": 
4.803111327175499, "grad_norm": 9.04820442199707, "learning_rate": 2.6020340632603407e-05, "loss": 1.5555, "step": 247000 }, { "epoch": 4.805055906660185, "grad_norm": 3.3256168365478516, "learning_rate": 2.601060827250608e-05, "loss": 1.5592, "step": 247100 }, { "epoch": 4.807000486144871, "grad_norm": 15.332719802856445, "learning_rate": 2.6000973236009735e-05, "loss": 1.6711, "step": 247200 }, { "epoch": 4.808945065629557, "grad_norm": 5.1250176429748535, "learning_rate": 2.5991338199513383e-05, "loss": 1.3747, "step": 247300 }, { "epoch": 4.810889645114244, "grad_norm": 2.5385282039642334, "learning_rate": 2.5981605839416064e-05, "loss": 1.605, "step": 247400 }, { "epoch": 4.81283422459893, "grad_norm": 3.944002628326416, "learning_rate": 2.5971873479318737e-05, "loss": 1.4318, "step": 247500 }, { "epoch": 4.814778804083617, "grad_norm": 3.233670234680176, "learning_rate": 2.596214111922141e-05, "loss": 1.4733, "step": 247600 }, { "epoch": 4.816723383568303, "grad_norm": 4.6006574630737305, "learning_rate": 2.5952408759124084e-05, "loss": 1.5092, "step": 247700 }, { "epoch": 4.81866796305299, "grad_norm": 4.528766632080078, "learning_rate": 2.5942676399026765e-05, "loss": 1.4776, "step": 247800 }, { "epoch": 4.820612542537676, "grad_norm": 1.4612284898757935, "learning_rate": 2.5932944038929442e-05, "loss": 1.3794, "step": 247900 }, { "epoch": 4.822557122022363, "grad_norm": 7.157467365264893, "learning_rate": 2.5923211678832115e-05, "loss": 1.3494, "step": 248000 }, { "epoch": 4.8245017015070495, "grad_norm": 1.5554066896438599, "learning_rate": 2.5913479318734796e-05, "loss": 1.3916, "step": 248100 }, { "epoch": 4.8264462809917354, "grad_norm": 4.972797393798828, "learning_rate": 2.590374695863747e-05, "loss": 1.4405, "step": 248200 }, { "epoch": 4.828390860476422, "grad_norm": 2.7133054733276367, "learning_rate": 2.5894014598540147e-05, "loss": 1.4897, "step": 248300 }, { "epoch": 4.830335439961108, "grad_norm": 4.524113655090332, "learning_rate": 
2.5884282238442824e-05, "loss": 1.8203, "step": 248400 }, { "epoch": 4.832280019445795, "grad_norm": 4.720386981964111, "learning_rate": 2.58745498783455e-05, "loss": 1.6725, "step": 248500 }, { "epoch": 4.834224598930481, "grad_norm": 3.2266738414764404, "learning_rate": 2.5864817518248174e-05, "loss": 1.4564, "step": 248600 }, { "epoch": 4.836169178415168, "grad_norm": 7.124098300933838, "learning_rate": 2.5855085158150855e-05, "loss": 1.4303, "step": 248700 }, { "epoch": 4.838113757899854, "grad_norm": 6.179281711578369, "learning_rate": 2.5845352798053528e-05, "loss": 1.5385, "step": 248800 }, { "epoch": 4.840058337384541, "grad_norm": 3.0843794345855713, "learning_rate": 2.5835620437956205e-05, "loss": 1.3889, "step": 248900 }, { "epoch": 4.842002916869227, "grad_norm": 3.4473612308502197, "learning_rate": 2.5825888077858886e-05, "loss": 1.6578, "step": 249000 }, { "epoch": 4.8439474963539135, "grad_norm": 27.25227165222168, "learning_rate": 2.581615571776156e-05, "loss": 1.4201, "step": 249100 }, { "epoch": 4.8458920758386, "grad_norm": 6.179550647735596, "learning_rate": 2.5806423357664233e-05, "loss": 1.5088, "step": 249200 }, { "epoch": 4.847836655323286, "grad_norm": 4.156920909881592, "learning_rate": 2.5796690997566913e-05, "loss": 1.554, "step": 249300 }, { "epoch": 4.849781234807972, "grad_norm": 3.4863414764404297, "learning_rate": 2.5786958637469587e-05, "loss": 1.4663, "step": 249400 }, { "epoch": 4.851725814292659, "grad_norm": 2.5567269325256348, "learning_rate": 2.5777226277372264e-05, "loss": 1.3525, "step": 249500 }, { "epoch": 4.853670393777346, "grad_norm": 2.815678358078003, "learning_rate": 2.5767591240875915e-05, "loss": 1.4981, "step": 249600 }, { "epoch": 4.855614973262032, "grad_norm": 11.68917179107666, "learning_rate": 2.575785888077859e-05, "loss": 1.3487, "step": 249700 }, { "epoch": 4.857559552746719, "grad_norm": 14.061014175415039, "learning_rate": 2.5748126520681266e-05, "loss": 1.5112, "step": 249800 }, { "epoch": 
4.859504132231405, "grad_norm": 4.65812349319458, "learning_rate": 2.5738394160583946e-05, "loss": 1.6709, "step": 249900 }, { "epoch": 4.861448711716092, "grad_norm": 6.203651428222656, "learning_rate": 2.572866180048662e-05, "loss": 1.4184, "step": 250000 }, { "epoch": 4.861448711716092, "eval_accuracy": 0.5581111111111111, "eval_f1": 0.5455398380289532, "eval_loss": 1.17731773853302, "eval_precision": 0.5682366281043744, "eval_recall": 0.5581111111111111, "eval_runtime": 11719.1602, "eval_samples_per_second": 15.359, "eval_steps_per_second": 0.48, "step": 250000 }, { "epoch": 4.863393291200778, "grad_norm": 5.0012922286987305, "learning_rate": 2.5718929440389293e-05, "loss": 1.4904, "step": 250100 }, { "epoch": 4.865337870685464, "grad_norm": 2.5600833892822266, "learning_rate": 2.5709197080291974e-05, "loss": 1.5196, "step": 250200 }, { "epoch": 4.86728245017015, "grad_norm": 5.5219197273254395, "learning_rate": 2.569946472019465e-05, "loss": 1.4614, "step": 250300 }, { "epoch": 4.869227029654837, "grad_norm": 12.131685256958008, "learning_rate": 2.5689732360097324e-05, "loss": 1.4744, "step": 250400 }, { "epoch": 4.871171609139523, "grad_norm": 2.964383602142334, "learning_rate": 2.5679999999999998e-05, "loss": 1.6283, "step": 250500 }, { "epoch": 4.87311618862421, "grad_norm": 5.234283447265625, "learning_rate": 2.567026763990268e-05, "loss": 1.39, "step": 250600 }, { "epoch": 4.875060768108897, "grad_norm": 3.9589762687683105, "learning_rate": 2.5660535279805352e-05, "loss": 1.5181, "step": 250700 }, { "epoch": 4.877005347593583, "grad_norm": 6.179830551147461, "learning_rate": 2.565080291970803e-05, "loss": 1.4251, "step": 250800 }, { "epoch": 4.87894992707827, "grad_norm": 6.89276123046875, "learning_rate": 2.564107055961071e-05, "loss": 1.4688, "step": 250900 }, { "epoch": 4.880894506562956, "grad_norm": 6.130283832550049, "learning_rate": 2.5631338199513383e-05, "loss": 1.6219, "step": 251000 }, { "epoch": 4.8828390860476425, "grad_norm": 
8.941246032714844, "learning_rate": 2.5621605839416057e-05, "loss": 1.3492, "step": 251100 }, { "epoch": 4.8847836655323285, "grad_norm": 5.41383695602417, "learning_rate": 2.5611873479318737e-05, "loss": 1.5478, "step": 251200 }, { "epoch": 4.886728245017015, "grad_norm": 6.3910813331604, "learning_rate": 2.560214111922141e-05, "loss": 1.4713, "step": 251300 }, { "epoch": 4.888672824501701, "grad_norm": 3.200479745864868, "learning_rate": 2.5592408759124088e-05, "loss": 1.4085, "step": 251400 }, { "epoch": 4.890617403986388, "grad_norm": 3.22975492477417, "learning_rate": 2.5582676399026768e-05, "loss": 1.4219, "step": 251500 }, { "epoch": 4.892561983471074, "grad_norm": 3.6297600269317627, "learning_rate": 2.5572944038929442e-05, "loss": 1.6365, "step": 251600 }, { "epoch": 4.894506562955761, "grad_norm": 1.6490774154663086, "learning_rate": 2.5563211678832116e-05, "loss": 1.5039, "step": 251700 }, { "epoch": 4.896451142440447, "grad_norm": 4.049961566925049, "learning_rate": 2.5553479318734796e-05, "loss": 1.6418, "step": 251800 }, { "epoch": 4.898395721925134, "grad_norm": 5.3460540771484375, "learning_rate": 2.554374695863747e-05, "loss": 1.4746, "step": 251900 }, { "epoch": 4.90034030140982, "grad_norm": 7.2721381187438965, "learning_rate": 2.5534014598540147e-05, "loss": 1.3314, "step": 252000 }, { "epoch": 4.9022848808945065, "grad_norm": 2.9502756595611572, "learning_rate": 2.5524282238442827e-05, "loss": 1.3367, "step": 252100 }, { "epoch": 4.904229460379193, "grad_norm": 9.23670768737793, "learning_rate": 2.55145498783455e-05, "loss": 1.3972, "step": 252200 }, { "epoch": 4.906174039863879, "grad_norm": 1.9041876792907715, "learning_rate": 2.5504817518248174e-05, "loss": 1.3151, "step": 252300 }, { "epoch": 4.908118619348566, "grad_norm": 8.797319412231445, "learning_rate": 2.5495085158150855e-05, "loss": 1.3899, "step": 252400 }, { "epoch": 4.910063198833252, "grad_norm": 5.1339640617370605, "learning_rate": 2.5485352798053532e-05, "loss": 1.5509, 
"step": 252500 }, { "epoch": 4.912007778317939, "grad_norm": 4.311157703399658, "learning_rate": 2.5475620437956205e-05, "loss": 1.5386, "step": 252600 }, { "epoch": 4.913952357802625, "grad_norm": 6.039874076843262, "learning_rate": 2.5465985401459856e-05, "loss": 1.4182, "step": 252700 }, { "epoch": 4.915896937287312, "grad_norm": 5.611106872558594, "learning_rate": 2.5456253041362533e-05, "loss": 1.5019, "step": 252800 }, { "epoch": 4.917841516771998, "grad_norm": 7.3393425941467285, "learning_rate": 2.5446520681265207e-05, "loss": 1.3768, "step": 252900 }, { "epoch": 4.919786096256685, "grad_norm": 22.10979652404785, "learning_rate": 2.5436788321167887e-05, "loss": 1.6133, "step": 253000 }, { "epoch": 4.921730675741371, "grad_norm": 6.848813056945801, "learning_rate": 2.542705596107056e-05, "loss": 1.4133, "step": 253100 }, { "epoch": 4.923675255226057, "grad_norm": 4.489315032958984, "learning_rate": 2.5417323600973235e-05, "loss": 1.4254, "step": 253200 }, { "epoch": 4.925619834710744, "grad_norm": 2.8362836837768555, "learning_rate": 2.5407591240875912e-05, "loss": 1.4335, "step": 253300 }, { "epoch": 4.92756441419543, "grad_norm": 7.284862041473389, "learning_rate": 2.5397858880778592e-05, "loss": 1.4145, "step": 253400 }, { "epoch": 4.929508993680116, "grad_norm": 4.160228729248047, "learning_rate": 2.5388126520681266e-05, "loss": 1.4291, "step": 253500 }, { "epoch": 4.931453573164803, "grad_norm": 2.929344654083252, "learning_rate": 2.537839416058394e-05, "loss": 1.5565, "step": 253600 }, { "epoch": 4.93339815264949, "grad_norm": 8.32841682434082, "learning_rate": 2.536866180048662e-05, "loss": 1.3772, "step": 253700 }, { "epoch": 4.935342732134176, "grad_norm": 3.3223683834075928, "learning_rate": 2.5358929440389297e-05, "loss": 1.3959, "step": 253800 }, { "epoch": 4.937287311618863, "grad_norm": 6.1166534423828125, "learning_rate": 2.534919708029197e-05, "loss": 1.5383, "step": 253900 }, { "epoch": 4.939231891103549, "grad_norm": 2.3543148040771484, 
"learning_rate": 2.533946472019465e-05, "loss": 1.4782, "step": 254000 }, { "epoch": 4.9411764705882355, "grad_norm": 4.182586669921875, "learning_rate": 2.5329732360097325e-05, "loss": 1.497, "step": 254100 }, { "epoch": 4.9431210500729215, "grad_norm": 5.653079509735107, "learning_rate": 2.5319999999999998e-05, "loss": 1.3744, "step": 254200 }, { "epoch": 4.945065629557608, "grad_norm": 11.447168350219727, "learning_rate": 2.531026763990268e-05, "loss": 1.6058, "step": 254300 }, { "epoch": 4.947010209042294, "grad_norm": 8.359665870666504, "learning_rate": 2.5300535279805356e-05, "loss": 1.5291, "step": 254400 }, { "epoch": 4.948954788526981, "grad_norm": 4.541885852813721, "learning_rate": 2.529080291970803e-05, "loss": 1.4609, "step": 254500 }, { "epoch": 4.950899368011667, "grad_norm": 1.7998865842819214, "learning_rate": 2.528107055961071e-05, "loss": 1.5941, "step": 254600 }, { "epoch": 4.952843947496354, "grad_norm": 5.0234198570251465, "learning_rate": 2.5271338199513383e-05, "loss": 1.4317, "step": 254700 }, { "epoch": 4.954788526981041, "grad_norm": 6.936820983886719, "learning_rate": 2.5261605839416057e-05, "loss": 1.4628, "step": 254800 }, { "epoch": 4.956733106465727, "grad_norm": 7.862447261810303, "learning_rate": 2.5251873479318737e-05, "loss": 1.4611, "step": 254900 }, { "epoch": 4.958677685950414, "grad_norm": 4.7313761711120605, "learning_rate": 2.5242141119221414e-05, "loss": 1.5349, "step": 255000 }, { "epoch": 4.958677685950414, "eval_accuracy": 0.5580666666666667, "eval_f1": 0.54404665206941, "eval_loss": 1.182754635810852, "eval_precision": 0.5663425507960544, "eval_recall": 0.5580666666666666, "eval_runtime": 11719.0587, "eval_samples_per_second": 15.36, "eval_steps_per_second": 0.48, "step": 255000 }, { "epoch": 4.9606222654350995, "grad_norm": 6.746758937835693, "learning_rate": 2.5232408759124088e-05, "loss": 1.3973, "step": 255100 }, { "epoch": 4.962566844919786, "grad_norm": 16.954763412475586, "learning_rate": 2.522267639902677e-05, 
"loss": 1.3992, "step": 255200 }, { "epoch": 4.964511424404472, "grad_norm": 4.710229873657227, "learning_rate": 2.5212944038929442e-05, "loss": 1.371, "step": 255300 }, { "epoch": 4.966456003889159, "grad_norm": 5.522642135620117, "learning_rate": 2.5203211678832116e-05, "loss": 1.4588, "step": 255400 }, { "epoch": 4.968400583373845, "grad_norm": 4.972048759460449, "learning_rate": 2.5193479318734793e-05, "loss": 1.481, "step": 255500 }, { "epoch": 4.970345162858532, "grad_norm": 3.41739559173584, "learning_rate": 2.5183746958637473e-05, "loss": 1.5271, "step": 255600 }, { "epoch": 4.972289742343218, "grad_norm": 2.833813190460205, "learning_rate": 2.5174014598540147e-05, "loss": 1.4656, "step": 255700 }, { "epoch": 4.974234321827905, "grad_norm": 3.1349380016326904, "learning_rate": 2.516428223844282e-05, "loss": 1.3787, "step": 255800 }, { "epoch": 4.976178901312591, "grad_norm": 9.626154899597168, "learning_rate": 2.51545498783455e-05, "loss": 1.4593, "step": 255900 }, { "epoch": 4.978123480797278, "grad_norm": 12.39770793914795, "learning_rate": 2.5144817518248174e-05, "loss": 1.386, "step": 256000 }, { "epoch": 4.980068060281964, "grad_norm": 4.065727710723877, "learning_rate": 2.513508515815085e-05, "loss": 1.3366, "step": 256100 }, { "epoch": 4.98201263976665, "grad_norm": 5.28975248336792, "learning_rate": 2.5125352798053532e-05, "loss": 1.5205, "step": 256200 }, { "epoch": 4.983957219251337, "grad_norm": 2.7753102779388428, "learning_rate": 2.5115620437956205e-05, "loss": 1.383, "step": 256300 }, { "epoch": 4.985901798736023, "grad_norm": 5.37788200378418, "learning_rate": 2.510588807785888e-05, "loss": 1.3535, "step": 256400 }, { "epoch": 4.98784637822071, "grad_norm": 3.9174797534942627, "learning_rate": 2.509615571776156e-05, "loss": 1.4372, "step": 256500 }, { "epoch": 4.989790957705396, "grad_norm": 5.285958290100098, "learning_rate": 2.5086423357664236e-05, "loss": 1.6909, "step": 256600 }, { "epoch": 4.991735537190083, "grad_norm": 
5.402735233306885, "learning_rate": 2.507678832116788e-05, "loss": 1.2989, "step": 256700 }, { "epoch": 4.993680116674769, "grad_norm": 3.8525049686431885, "learning_rate": 2.506705596107056e-05, "loss": 1.388, "step": 256800 }, { "epoch": 4.995624696159456, "grad_norm": 5.727946758270264, "learning_rate": 2.5057323600973238e-05, "loss": 1.6115, "step": 256900 }, { "epoch": 4.997569275644142, "grad_norm": 3.5692007541656494, "learning_rate": 2.5047591240875912e-05, "loss": 1.4822, "step": 257000 }, { "epoch": 4.9995138551288285, "grad_norm": 3.9572174549102783, "learning_rate": 2.5037858880778592e-05, "loss": 1.3711, "step": 257100 }, { "epoch": 5.0014584346135145, "grad_norm": 11.468111991882324, "learning_rate": 2.5028126520681266e-05, "loss": 1.5927, "step": 257200 }, { "epoch": 5.003403014098201, "grad_norm": 4.317477226257324, "learning_rate": 2.501839416058394e-05, "loss": 1.3995, "step": 257300 }, { "epoch": 5.005347593582887, "grad_norm": 5.14781379699707, "learning_rate": 2.500866180048662e-05, "loss": 1.4078, "step": 257400 }, { "epoch": 5.007292173067574, "grad_norm": 2.6109910011291504, "learning_rate": 2.4998929440389297e-05, "loss": 1.4328, "step": 257500 }, { "epoch": 5.009236752552261, "grad_norm": 3.973421335220337, "learning_rate": 2.4989197080291974e-05, "loss": 1.4433, "step": 257600 }, { "epoch": 5.011181332036947, "grad_norm": 3.8181326389312744, "learning_rate": 2.4979464720194648e-05, "loss": 1.3777, "step": 257700 }, { "epoch": 5.013125911521634, "grad_norm": 5.794046401977539, "learning_rate": 2.4969732360097325e-05, "loss": 1.5924, "step": 257800 }, { "epoch": 5.01507049100632, "grad_norm": 3.973275661468506, "learning_rate": 2.496e-05, "loss": 1.5143, "step": 257900 }, { "epoch": 5.017015070491007, "grad_norm": 2.793217897415161, "learning_rate": 2.495026763990268e-05, "loss": 1.4409, "step": 258000 }, { "epoch": 5.0189596499756925, "grad_norm": 2.4957807064056396, "learning_rate": 2.4940535279805356e-05, "loss": 1.5887, "step": 258100 
}, { "epoch": 5.020904229460379, "grad_norm": 2.9778048992156982, "learning_rate": 2.493080291970803e-05, "loss": 1.7094, "step": 258200 }, { "epoch": 5.022848808945065, "grad_norm": 2.7532997131347656, "learning_rate": 2.4921070559610706e-05, "loss": 1.3997, "step": 258300 }, { "epoch": 5.024793388429752, "grad_norm": 4.195626258850098, "learning_rate": 2.4911338199513383e-05, "loss": 1.541, "step": 258400 }, { "epoch": 5.026737967914438, "grad_norm": 3.9751064777374268, "learning_rate": 2.490160583941606e-05, "loss": 1.3659, "step": 258500 }, { "epoch": 5.028682547399125, "grad_norm": 3.6971523761749268, "learning_rate": 2.4891873479318737e-05, "loss": 1.5034, "step": 258600 }, { "epoch": 5.030627126883811, "grad_norm": 3.963543176651001, "learning_rate": 2.4882238442822385e-05, "loss": 1.463, "step": 258700 }, { "epoch": 5.032571706368498, "grad_norm": 3.5563809871673584, "learning_rate": 2.4872506082725062e-05, "loss": 1.5607, "step": 258800 }, { "epoch": 5.034516285853185, "grad_norm": 3.849820613861084, "learning_rate": 2.486277372262774e-05, "loss": 1.3993, "step": 258900 }, { "epoch": 5.036460865337871, "grad_norm": 5.88995361328125, "learning_rate": 2.4853041362530416e-05, "loss": 1.5334, "step": 259000 }, { "epoch": 5.0384054448225575, "grad_norm": 4.109772205352783, "learning_rate": 2.484330900243309e-05, "loss": 1.4273, "step": 259100 }, { "epoch": 5.040350024307243, "grad_norm": 4.218865394592285, "learning_rate": 2.4833576642335767e-05, "loss": 1.4153, "step": 259200 }, { "epoch": 5.04229460379193, "grad_norm": 4.842090129852295, "learning_rate": 2.4823844282238444e-05, "loss": 1.4471, "step": 259300 }, { "epoch": 5.044239183276616, "grad_norm": 5.006601333618164, "learning_rate": 2.481411192214112e-05, "loss": 1.3642, "step": 259400 }, { "epoch": 5.046183762761303, "grad_norm": 5.284773826599121, "learning_rate": 2.4804379562043798e-05, "loss": 1.379, "step": 259500 }, { "epoch": 5.048128342245989, "grad_norm": 5.087965965270996, "learning_rate": 
2.479464720194647e-05, "loss": 1.5498, "step": 259600 }, { "epoch": 5.050072921730676, "grad_norm": 5.789027214050293, "learning_rate": 2.478491484184915e-05, "loss": 1.3596, "step": 259700 }, { "epoch": 5.052017501215362, "grad_norm": 4.288670063018799, "learning_rate": 2.4775182481751825e-05, "loss": 1.4555, "step": 259800 }, { "epoch": 5.053962080700049, "grad_norm": 3.712052822113037, "learning_rate": 2.4765450121654502e-05, "loss": 1.5655, "step": 259900 }, { "epoch": 5.055906660184735, "grad_norm": 4.867649078369141, "learning_rate": 2.475571776155718e-05, "loss": 1.4414, "step": 260000 }, { "epoch": 5.055906660184735, "eval_accuracy": 0.5536222222222222, "eval_f1": 0.5437361391706091, "eval_loss": 1.1803556680679321, "eval_precision": 0.569869638175712, "eval_recall": 0.5536222222222222, "eval_runtime": 11725.6384, "eval_samples_per_second": 15.351, "eval_steps_per_second": 0.48, "step": 260000 }, { "epoch": 5.0578512396694215, "grad_norm": 6.374873638153076, "learning_rate": 2.4745985401459857e-05, "loss": 1.5641, "step": 260100 }, { "epoch": 5.0597958191541075, "grad_norm": 12.739728927612305, "learning_rate": 2.473625304136253e-05, "loss": 1.482, "step": 260200 }, { "epoch": 5.061740398638794, "grad_norm": 3.9438045024871826, "learning_rate": 2.4726520681265207e-05, "loss": 1.3994, "step": 260300 }, { "epoch": 5.063684978123481, "grad_norm": 4.394181251525879, "learning_rate": 2.4716788321167884e-05, "loss": 1.3633, "step": 260400 }, { "epoch": 5.065629557608167, "grad_norm": 5.601223945617676, "learning_rate": 2.470705596107056e-05, "loss": 1.4483, "step": 260500 }, { "epoch": 5.067574137092854, "grad_norm": 14.382640838623047, "learning_rate": 2.4697323600973238e-05, "loss": 1.4632, "step": 260600 }, { "epoch": 5.06951871657754, "grad_norm": 4.253689289093018, "learning_rate": 2.468768856447689e-05, "loss": 1.4791, "step": 260700 }, { "epoch": 5.071463296062227, "grad_norm": 7.792820453643799, "learning_rate": 2.4677956204379563e-05, "loss": 1.7163, 
"step": 260800 }, { "epoch": 5.073407875546913, "grad_norm": 4.818062782287598, "learning_rate": 2.466822384428224e-05, "loss": 1.5378, "step": 260900 }, { "epoch": 5.0753524550316, "grad_norm": 9.838109970092773, "learning_rate": 2.4658491484184917e-05, "loss": 1.4594, "step": 261000 }, { "epoch": 5.077297034516286, "grad_norm": 3.8644328117370605, "learning_rate": 2.464875912408759e-05, "loss": 1.633, "step": 261100 }, { "epoch": 5.079241614000972, "grad_norm": 3.3318889141082764, "learning_rate": 2.463902676399027e-05, "loss": 1.5204, "step": 261200 }, { "epoch": 5.081186193485658, "grad_norm": 3.6050684452056885, "learning_rate": 2.4629294403892945e-05, "loss": 1.552, "step": 261300 }, { "epoch": 5.083130772970345, "grad_norm": 3.938458204269409, "learning_rate": 2.461956204379562e-05, "loss": 1.4175, "step": 261400 }, { "epoch": 5.085075352455031, "grad_norm": 4.488956451416016, "learning_rate": 2.46098296836983e-05, "loss": 1.5761, "step": 261500 }, { "epoch": 5.087019931939718, "grad_norm": 4.0630011558532715, "learning_rate": 2.4600097323600972e-05, "loss": 1.6612, "step": 261600 }, { "epoch": 5.088964511424405, "grad_norm": 3.092634916305542, "learning_rate": 2.459036496350365e-05, "loss": 1.4771, "step": 261700 }, { "epoch": 5.090909090909091, "grad_norm": 2.53788161277771, "learning_rate": 2.458063260340633e-05, "loss": 1.4406, "step": 261800 }, { "epoch": 5.092853670393778, "grad_norm": 3.22296404838562, "learning_rate": 2.4570900243309003e-05, "loss": 1.6901, "step": 261900 }, { "epoch": 5.094798249878464, "grad_norm": 5.6319780349731445, "learning_rate": 2.456116788321168e-05, "loss": 1.5166, "step": 262000 }, { "epoch": 5.0967428293631505, "grad_norm": 1.6766849756240845, "learning_rate": 2.4551435523114357e-05, "loss": 1.5255, "step": 262100 }, { "epoch": 5.0986874088478364, "grad_norm": 10.2339448928833, "learning_rate": 2.454170316301703e-05, "loss": 1.6419, "step": 262200 }, { "epoch": 5.100631988332523, "grad_norm": 2.3163421154022217, 
"learning_rate": 2.453197080291971e-05, "loss": 1.4535, "step": 262300 }, { "epoch": 5.102576567817209, "grad_norm": 11.211511611938477, "learning_rate": 2.4522238442822385e-05, "loss": 1.4573, "step": 262400 }, { "epoch": 5.104521147301896, "grad_norm": 1.4274566173553467, "learning_rate": 2.4512506082725062e-05, "loss": 1.7048, "step": 262500 }, { "epoch": 5.106465726786582, "grad_norm": 5.561498165130615, "learning_rate": 2.450277372262774e-05, "loss": 1.519, "step": 262600 }, { "epoch": 5.108410306271269, "grad_norm": 5.480257034301758, "learning_rate": 2.449313868613139e-05, "loss": 1.4355, "step": 262700 }, { "epoch": 5.110354885755955, "grad_norm": 6.463021278381348, "learning_rate": 2.4483406326034064e-05, "loss": 1.3991, "step": 262800 }, { "epoch": 5.112299465240642, "grad_norm": 3.633103370666504, "learning_rate": 2.447367396593674e-05, "loss": 1.3956, "step": 262900 }, { "epoch": 5.114244044725329, "grad_norm": 10.460658073425293, "learning_rate": 2.4463941605839414e-05, "loss": 1.4874, "step": 263000 }, { "epoch": 5.1161886242100145, "grad_norm": 3.401123523712158, "learning_rate": 2.4454209245742095e-05, "loss": 1.5113, "step": 263100 }, { "epoch": 5.118133203694701, "grad_norm": 3.2501962184906006, "learning_rate": 2.4444476885644772e-05, "loss": 1.3982, "step": 263200 }, { "epoch": 5.120077783179387, "grad_norm": 3.382086992263794, "learning_rate": 2.4434744525547446e-05, "loss": 1.4756, "step": 263300 }, { "epoch": 5.122022362664074, "grad_norm": 1.6874250173568726, "learning_rate": 2.4425012165450123e-05, "loss": 1.3393, "step": 263400 }, { "epoch": 5.12396694214876, "grad_norm": 9.720108032226562, "learning_rate": 2.44152798053528e-05, "loss": 1.3426, "step": 263500 }, { "epoch": 5.125911521633447, "grad_norm": 5.037644863128662, "learning_rate": 2.4405547445255477e-05, "loss": 1.3956, "step": 263600 }, { "epoch": 5.127856101118133, "grad_norm": 3.0464375019073486, "learning_rate": 2.4395815085158154e-05, "loss": 1.4679, "step": 263700 }, { 
"epoch": 5.12980068060282, "grad_norm": 2.5046274662017822, "learning_rate": 2.438608272506083e-05, "loss": 1.4338, "step": 263800 }, { "epoch": 5.131745260087506, "grad_norm": 6.054426193237305, "learning_rate": 2.4376350364963504e-05, "loss": 1.599, "step": 263900 }, { "epoch": 5.133689839572193, "grad_norm": 3.765808582305908, "learning_rate": 2.436661800486618e-05, "loss": 1.457, "step": 264000 }, { "epoch": 5.135634419056879, "grad_norm": 4.9228739738464355, "learning_rate": 2.4356885644768855e-05, "loss": 1.5805, "step": 264100 }, { "epoch": 5.137578998541565, "grad_norm": 3.5227766036987305, "learning_rate": 2.4347153284671535e-05, "loss": 1.6274, "step": 264200 }, { "epoch": 5.139523578026251, "grad_norm": 8.999431610107422, "learning_rate": 2.4337420924574212e-05, "loss": 1.4885, "step": 264300 }, { "epoch": 5.141468157510938, "grad_norm": 4.823368072509766, "learning_rate": 2.4327688564476886e-05, "loss": 1.4285, "step": 264400 }, { "epoch": 5.143412736995625, "grad_norm": 4.559749126434326, "learning_rate": 2.4317956204379563e-05, "loss": 1.3241, "step": 264500 }, { "epoch": 5.145357316480311, "grad_norm": 5.257164001464844, "learning_rate": 2.430822384428224e-05, "loss": 1.5483, "step": 264600 }, { "epoch": 5.147301895964998, "grad_norm": 6.411623477935791, "learning_rate": 2.4298588807785888e-05, "loss": 1.4381, "step": 264700 }, { "epoch": 5.149246475449684, "grad_norm": 3.901271343231201, "learning_rate": 2.4288856447688565e-05, "loss": 1.4359, "step": 264800 }, { "epoch": 5.151191054934371, "grad_norm": 4.528796672821045, "learning_rate": 2.4279124087591242e-05, "loss": 1.4224, "step": 264900 }, { "epoch": 5.153135634419057, "grad_norm": 3.963549852371216, "learning_rate": 2.426939172749392e-05, "loss": 1.4374, "step": 265000 }, { "epoch": 5.153135634419057, "eval_accuracy": 0.5525388888888889, "eval_f1": 0.5355981691489363, "eval_loss": 1.1909781694412231, "eval_precision": 0.5576227771912778, "eval_recall": 0.5525388888888889, "eval_runtime": 
11701.2854, "eval_samples_per_second": 15.383, "eval_steps_per_second": 0.481, "step": 265000 }, { "epoch": 5.1550802139037435, "grad_norm": 3.5167737007141113, "learning_rate": 2.4259659367396596e-05, "loss": 1.4337, "step": 265100 }, { "epoch": 5.1570247933884295, "grad_norm": 8.186117172241211, "learning_rate": 2.4249927007299273e-05, "loss": 1.7599, "step": 265200 }, { "epoch": 5.158969372873116, "grad_norm": 7.5996856689453125, "learning_rate": 2.4240194647201946e-05, "loss": 1.3919, "step": 265300 }, { "epoch": 5.160913952357802, "grad_norm": 15.185575485229492, "learning_rate": 2.4230462287104623e-05, "loss": 1.495, "step": 265400 }, { "epoch": 5.162858531842489, "grad_norm": 3.640678882598877, "learning_rate": 2.42207299270073e-05, "loss": 1.373, "step": 265500 }, { "epoch": 5.164803111327176, "grad_norm": 10.743998527526855, "learning_rate": 2.4210997566909977e-05, "loss": 1.4676, "step": 265600 }, { "epoch": 5.166747690811862, "grad_norm": 3.532264232635498, "learning_rate": 2.4201265206812654e-05, "loss": 1.5131, "step": 265700 }, { "epoch": 5.168692270296549, "grad_norm": 25.8144588470459, "learning_rate": 2.4191532846715328e-05, "loss": 1.5749, "step": 265800 }, { "epoch": 5.170636849781235, "grad_norm": 3.6721255779266357, "learning_rate": 2.4181800486618005e-05, "loss": 1.4434, "step": 265900 }, { "epoch": 5.172581429265922, "grad_norm": 19.927867889404297, "learning_rate": 2.4172068126520682e-05, "loss": 1.4397, "step": 266000 }, { "epoch": 5.1745260087506075, "grad_norm": 3.161503314971924, "learning_rate": 2.416233576642336e-05, "loss": 1.6085, "step": 266100 }, { "epoch": 5.176470588235294, "grad_norm": 4.129986763000488, "learning_rate": 2.4152603406326036e-05, "loss": 1.4468, "step": 266200 }, { "epoch": 5.17841516771998, "grad_norm": 4.216347694396973, "learning_rate": 2.4142871046228713e-05, "loss": 1.4303, "step": 266300 }, { "epoch": 5.180359747204667, "grad_norm": 1.5858263969421387, "learning_rate": 2.4133138686131387e-05, "loss": 1.4416, 
"step": 266400 }, { "epoch": 5.182304326689353, "grad_norm": 3.9937751293182373, "learning_rate": 2.4123406326034064e-05, "loss": 1.5954, "step": 266500 }, { "epoch": 5.18424890617404, "grad_norm": 10.878015518188477, "learning_rate": 2.411367396593674e-05, "loss": 1.6435, "step": 266600 }, { "epoch": 5.186193485658726, "grad_norm": 5.430533409118652, "learning_rate": 2.4103941605839418e-05, "loss": 1.5648, "step": 266700 }, { "epoch": 5.188138065143413, "grad_norm": 6.608767986297607, "learning_rate": 2.4094306569343066e-05, "loss": 1.5008, "step": 266800 }, { "epoch": 5.190082644628099, "grad_norm": 6.699178695678711, "learning_rate": 2.4084574209245746e-05, "loss": 1.472, "step": 266900 }, { "epoch": 5.192027224112786, "grad_norm": 3.6713945865631104, "learning_rate": 2.407484184914842e-05, "loss": 1.5474, "step": 267000 }, { "epoch": 5.1939718035974725, "grad_norm": 4.62306547164917, "learning_rate": 2.4065109489051097e-05, "loss": 1.4399, "step": 267100 }, { "epoch": 5.195916383082158, "grad_norm": 14.408308982849121, "learning_rate": 2.4055377128953774e-05, "loss": 1.4307, "step": 267200 }, { "epoch": 5.197860962566845, "grad_norm": 4.600755214691162, "learning_rate": 2.4045644768856447e-05, "loss": 1.5618, "step": 267300 }, { "epoch": 5.199805542051531, "grad_norm": 4.7075324058532715, "learning_rate": 2.4035912408759124e-05, "loss": 1.4883, "step": 267400 }, { "epoch": 5.201750121536218, "grad_norm": 3.9692747592926025, "learning_rate": 2.40261800486618e-05, "loss": 1.4923, "step": 267500 }, { "epoch": 5.203694701020904, "grad_norm": 2.6385278701782227, "learning_rate": 2.401644768856448e-05, "loss": 1.6057, "step": 267600 }, { "epoch": 5.205639280505591, "grad_norm": 4.855040550231934, "learning_rate": 2.4006715328467155e-05, "loss": 1.532, "step": 267700 }, { "epoch": 5.207583859990277, "grad_norm": 3.9068548679351807, "learning_rate": 2.399698296836983e-05, "loss": 1.38, "step": 267800 }, { "epoch": 5.209528439474964, "grad_norm": 3.8513622283935547, 
"learning_rate": 2.398734793187348e-05, "loss": 1.4693, "step": 267900 }, { "epoch": 5.21147301895965, "grad_norm": 8.953530311584473, "learning_rate": 2.3977615571776157e-05, "loss": 1.5034, "step": 268000 }, { "epoch": 5.2134175984443365, "grad_norm": 3.8073456287384033, "learning_rate": 2.396788321167883e-05, "loss": 1.4596, "step": 268100 }, { "epoch": 5.2153621779290225, "grad_norm": 3.39847731590271, "learning_rate": 2.395815085158151e-05, "loss": 1.5055, "step": 268200 }, { "epoch": 5.217306757413709, "grad_norm": 4.119603633880615, "learning_rate": 2.3948418491484188e-05, "loss": 1.601, "step": 268300 }, { "epoch": 5.219251336898395, "grad_norm": 3.059004068374634, "learning_rate": 2.3938686131386862e-05, "loss": 1.4964, "step": 268400 }, { "epoch": 5.221195916383082, "grad_norm": 7.942428112030029, "learning_rate": 2.392895377128954e-05, "loss": 1.5306, "step": 268500 }, { "epoch": 5.223140495867769, "grad_norm": 8.754537582397461, "learning_rate": 2.3919221411192216e-05, "loss": 1.3604, "step": 268600 }, { "epoch": 5.225085075352455, "grad_norm": 5.1517333984375, "learning_rate": 2.390948905109489e-05, "loss": 1.4348, "step": 268700 }, { "epoch": 5.227029654837142, "grad_norm": 2.564457893371582, "learning_rate": 2.389975669099757e-05, "loss": 1.4611, "step": 268800 }, { "epoch": 5.228974234321828, "grad_norm": 3.2134878635406494, "learning_rate": 2.3890024330900247e-05, "loss": 1.4994, "step": 268900 }, { "epoch": 5.230918813806515, "grad_norm": 3.7943222522735596, "learning_rate": 2.388029197080292e-05, "loss": 1.4963, "step": 269000 }, { "epoch": 5.2328633932912005, "grad_norm": 2.711167812347412, "learning_rate": 2.3870559610705598e-05, "loss": 1.5417, "step": 269100 }, { "epoch": 5.234807972775887, "grad_norm": 4.158735752105713, "learning_rate": 2.386082725060827e-05, "loss": 1.4214, "step": 269200 }, { "epoch": 5.236752552260573, "grad_norm": 3.079258680343628, "learning_rate": 2.385109489051095e-05, "loss": 1.4545, "step": 269300 }, { "epoch": 
5.23869713174526, "grad_norm": 4.453938007354736, "learning_rate": 2.384136253041363e-05, "loss": 1.5827, "step": 269400 }, { "epoch": 5.240641711229946, "grad_norm": 2.8742916584014893, "learning_rate": 2.3831630170316302e-05, "loss": 1.4455, "step": 269500 }, { "epoch": 5.242586290714633, "grad_norm": 1.8677544593811035, "learning_rate": 2.382189781021898e-05, "loss": 1.6494, "step": 269600 }, { "epoch": 5.24453087019932, "grad_norm": 12.594664573669434, "learning_rate": 2.3812165450121656e-05, "loss": 1.4552, "step": 269700 }, { "epoch": 5.246475449684006, "grad_norm": 2.402961015701294, "learning_rate": 2.380243309002433e-05, "loss": 1.5341, "step": 269800 }, { "epoch": 5.248420029168693, "grad_norm": 5.3207106590271, "learning_rate": 2.379270072992701e-05, "loss": 1.4646, "step": 269900 }, { "epoch": 5.250364608653379, "grad_norm": 5.456286430358887, "learning_rate": 2.3782968369829684e-05, "loss": 1.4101, "step": 270000 }, { "epoch": 5.250364608653379, "eval_accuracy": 0.5547555555555556, "eval_f1": 0.5426843081934469, "eval_loss": 1.1854274272918701, "eval_precision": 0.5647788078442956, "eval_recall": 0.5547555555555556, "eval_runtime": 11716.4138, "eval_samples_per_second": 15.363, "eval_steps_per_second": 0.48, "step": 270000 }, { "epoch": 5.2523091881380655, "grad_norm": 17.702560424804688, "learning_rate": 2.377323600973236e-05, "loss": 1.634, "step": 270100 }, { "epoch": 5.254253767622751, "grad_norm": 2.38095760345459, "learning_rate": 2.3763503649635038e-05, "loss": 1.4343, "step": 270200 }, { "epoch": 5.256198347107438, "grad_norm": 2.811647891998291, "learning_rate": 2.375377128953771e-05, "loss": 1.4572, "step": 270300 }, { "epoch": 5.258142926592124, "grad_norm": 7.414549350738525, "learning_rate": 2.3744038929440392e-05, "loss": 1.4308, "step": 270400 }, { "epoch": 5.260087506076811, "grad_norm": 2.863037347793579, "learning_rate": 2.373430656934307e-05, "loss": 1.5903, "step": 270500 }, { "epoch": 5.262032085561497, "grad_norm": 
3.9669013023376465, "learning_rate": 2.3724574209245743e-05, "loss": 1.5495, "step": 270600 }, { "epoch": 5.263976665046184, "grad_norm": 3.876180410385132, "learning_rate": 2.371484184914842e-05, "loss": 1.3459, "step": 270700 }, { "epoch": 5.26592124453087, "grad_norm": 6.782803535461426, "learning_rate": 2.3705109489051097e-05, "loss": 1.4107, "step": 270800 }, { "epoch": 5.267865824015557, "grad_norm": 2.768390655517578, "learning_rate": 2.369537712895377e-05, "loss": 1.4021, "step": 270900 }, { "epoch": 5.269810403500243, "grad_norm": 7.013218879699707, "learning_rate": 2.368564476885645e-05, "loss": 1.5879, "step": 271000 }, { "epoch": 5.2717549829849295, "grad_norm": 7.384672164916992, "learning_rate": 2.3675912408759124e-05, "loss": 1.4905, "step": 271100 }, { "epoch": 5.273699562469616, "grad_norm": 4.06222677230835, "learning_rate": 2.36661800486618e-05, "loss": 1.4143, "step": 271200 }, { "epoch": 5.275644141954302, "grad_norm": 9.004575729370117, "learning_rate": 2.365644768856448e-05, "loss": 1.4145, "step": 271300 }, { "epoch": 5.277588721438989, "grad_norm": 7.031576156616211, "learning_rate": 2.364681265206813e-05, "loss": 1.5162, "step": 271400 }, { "epoch": 5.279533300923675, "grad_norm": 4.645366668701172, "learning_rate": 2.3637080291970803e-05, "loss": 1.376, "step": 271500 }, { "epoch": 5.281477880408362, "grad_norm": 6.401686191558838, "learning_rate": 2.362734793187348e-05, "loss": 1.4015, "step": 271600 }, { "epoch": 5.283422459893048, "grad_norm": 6.094709396362305, "learning_rate": 2.3617615571776157e-05, "loss": 1.5967, "step": 271700 }, { "epoch": 5.285367039377735, "grad_norm": 4.863739967346191, "learning_rate": 2.3607883211678834e-05, "loss": 1.3941, "step": 271800 }, { "epoch": 5.287311618862421, "grad_norm": 5.8989057540893555, "learning_rate": 2.359815085158151e-05, "loss": 1.6056, "step": 271900 }, { "epoch": 5.289256198347108, "grad_norm": 5.39784049987793, "learning_rate": 2.3588418491484185e-05, "loss": 1.4212, "step": 272000 
}, { "epoch": 5.2912007778317935, "grad_norm": 2.1502552032470703, "learning_rate": 2.3578686131386862e-05, "loss": 1.6069, "step": 272100 }, { "epoch": 5.29314535731648, "grad_norm": 6.255486011505127, "learning_rate": 2.356895377128954e-05, "loss": 1.4715, "step": 272200 }, { "epoch": 5.295089936801166, "grad_norm": 5.10242223739624, "learning_rate": 2.3559221411192216e-05, "loss": 1.317, "step": 272300 }, { "epoch": 5.297034516285853, "grad_norm": 5.040768623352051, "learning_rate": 2.3549489051094893e-05, "loss": 1.384, "step": 272400 }, { "epoch": 5.298979095770539, "grad_norm": 3.9261868000030518, "learning_rate": 2.353975669099757e-05, "loss": 1.4126, "step": 272500 }, { "epoch": 5.300923675255226, "grad_norm": 3.0999207496643066, "learning_rate": 2.3530024330900244e-05, "loss": 1.3643, "step": 272600 }, { "epoch": 5.302868254739913, "grad_norm": 6.546746730804443, "learning_rate": 2.352029197080292e-05, "loss": 1.4622, "step": 272700 }, { "epoch": 5.304812834224599, "grad_norm": 2.72564697265625, "learning_rate": 2.3510559610705594e-05, "loss": 1.6633, "step": 272800 }, { "epoch": 5.306757413709286, "grad_norm": 2.922092914581299, "learning_rate": 2.3500827250608275e-05, "loss": 1.5561, "step": 272900 }, { "epoch": 5.308701993193972, "grad_norm": 8.358819961547852, "learning_rate": 2.349109489051095e-05, "loss": 1.5593, "step": 273000 }, { "epoch": 5.3106465726786585, "grad_norm": 4.3036417961120605, "learning_rate": 2.3481362530413625e-05, "loss": 1.4904, "step": 273100 }, { "epoch": 5.312591152163344, "grad_norm": 5.88431453704834, "learning_rate": 2.3471630170316302e-05, "loss": 1.4314, "step": 273200 }, { "epoch": 5.314535731648031, "grad_norm": 1.6641788482666016, "learning_rate": 2.346189781021898e-05, "loss": 1.4076, "step": 273300 }, { "epoch": 5.316480311132717, "grad_norm": 5.4306721687316895, "learning_rate": 2.3452165450121656e-05, "loss": 1.4178, "step": 273400 }, { "epoch": 5.318424890617404, "grad_norm": 7.291306495666504, "learning_rate": 
2.3442433090024333e-05, "loss": 1.4944, "step": 273500 }, { "epoch": 5.32036947010209, "grad_norm": 3.449319362640381, "learning_rate": 2.343270072992701e-05, "loss": 1.4618, "step": 273600 }, { "epoch": 5.322314049586777, "grad_norm": 11.144508361816406, "learning_rate": 2.3422968369829684e-05, "loss": 1.5677, "step": 273700 }, { "epoch": 5.324258629071464, "grad_norm": 3.2037224769592285, "learning_rate": 2.341323600973236e-05, "loss": 1.452, "step": 273800 }, { "epoch": 5.32620320855615, "grad_norm": 23.440555572509766, "learning_rate": 2.3403503649635035e-05, "loss": 1.5793, "step": 273900 }, { "epoch": 5.3281477880408366, "grad_norm": 5.229746341705322, "learning_rate": 2.3393771289537715e-05, "loss": 1.6843, "step": 274000 }, { "epoch": 5.3300923675255225, "grad_norm": 8.197285652160645, "learning_rate": 2.3384038929440392e-05, "loss": 1.3959, "step": 274100 }, { "epoch": 5.332036947010209, "grad_norm": 3.1775808334350586, "learning_rate": 2.3374306569343066e-05, "loss": 1.4557, "step": 274200 }, { "epoch": 5.333981526494895, "grad_norm": 3.206312656402588, "learning_rate": 2.3364574209245743e-05, "loss": 1.5487, "step": 274300 }, { "epoch": 5.335926105979582, "grad_norm": 4.041548728942871, "learning_rate": 2.335484184914842e-05, "loss": 1.4816, "step": 274400 }, { "epoch": 5.337870685464268, "grad_norm": 2.939347982406616, "learning_rate": 2.3345109489051097e-05, "loss": 1.5185, "step": 274500 }, { "epoch": 5.339815264948955, "grad_norm": 8.536235809326172, "learning_rate": 2.3335377128953774e-05, "loss": 1.4432, "step": 274600 }, { "epoch": 5.341759844433641, "grad_norm": 5.429789066314697, "learning_rate": 2.332564476885645e-05, "loss": 1.448, "step": 274700 }, { "epoch": 5.343704423918328, "grad_norm": 2.8483195304870605, "learning_rate": 2.3315912408759124e-05, "loss": 1.4722, "step": 274800 }, { "epoch": 5.345649003403014, "grad_norm": 7.454665660858154, "learning_rate": 2.33061800486618e-05, "loss": 1.5047, "step": 274900 }, { "epoch": 
5.347593582887701, "grad_norm": 6.443779468536377, "learning_rate": 2.3296447688564475e-05, "loss": 1.6934, "step": 275000 }, { "epoch": 5.347593582887701, "eval_accuracy": 0.53985, "eval_f1": 0.5183873827049165, "eval_loss": 1.2124755382537842, "eval_precision": 0.5598933004858979, "eval_recall": 0.53985, "eval_runtime": 11718.6593, "eval_samples_per_second": 15.36, "eval_steps_per_second": 0.48, "step": 275000 }, { "epoch": 5.3495381623723866, "grad_norm": 4.802511692047119, "learning_rate": 2.3286715328467155e-05, "loss": 1.3389, "step": 275100 }, { "epoch": 5.351482741857073, "grad_norm": 5.373320579528809, "learning_rate": 2.3276982968369832e-05, "loss": 1.3508, "step": 275200 }, { "epoch": 5.35342732134176, "grad_norm": 2.4816718101501465, "learning_rate": 2.3267250608272506e-05, "loss": 1.3528, "step": 275300 }, { "epoch": 5.355371900826446, "grad_norm": 9.622933387756348, "learning_rate": 2.3257615571776157e-05, "loss": 1.3875, "step": 275400 }, { "epoch": 5.357316480311133, "grad_norm": 9.33761215209961, "learning_rate": 2.3247883211678834e-05, "loss": 1.4822, "step": 275500 }, { "epoch": 5.359261059795819, "grad_norm": 9.663235664367676, "learning_rate": 2.3238150851581508e-05, "loss": 1.5322, "step": 275600 }, { "epoch": 5.361205639280506, "grad_norm": 4.71354866027832, "learning_rate": 2.3228418491484185e-05, "loss": 1.424, "step": 275700 }, { "epoch": 5.363150218765192, "grad_norm": 5.334109306335449, "learning_rate": 2.3218686131386862e-05, "loss": 1.5054, "step": 275800 }, { "epoch": 5.365094798249879, "grad_norm": 8.203119277954102, "learning_rate": 2.320895377128954e-05, "loss": 1.5388, "step": 275900 }, { "epoch": 5.367039377734565, "grad_norm": 2.779278039932251, "learning_rate": 2.3199221411192216e-05, "loss": 1.4712, "step": 276000 }, { "epoch": 5.3689839572192515, "grad_norm": 4.811086177825928, "learning_rate": 2.3189489051094893e-05, "loss": 1.4813, "step": 276100 }, { "epoch": 5.370928536703937, "grad_norm": 3.734912633895874, 
"learning_rate": 2.3179756690997567e-05, "loss": 1.5177, "step": 276200 }, { "epoch": 5.372873116188624, "grad_norm": 1.5101590156555176, "learning_rate": 2.3170024330900244e-05, "loss": 1.5149, "step": 276300 }, { "epoch": 5.37481769567331, "grad_norm": 5.965051174163818, "learning_rate": 2.316029197080292e-05, "loss": 1.5209, "step": 276400 }, { "epoch": 5.376762275157997, "grad_norm": 7.335174560546875, "learning_rate": 2.3150559610705598e-05, "loss": 1.3907, "step": 276500 }, { "epoch": 5.378706854642683, "grad_norm": 12.369961738586426, "learning_rate": 2.3140827250608275e-05, "loss": 1.7133, "step": 276600 }, { "epoch": 5.38065143412737, "grad_norm": 3.151568651199341, "learning_rate": 2.3131094890510948e-05, "loss": 1.4869, "step": 276700 }, { "epoch": 5.382596013612057, "grad_norm": 9.818385124206543, "learning_rate": 2.3121362530413625e-05, "loss": 1.468, "step": 276800 }, { "epoch": 5.384540593096743, "grad_norm": 3.2982397079467773, "learning_rate": 2.3111630170316302e-05, "loss": 1.4703, "step": 276900 }, { "epoch": 5.38648517258143, "grad_norm": 6.93708610534668, "learning_rate": 2.310189781021898e-05, "loss": 1.543, "step": 277000 }, { "epoch": 5.3884297520661155, "grad_norm": 5.575623989105225, "learning_rate": 2.3092165450121656e-05, "loss": 1.421, "step": 277100 }, { "epoch": 5.390374331550802, "grad_norm": 4.691611289978027, "learning_rate": 2.3082433090024333e-05, "loss": 1.4585, "step": 277200 }, { "epoch": 5.392318911035488, "grad_norm": 3.794107437133789, "learning_rate": 2.3072700729927007e-05, "loss": 1.3975, "step": 277300 }, { "epoch": 5.394263490520175, "grad_norm": 2.1916751861572266, "learning_rate": 2.3062968369829684e-05, "loss": 1.4859, "step": 277400 }, { "epoch": 5.396208070004861, "grad_norm": 20.66193199157715, "learning_rate": 2.3053333333333335e-05, "loss": 1.5963, "step": 277500 }, { "epoch": 5.398152649489548, "grad_norm": 6.6272292137146, "learning_rate": 2.304360097323601e-05, "loss": 1.5209, "step": 277600 }, { "epoch": 
5.400097228974234, "grad_norm": 5.094894886016846, "learning_rate": 2.3033868613138686e-05, "loss": 1.4704, "step": 277700 }, { "epoch": 5.402041808458921, "grad_norm": 3.6407415866851807, "learning_rate": 2.3024233576642337e-05, "loss": 1.7717, "step": 277800 }, { "epoch": 5.403986387943608, "grad_norm": 3.5183136463165283, "learning_rate": 2.301450121654501e-05, "loss": 1.5289, "step": 277900 }, { "epoch": 5.405930967428294, "grad_norm": 3.1340792179107666, "learning_rate": 2.300476885644769e-05, "loss": 1.5841, "step": 278000 }, { "epoch": 5.4078755469129804, "grad_norm": 2.9373817443847656, "learning_rate": 2.2995036496350368e-05, "loss": 1.5414, "step": 278100 }, { "epoch": 5.409820126397666, "grad_norm": 10.392374038696289, "learning_rate": 2.298530413625304e-05, "loss": 1.6509, "step": 278200 }, { "epoch": 5.411764705882353, "grad_norm": 5.183082580566406, "learning_rate": 2.297557177615572e-05, "loss": 1.4753, "step": 278300 }, { "epoch": 5.413709285367039, "grad_norm": 4.7945122718811035, "learning_rate": 2.2965839416058396e-05, "loss": 1.6961, "step": 278400 }, { "epoch": 5.415653864851726, "grad_norm": 6.1543169021606445, "learning_rate": 2.295610705596107e-05, "loss": 1.7603, "step": 278500 }, { "epoch": 5.417598444336412, "grad_norm": 5.121323108673096, "learning_rate": 2.294637469586375e-05, "loss": 1.4546, "step": 278600 }, { "epoch": 5.419543023821099, "grad_norm": 11.438403129577637, "learning_rate": 2.2936642335766427e-05, "loss": 1.601, "step": 278700 }, { "epoch": 5.421487603305785, "grad_norm": 4.431312561035156, "learning_rate": 2.29269099756691e-05, "loss": 1.415, "step": 278800 }, { "epoch": 5.423432182790472, "grad_norm": 7.408588886260986, "learning_rate": 2.2917177615571777e-05, "loss": 1.7067, "step": 278900 }, { "epoch": 5.425376762275158, "grad_norm": 3.0873610973358154, "learning_rate": 2.290744525547445e-05, "loss": 1.4268, "step": 279000 }, { "epoch": 5.4273213417598445, "grad_norm": 3.4912214279174805, "learning_rate": 
2.289771289537713e-05, "loss": 1.3856, "step": 279100 }, { "epoch": 5.4292659212445304, "grad_norm": 30.239973068237305, "learning_rate": 2.288798053527981e-05, "loss": 1.7754, "step": 279200 }, { "epoch": 5.431210500729217, "grad_norm": 8.566444396972656, "learning_rate": 2.2878248175182482e-05, "loss": 1.4278, "step": 279300 }, { "epoch": 5.433155080213904, "grad_norm": 14.421789169311523, "learning_rate": 2.286851581508516e-05, "loss": 1.462, "step": 279400 }, { "epoch": 5.43509965969859, "grad_norm": 4.65327787399292, "learning_rate": 2.2858783454987836e-05, "loss": 1.4006, "step": 279500 }, { "epoch": 5.437044239183277, "grad_norm": 4.472044467926025, "learning_rate": 2.284905109489051e-05, "loss": 1.4623, "step": 279600 }, { "epoch": 5.438988818667963, "grad_norm": 4.42574405670166, "learning_rate": 2.283931873479319e-05, "loss": 1.5739, "step": 279700 }, { "epoch": 5.44093339815265, "grad_norm": 5.746420860290527, "learning_rate": 2.2829586374695867e-05, "loss": 1.6471, "step": 279800 }, { "epoch": 5.442877977637336, "grad_norm": 3.783182382583618, "learning_rate": 2.281985401459854e-05, "loss": 1.639, "step": 279900 }, { "epoch": 5.444822557122023, "grad_norm": 4.03708553314209, "learning_rate": 2.2810121654501218e-05, "loss": 1.4133, "step": 280000 }, { "epoch": 5.444822557122023, "eval_accuracy": 0.5591222222222222, "eval_f1": 0.5486821745715152, "eval_loss": 1.1745212078094482, "eval_precision": 0.5694054616140833, "eval_recall": 0.5591222222222222, "eval_runtime": 11711.2219, "eval_samples_per_second": 15.37, "eval_steps_per_second": 0.48, "step": 280000 }, { "epoch": 5.4467671366067085, "grad_norm": 5.621281147003174, "learning_rate": 2.280038929440389e-05, "loss": 1.4423, "step": 280100 }, { "epoch": 5.448711716091395, "grad_norm": 2.646484136581421, "learning_rate": 2.2790656934306572e-05, "loss": 1.4719, "step": 280200 }, { "epoch": 5.450656295576081, "grad_norm": 7.482976913452148, "learning_rate": 2.278092457420925e-05, "loss": 1.365, "step": 
280300 }, { "epoch": 5.452600875060768, "grad_norm": 4.448878765106201, "learning_rate": 2.2771192214111922e-05, "loss": 1.4089, "step": 280400 }, { "epoch": 5.454545454545454, "grad_norm": 2.799978733062744, "learning_rate": 2.27614598540146e-05, "loss": 1.5023, "step": 280500 }, { "epoch": 5.456490034030141, "grad_norm": 4.901774883270264, "learning_rate": 2.2751727493917276e-05, "loss": 1.5081, "step": 280600 }, { "epoch": 5.458434613514827, "grad_norm": 6.8645124435424805, "learning_rate": 2.274199513381995e-05, "loss": 1.4867, "step": 280700 }, { "epoch": 5.460379192999514, "grad_norm": 4.526914119720459, "learning_rate": 2.273226277372263e-05, "loss": 1.6086, "step": 280800 }, { "epoch": 5.462323772484201, "grad_norm": 4.389404773712158, "learning_rate": 2.2722530413625307e-05, "loss": 1.4325, "step": 280900 }, { "epoch": 5.464268351968887, "grad_norm": 3.043478488922119, "learning_rate": 2.271279805352798e-05, "loss": 1.4005, "step": 281000 }, { "epoch": 5.4662129314535735, "grad_norm": 2.930150270462036, "learning_rate": 2.2703065693430658e-05, "loss": 1.4408, "step": 281100 }, { "epoch": 5.468157510938259, "grad_norm": 4.498072624206543, "learning_rate": 2.2693333333333332e-05, "loss": 1.5781, "step": 281200 }, { "epoch": 5.470102090422946, "grad_norm": 5.3371357917785645, "learning_rate": 2.2683600973236012e-05, "loss": 1.5096, "step": 281300 }, { "epoch": 5.472046669907632, "grad_norm": 3.517900228500366, "learning_rate": 2.267386861313869e-05, "loss": 1.4928, "step": 281400 }, { "epoch": 5.473991249392319, "grad_norm": 10.83023738861084, "learning_rate": 2.2664136253041363e-05, "loss": 1.4247, "step": 281500 }, { "epoch": 5.475935828877005, "grad_norm": 3.6974666118621826, "learning_rate": 2.265440389294404e-05, "loss": 1.4682, "step": 281600 }, { "epoch": 5.477880408361692, "grad_norm": 2.463233470916748, "learning_rate": 2.2644671532846717e-05, "loss": 1.6219, "step": 281700 }, { "epoch": 5.479824987846378, "grad_norm": 4.696543216705322, 
"learning_rate": 2.2635036496350365e-05, "loss": 1.4086, "step": 281800 }, { "epoch": 5.481769567331065, "grad_norm": 4.786525726318359, "learning_rate": 2.262530413625304e-05, "loss": 1.4309, "step": 281900 }, { "epoch": 5.4837141468157515, "grad_norm": 4.206057548522949, "learning_rate": 2.261557177615572e-05, "loss": 1.4022, "step": 282000 }, { "epoch": 5.4856587263004375, "grad_norm": 7.580013275146484, "learning_rate": 2.2605839416058396e-05, "loss": 1.473, "step": 282100 }, { "epoch": 5.487603305785124, "grad_norm": 5.598472595214844, "learning_rate": 2.2596107055961073e-05, "loss": 1.4112, "step": 282200 }, { "epoch": 5.48954788526981, "grad_norm": 5.837133407592773, "learning_rate": 2.258637469586375e-05, "loss": 1.5246, "step": 282300 }, { "epoch": 5.491492464754497, "grad_norm": 3.986525058746338, "learning_rate": 2.2576642335766423e-05, "loss": 1.3846, "step": 282400 }, { "epoch": 5.493437044239183, "grad_norm": 6.705593109130859, "learning_rate": 2.25669099756691e-05, "loss": 1.423, "step": 282500 }, { "epoch": 5.49538162372387, "grad_norm": 4.76995325088501, "learning_rate": 2.2557177615571777e-05, "loss": 1.5271, "step": 282600 }, { "epoch": 5.497326203208556, "grad_norm": 5.923313140869141, "learning_rate": 2.2547445255474454e-05, "loss": 1.4448, "step": 282700 }, { "epoch": 5.499270782693243, "grad_norm": 4.175518989562988, "learning_rate": 2.253771289537713e-05, "loss": 1.4362, "step": 282800 }, { "epoch": 5.501215362177929, "grad_norm": 1.5733002424240112, "learning_rate": 2.2527980535279805e-05, "loss": 1.3614, "step": 282900 }, { "epoch": 5.503159941662616, "grad_norm": 5.3359694480896, "learning_rate": 2.2518248175182482e-05, "loss": 1.4451, "step": 283000 }, { "epoch": 5.5051045211473015, "grad_norm": 2.448381185531616, "learning_rate": 2.250851581508516e-05, "loss": 1.5881, "step": 283100 }, { "epoch": 5.507049100631988, "grad_norm": 5.079776287078857, "learning_rate": 2.2498783454987836e-05, "loss": 1.4383, "step": 283200 }, { "epoch": 
5.508993680116674, "grad_norm": 9.0829439163208, "learning_rate": 2.2489051094890513e-05, "loss": 1.5073, "step": 283300 }, { "epoch": 5.510938259601361, "grad_norm": 3.5957775115966797, "learning_rate": 2.247931873479319e-05, "loss": 1.5755, "step": 283400 }, { "epoch": 5.512882839086048, "grad_norm": 4.6626505851745605, "learning_rate": 2.2469586374695864e-05, "loss": 1.459, "step": 283500 }, { "epoch": 5.514827418570734, "grad_norm": 7.415253162384033, "learning_rate": 2.245985401459854e-05, "loss": 1.4079, "step": 283600 }, { "epoch": 5.516771998055421, "grad_norm": 4.7456374168396, "learning_rate": 2.2450121654501218e-05, "loss": 1.3473, "step": 283700 }, { "epoch": 5.518716577540107, "grad_norm": 2.791362762451172, "learning_rate": 2.2440389294403895e-05, "loss": 1.45, "step": 283800 }, { "epoch": 5.520661157024794, "grad_norm": 6.348779201507568, "learning_rate": 2.2430754257907542e-05, "loss": 1.4175, "step": 283900 }, { "epoch": 5.52260573650948, "grad_norm": 3.7740819454193115, "learning_rate": 2.242102189781022e-05, "loss": 1.4345, "step": 284000 }, { "epoch": 5.5245503159941665, "grad_norm": 2.21464467048645, "learning_rate": 2.2411289537712896e-05, "loss": 1.4073, "step": 284100 }, { "epoch": 5.526494895478852, "grad_norm": 4.13753604888916, "learning_rate": 2.2401557177615574e-05, "loss": 1.337, "step": 284200 }, { "epoch": 5.528439474963539, "grad_norm": 4.033194065093994, "learning_rate": 2.239182481751825e-05, "loss": 1.4205, "step": 284300 }, { "epoch": 5.530384054448225, "grad_norm": 2.9144394397735596, "learning_rate": 2.2382092457420924e-05, "loss": 1.4741, "step": 284400 }, { "epoch": 5.532328633932912, "grad_norm": 5.114164352416992, "learning_rate": 2.23723600973236e-05, "loss": 1.6813, "step": 284500 }, { "epoch": 5.534273213417599, "grad_norm": 6.796812534332275, "learning_rate": 2.2362627737226278e-05, "loss": 1.6353, "step": 284600 }, { "epoch": 5.536217792902285, "grad_norm": 4.757305145263672, "learning_rate": 2.2352895377128955e-05, 
"loss": 1.3839, "step": 284700 }, { "epoch": 5.538162372386971, "grad_norm": 2.652226209640503, "learning_rate": 2.2343163017031632e-05, "loss": 1.4437, "step": 284800 }, { "epoch": 5.540106951871658, "grad_norm": 5.1463704109191895, "learning_rate": 2.2333430656934306e-05, "loss": 1.5318, "step": 284900 }, { "epoch": 5.5420515313563445, "grad_norm": 12.046945571899414, "learning_rate": 2.2323698296836983e-05, "loss": 1.5981, "step": 285000 }, { "epoch": 5.5420515313563445, "eval_accuracy": 0.5390777777777778, "eval_f1": 0.5317101130969939, "eval_loss": 1.207826852798462, "eval_precision": 0.5644234957340023, "eval_recall": 0.5390777777777779, "eval_runtime": 11714.2288, "eval_samples_per_second": 15.366, "eval_steps_per_second": 0.48, "step": 285000 }, { "epoch": 5.5439961108410305, "grad_norm": 2.2252368927001953, "learning_rate": 2.231396593673966e-05, "loss": 1.7411, "step": 285100 }, { "epoch": 5.545940690325717, "grad_norm": 4.766777038574219, "learning_rate": 2.2304233576642337e-05, "loss": 1.6334, "step": 285200 }, { "epoch": 5.547885269810403, "grad_norm": 2.5089666843414307, "learning_rate": 2.2294501216545014e-05, "loss": 1.4748, "step": 285300 }, { "epoch": 5.54982984929509, "grad_norm": 4.352461814880371, "learning_rate": 2.228476885644769e-05, "loss": 1.3387, "step": 285400 }, { "epoch": 5.551774428779776, "grad_norm": 14.653986930847168, "learning_rate": 2.2275036496350365e-05, "loss": 1.444, "step": 285500 }, { "epoch": 5.553719008264463, "grad_norm": 9.244460105895996, "learning_rate": 2.226530413625304e-05, "loss": 1.5664, "step": 285600 }, { "epoch": 5.555663587749149, "grad_norm": 3.5288875102996826, "learning_rate": 2.225557177615572e-05, "loss": 1.5175, "step": 285700 }, { "epoch": 5.557608167233836, "grad_norm": 4.206080913543701, "learning_rate": 2.2245839416058396e-05, "loss": 1.4312, "step": 285800 }, { "epoch": 5.559552746718522, "grad_norm": 6.703122138977051, "learning_rate": 2.2236204379562047e-05, "loss": 1.4249, "step": 285900 }, { 
"epoch": 5.561497326203209, "grad_norm": 3.293968915939331, "learning_rate": 2.2226569343065694e-05, "loss": 1.473, "step": 286000 }, { "epoch": 5.563441905687895, "grad_norm": 4.800937652587891, "learning_rate": 2.221683698296837e-05, "loss": 1.4013, "step": 286100 }, { "epoch": 5.565386485172581, "grad_norm": 1.9970263242721558, "learning_rate": 2.220710462287105e-05, "loss": 1.5203, "step": 286200 }, { "epoch": 5.567331064657268, "grad_norm": 4.897462368011475, "learning_rate": 2.2197372262773725e-05, "loss": 1.4346, "step": 286300 }, { "epoch": 5.569275644141954, "grad_norm": 2.7594754695892334, "learning_rate": 2.21876399026764e-05, "loss": 1.4612, "step": 286400 }, { "epoch": 5.571220223626641, "grad_norm": 3.622833251953125, "learning_rate": 2.2177907542579076e-05, "loss": 1.4326, "step": 286500 }, { "epoch": 5.573164803111327, "grad_norm": 2.170287847518921, "learning_rate": 2.2168175182481753e-05, "loss": 1.506, "step": 286600 }, { "epoch": 5.575109382596014, "grad_norm": 5.0816473960876465, "learning_rate": 2.215844282238443e-05, "loss": 1.3693, "step": 286700 }, { "epoch": 5.5770539620807, "grad_norm": 5.171382904052734, "learning_rate": 2.2148710462287107e-05, "loss": 1.4371, "step": 286800 }, { "epoch": 5.578998541565387, "grad_norm": 3.817363739013672, "learning_rate": 2.213897810218978e-05, "loss": 1.3876, "step": 286900 }, { "epoch": 5.580943121050073, "grad_norm": 3.648770570755005, "learning_rate": 2.2129245742092458e-05, "loss": 1.4917, "step": 287000 }, { "epoch": 5.5828877005347595, "grad_norm": 2.945552110671997, "learning_rate": 2.2119513381995135e-05, "loss": 1.4117, "step": 287100 }, { "epoch": 5.584832280019445, "grad_norm": 3.506589889526367, "learning_rate": 2.2109781021897812e-05, "loss": 1.3539, "step": 287200 }, { "epoch": 5.586776859504132, "grad_norm": 4.20979642868042, "learning_rate": 2.210004866180049e-05, "loss": 1.4222, "step": 287300 }, { "epoch": 5.588721438988818, "grad_norm": 6.660010814666748, "learning_rate": 
2.2090316301703166e-05, "loss": 1.6615, "step": 287400 }, { "epoch": 5.590666018473505, "grad_norm": 2.5411062240600586, "learning_rate": 2.208058394160584e-05, "loss": 1.3913, "step": 287500 }, { "epoch": 5.592610597958192, "grad_norm": 2.9179351329803467, "learning_rate": 2.2070851581508517e-05, "loss": 1.515, "step": 287600 }, { "epoch": 5.594555177442878, "grad_norm": 4.701021671295166, "learning_rate": 2.2061119221411194e-05, "loss": 1.6061, "step": 287700 }, { "epoch": 5.596499756927565, "grad_norm": 7.820613384246826, "learning_rate": 2.205138686131387e-05, "loss": 1.5654, "step": 287800 }, { "epoch": 5.598444336412251, "grad_norm": 4.913032531738281, "learning_rate": 2.2041654501216548e-05, "loss": 1.634, "step": 287900 }, { "epoch": 5.6003889158969375, "grad_norm": 1.9850351810455322, "learning_rate": 2.203192214111922e-05, "loss": 1.5176, "step": 288000 }, { "epoch": 5.6023334953816235, "grad_norm": 2.0455987453460693, "learning_rate": 2.2022189781021898e-05, "loss": 1.4726, "step": 288100 }, { "epoch": 5.60427807486631, "grad_norm": 4.435601711273193, "learning_rate": 2.2012457420924575e-05, "loss": 1.4968, "step": 288200 }, { "epoch": 5.606222654350996, "grad_norm": 3.5430071353912354, "learning_rate": 2.200272506082725e-05, "loss": 1.4934, "step": 288300 }, { "epoch": 5.608167233835683, "grad_norm": 45.145896911621094, "learning_rate": 2.199299270072993e-05, "loss": 1.6951, "step": 288400 }, { "epoch": 5.610111813320369, "grad_norm": 6.036892414093018, "learning_rate": 2.1983260340632606e-05, "loss": 1.5368, "step": 288500 }, { "epoch": 5.612056392805056, "grad_norm": 9.456437110900879, "learning_rate": 2.197352798053528e-05, "loss": 1.4271, "step": 288600 }, { "epoch": 5.614000972289743, "grad_norm": 5.20734977722168, "learning_rate": 2.1963795620437957e-05, "loss": 1.4109, "step": 288700 }, { "epoch": 5.615945551774429, "grad_norm": 6.615850448608398, "learning_rate": 2.1954160583941608e-05, "loss": 1.6973, "step": 288800 }, { "epoch": 
5.617890131259115, "grad_norm": 3.3000872135162354, "learning_rate": 2.194442822384428e-05, "loss": 1.4722, "step": 288900 }, { "epoch": 5.619834710743802, "grad_norm": 5.353959560394287, "learning_rate": 2.193469586374696e-05, "loss": 1.5116, "step": 289000 }, { "epoch": 5.621779290228488, "grad_norm": 4.159512042999268, "learning_rate": 2.1924963503649636e-05, "loss": 1.3296, "step": 289100 }, { "epoch": 5.623723869713174, "grad_norm": 4.576170444488525, "learning_rate": 2.1915231143552313e-05, "loss": 1.4364, "step": 289200 }, { "epoch": 5.625668449197861, "grad_norm": 2.868464231491089, "learning_rate": 2.190549878345499e-05, "loss": 1.4456, "step": 289300 }, { "epoch": 5.627613028682547, "grad_norm": 3.6382029056549072, "learning_rate": 2.1895766423357667e-05, "loss": 1.5592, "step": 289400 }, { "epoch": 5.629557608167234, "grad_norm": 5.088788986206055, "learning_rate": 2.188603406326034e-05, "loss": 1.3237, "step": 289500 }, { "epoch": 5.63150218765192, "grad_norm": 6.591573715209961, "learning_rate": 2.1876301703163017e-05, "loss": 1.5397, "step": 289600 }, { "epoch": 5.633446767136607, "grad_norm": 1.63970148563385, "learning_rate": 2.1866569343065694e-05, "loss": 1.4127, "step": 289700 }, { "epoch": 5.635391346621293, "grad_norm": 6.362478256225586, "learning_rate": 2.185683698296837e-05, "loss": 1.4752, "step": 289800 }, { "epoch": 5.63733592610598, "grad_norm": 3.996631622314453, "learning_rate": 2.184710462287105e-05, "loss": 1.5489, "step": 289900 }, { "epoch": 5.639280505590666, "grad_norm": 8.790959358215332, "learning_rate": 2.1837372262773722e-05, "loss": 1.4194, "step": 290000 }, { "epoch": 5.639280505590666, "eval_accuracy": 0.5506555555555556, "eval_f1": 0.5414240087048581, "eval_loss": 1.183394193649292, "eval_precision": 0.565352427882883, "eval_recall": 0.5506555555555556, "eval_runtime": 11713.9686, "eval_samples_per_second": 15.366, "eval_steps_per_second": 0.48, "step": 290000 }, { "epoch": 5.6412250850753525, "grad_norm": 
5.158703327178955, "learning_rate": 2.18276399026764e-05, "loss": 1.626, "step": 290100 }, { "epoch": 5.643169664560039, "grad_norm": 5.334196090698242, "learning_rate": 2.1817907542579076e-05, "loss": 1.6088, "step": 290200 }, { "epoch": 5.645114244044725, "grad_norm": 3.1548922061920166, "learning_rate": 2.1808175182481753e-05, "loss": 1.4679, "step": 290300 }, { "epoch": 5.647058823529412, "grad_norm": 5.3183369636535645, "learning_rate": 2.179844282238443e-05, "loss": 1.4364, "step": 290400 }, { "epoch": 5.649003403014098, "grad_norm": 3.8246285915374756, "learning_rate": 2.1788710462287107e-05, "loss": 1.3859, "step": 290500 }, { "epoch": 5.650947982498785, "grad_norm": 4.753172874450684, "learning_rate": 2.177897810218978e-05, "loss": 1.4545, "step": 290600 }, { "epoch": 5.652892561983471, "grad_norm": 13.426947593688965, "learning_rate": 2.1769245742092458e-05, "loss": 1.5346, "step": 290700 }, { "epoch": 5.654837141468158, "grad_norm": 9.930489540100098, "learning_rate": 2.1759513381995135e-05, "loss": 1.5787, "step": 290800 }, { "epoch": 5.656781720952844, "grad_norm": 5.3537139892578125, "learning_rate": 2.1749781021897812e-05, "loss": 1.4157, "step": 290900 }, { "epoch": 5.6587263004375306, "grad_norm": 3.388944625854492, "learning_rate": 2.174004866180049e-05, "loss": 1.5142, "step": 291000 }, { "epoch": 5.6606708799222165, "grad_norm": 7.211799144744873, "learning_rate": 2.1730316301703163e-05, "loss": 1.5104, "step": 291100 }, { "epoch": 5.662615459406903, "grad_norm": 6.720145225524902, "learning_rate": 2.172058394160584e-05, "loss": 1.5595, "step": 291200 }, { "epoch": 5.664560038891589, "grad_norm": 3.2618842124938965, "learning_rate": 2.1710851581508517e-05, "loss": 1.4405, "step": 291300 }, { "epoch": 5.666504618376276, "grad_norm": 8.687882423400879, "learning_rate": 2.1701119221411194e-05, "loss": 1.4632, "step": 291400 }, { "epoch": 5.668449197860962, "grad_norm": 7.792916774749756, "learning_rate": 2.169138686131387e-05, "loss": 1.4552, 
"step": 291500 }, { "epoch": 5.670393777345649, "grad_norm": 6.583304405212402, "learning_rate": 2.1681654501216548e-05, "loss": 1.6123, "step": 291600 }, { "epoch": 5.672338356830336, "grad_norm": 2.318476915359497, "learning_rate": 2.167192214111922e-05, "loss": 1.6061, "step": 291700 }, { "epoch": 5.674282936315022, "grad_norm": 2.2763113975524902, "learning_rate": 2.16621897810219e-05, "loss": 1.382, "step": 291800 }, { "epoch": 5.676227515799709, "grad_norm": 1.98052978515625, "learning_rate": 2.1652457420924575e-05, "loss": 1.5239, "step": 291900 }, { "epoch": 5.678172095284395, "grad_norm": 3.184504985809326, "learning_rate": 2.1642725060827252e-05, "loss": 1.328, "step": 292000 }, { "epoch": 5.680116674769081, "grad_norm": 10.664702415466309, "learning_rate": 2.163299270072993e-05, "loss": 1.4311, "step": 292100 }, { "epoch": 5.682061254253767, "grad_norm": 4.548878192901611, "learning_rate": 2.1623260340632603e-05, "loss": 1.4489, "step": 292200 }, { "epoch": 5.684005833738454, "grad_norm": 2.9485154151916504, "learning_rate": 2.161352798053528e-05, "loss": 1.3825, "step": 292300 }, { "epoch": 5.68595041322314, "grad_norm": 3.9207537174224854, "learning_rate": 2.1603795620437957e-05, "loss": 1.4241, "step": 292400 }, { "epoch": 5.687894992707827, "grad_norm": 3.4399285316467285, "learning_rate": 2.1594063260340634e-05, "loss": 1.3936, "step": 292500 }, { "epoch": 5.689839572192513, "grad_norm": 5.311613082885742, "learning_rate": 2.158433090024331e-05, "loss": 1.5461, "step": 292600 }, { "epoch": 5.6917841516772, "grad_norm": 7.653489589691162, "learning_rate": 2.1574598540145988e-05, "loss": 1.6261, "step": 292700 }, { "epoch": 5.693728731161887, "grad_norm": 6.019251823425293, "learning_rate": 2.1564963503649636e-05, "loss": 1.6161, "step": 292800 }, { "epoch": 5.695673310646573, "grad_norm": 3.484555959701538, "learning_rate": 2.1555231143552313e-05, "loss": 1.4222, "step": 292900 }, { "epoch": 5.697617890131259, "grad_norm": 8.212474822998047, 
"learning_rate": 2.154549878345499e-05, "loss": 1.6117, "step": 293000 }, { "epoch": 5.6995624696159455, "grad_norm": 4.9883012771606445, "learning_rate": 2.1535766423357663e-05, "loss": 1.488, "step": 293100 }, { "epoch": 5.701507049100632, "grad_norm": 5.8983235359191895, "learning_rate": 2.152603406326034e-05, "loss": 1.5864, "step": 293200 }, { "epoch": 5.703451628585318, "grad_norm": 3.85331654548645, "learning_rate": 2.151630170316302e-05, "loss": 1.2663, "step": 293300 }, { "epoch": 5.705396208070005, "grad_norm": 4.069347381591797, "learning_rate": 2.1506569343065695e-05, "loss": 1.4489, "step": 293400 }, { "epoch": 5.707340787554691, "grad_norm": 4.975993633270264, "learning_rate": 2.149683698296837e-05, "loss": 1.3561, "step": 293500 }, { "epoch": 5.709285367039378, "grad_norm": 6.7090301513671875, "learning_rate": 2.1487104622871045e-05, "loss": 1.4305, "step": 293600 }, { "epoch": 5.711229946524064, "grad_norm": 5.108490943908691, "learning_rate": 2.1477372262773722e-05, "loss": 1.5111, "step": 293700 }, { "epoch": 5.713174526008751, "grad_norm": 5.101434230804443, "learning_rate": 2.14676399026764e-05, "loss": 1.5529, "step": 293800 }, { "epoch": 5.715119105493437, "grad_norm": 3.5954813957214355, "learning_rate": 2.1457907542579076e-05, "loss": 1.4427, "step": 293900 }, { "epoch": 5.717063684978124, "grad_norm": 12.729826927185059, "learning_rate": 2.1448175182481753e-05, "loss": 1.4658, "step": 294000 }, { "epoch": 5.7190082644628095, "grad_norm": 4.421119213104248, "learning_rate": 2.143844282238443e-05, "loss": 1.3389, "step": 294100 }, { "epoch": 5.720952843947496, "grad_norm": 5.879907608032227, "learning_rate": 2.1428710462287104e-05, "loss": 1.5495, "step": 294200 }, { "epoch": 5.722897423432183, "grad_norm": 3.032226324081421, "learning_rate": 2.141897810218978e-05, "loss": 1.3316, "step": 294300 }, { "epoch": 5.724842002916869, "grad_norm": 9.806411743164062, "learning_rate": 2.140924574209246e-05, "loss": 1.3687, "step": 294400 }, { "epoch": 
5.726786582401556, "grad_norm": 3.7422525882720947, "learning_rate": 2.1399513381995135e-05, "loss": 1.4407, "step": 294500 }, { "epoch": 5.728731161886242, "grad_norm": 14.796088218688965, "learning_rate": 2.1389781021897812e-05, "loss": 1.3761, "step": 294600 }, { "epoch": 5.730675741370929, "grad_norm": 5.225278854370117, "learning_rate": 2.1380048661800486e-05, "loss": 1.3311, "step": 294700 }, { "epoch": 5.732620320855615, "grad_norm": 3.0056955814361572, "learning_rate": 2.1370316301703163e-05, "loss": 1.4214, "step": 294800 }, { "epoch": 5.734564900340302, "grad_norm": 16.333229064941406, "learning_rate": 2.1360681265206814e-05, "loss": 1.4515, "step": 294900 }, { "epoch": 5.736509479824988, "grad_norm": 5.137723445892334, "learning_rate": 2.135094890510949e-05, "loss": 1.5619, "step": 295000 }, { "epoch": 5.736509479824988, "eval_accuracy": 0.5484722222222222, "eval_f1": 0.5356048179614298, "eval_loss": 1.1950734853744507, "eval_precision": 0.568514688167899, "eval_recall": 0.5484722222222221, "eval_runtime": 11713.6169, "eval_samples_per_second": 15.367, "eval_steps_per_second": 0.48, "step": 295000 }, { "epoch": 5.7384540593096744, "grad_norm": 5.041721343994141, "learning_rate": 2.1341216545012164e-05, "loss": 1.4097, "step": 295100 }, { "epoch": 5.74039863879436, "grad_norm": 2.129291296005249, "learning_rate": 2.1331484184914845e-05, "loss": 1.3712, "step": 295200 }, { "epoch": 5.742343218279047, "grad_norm": 11.390162467956543, "learning_rate": 2.132175182481752e-05, "loss": 1.3844, "step": 295300 }, { "epoch": 5.744287797763733, "grad_norm": 6.348367214202881, "learning_rate": 2.1312019464720195e-05, "loss": 1.3894, "step": 295400 }, { "epoch": 5.74623237724842, "grad_norm": 3.887601375579834, "learning_rate": 2.1302384428223846e-05, "loss": 1.5283, "step": 295500 }, { "epoch": 5.748176956733106, "grad_norm": 5.461944580078125, "learning_rate": 2.1292652068126523e-05, "loss": 1.3468, "step": 295600 }, { "epoch": 5.750121536217793, "grad_norm": 
7.01231050491333, "learning_rate": 2.1282919708029197e-05, "loss": 1.4918, "step": 295700 }, { "epoch": 5.75206611570248, "grad_norm": 10.621657371520996, "learning_rate": 2.1273187347931874e-05, "loss": 1.6394, "step": 295800 }, { "epoch": 5.754010695187166, "grad_norm": 6.417404651641846, "learning_rate": 2.126345498783455e-05, "loss": 1.3839, "step": 295900 }, { "epoch": 5.7559552746718525, "grad_norm": 7.994333267211914, "learning_rate": 2.1253722627737228e-05, "loss": 1.611, "step": 296000 }, { "epoch": 5.7578998541565385, "grad_norm": 3.1821839809417725, "learning_rate": 2.1243990267639905e-05, "loss": 1.3477, "step": 296100 }, { "epoch": 5.759844433641225, "grad_norm": 1.7471511363983154, "learning_rate": 2.123425790754258e-05, "loss": 1.5038, "step": 296200 }, { "epoch": 5.761789013125911, "grad_norm": 3.5552022457122803, "learning_rate": 2.1224525547445256e-05, "loss": 1.3856, "step": 296300 }, { "epoch": 5.763733592610598, "grad_norm": 8.815410614013672, "learning_rate": 2.1214793187347933e-05, "loss": 1.3247, "step": 296400 }, { "epoch": 5.765678172095284, "grad_norm": 4.488330841064453, "learning_rate": 2.120506082725061e-05, "loss": 1.3788, "step": 296500 }, { "epoch": 5.767622751579971, "grad_norm": 12.123502731323242, "learning_rate": 2.1195328467153287e-05, "loss": 1.4156, "step": 296600 }, { "epoch": 5.769567331064657, "grad_norm": 3.8111541271209717, "learning_rate": 2.1185596107055964e-05, "loss": 1.431, "step": 296700 }, { "epoch": 5.771511910549344, "grad_norm": 4.78992223739624, "learning_rate": 2.1175863746958638e-05, "loss": 1.5304, "step": 296800 }, { "epoch": 5.773456490034031, "grad_norm": 3.9300622940063477, "learning_rate": 2.1166131386861315e-05, "loss": 1.4547, "step": 296900 }, { "epoch": 5.775401069518717, "grad_norm": 4.190883636474609, "learning_rate": 2.115639902676399e-05, "loss": 1.4649, "step": 297000 }, { "epoch": 5.7773456490034025, "grad_norm": 6.430525302886963, "learning_rate": 2.114666666666667e-05, "loss": 1.5168, 
"step": 297100 }, { "epoch": 5.779290228488089, "grad_norm": 3.7398524284362793, "learning_rate": 2.1136934306569346e-05, "loss": 1.3787, "step": 297200 }, { "epoch": 5.781234807972776, "grad_norm": 5.67103910446167, "learning_rate": 2.112720194647202e-05, "loss": 1.354, "step": 297300 }, { "epoch": 5.783179387457462, "grad_norm": 7.821011066436768, "learning_rate": 2.1117469586374696e-05, "loss": 1.4212, "step": 297400 }, { "epoch": 5.785123966942149, "grad_norm": 5.7346601486206055, "learning_rate": 2.1107737226277373e-05, "loss": 1.5598, "step": 297500 }, { "epoch": 5.787068546426835, "grad_norm": 3.3113534450531006, "learning_rate": 2.109800486618005e-05, "loss": 1.4726, "step": 297600 }, { "epoch": 5.789013125911522, "grad_norm": 5.216361045837402, "learning_rate": 2.1088272506082727e-05, "loss": 1.4348, "step": 297700 }, { "epoch": 5.790957705396208, "grad_norm": 4.333157539367676, "learning_rate": 2.1078540145985404e-05, "loss": 1.4954, "step": 297800 }, { "epoch": 5.792902284880895, "grad_norm": 4.534646034240723, "learning_rate": 2.1068807785888078e-05, "loss": 1.4624, "step": 297900 }, { "epoch": 5.794846864365581, "grad_norm": 9.672568321228027, "learning_rate": 2.1059075425790755e-05, "loss": 1.496, "step": 298000 }, { "epoch": 5.7967914438502675, "grad_norm": 3.920032024383545, "learning_rate": 2.1049343065693432e-05, "loss": 1.579, "step": 298100 }, { "epoch": 5.798736023334953, "grad_norm": 6.057929992675781, "learning_rate": 2.103961070559611e-05, "loss": 1.5324, "step": 298200 }, { "epoch": 5.80068060281964, "grad_norm": 8.549723625183105, "learning_rate": 2.1029878345498786e-05, "loss": 1.7936, "step": 298300 }, { "epoch": 5.802625182304327, "grad_norm": 5.113042831420898, "learning_rate": 2.102014598540146e-05, "loss": 1.3348, "step": 298400 }, { "epoch": 5.804569761789013, "grad_norm": 3.8168506622314453, "learning_rate": 2.1010413625304137e-05, "loss": 1.5531, "step": 298500 }, { "epoch": 5.8065143412737, "grad_norm": 4.355186939239502, 
"learning_rate": 2.1000681265206814e-05, "loss": 1.3741, "step": 298600 }, { "epoch": 5.808458920758386, "grad_norm": 2.422569990158081, "learning_rate": 2.099104622871046e-05, "loss": 1.6056, "step": 298700 }, { "epoch": 5.810403500243073, "grad_norm": 2.950106382369995, "learning_rate": 2.098131386861314e-05, "loss": 1.3911, "step": 298800 }, { "epoch": 5.812348079727759, "grad_norm": 7.908589839935303, "learning_rate": 2.0971581508515815e-05, "loss": 1.5227, "step": 298900 }, { "epoch": 5.8142926592124455, "grad_norm": 6.28528356552124, "learning_rate": 2.0961849148418492e-05, "loss": 1.5664, "step": 299000 }, { "epoch": 5.8162372386971315, "grad_norm": 5.12917423248291, "learning_rate": 2.095211678832117e-05, "loss": 1.4535, "step": 299100 }, { "epoch": 5.818181818181818, "grad_norm": 8.477303504943848, "learning_rate": 2.0942384428223847e-05, "loss": 1.4053, "step": 299200 }, { "epoch": 5.820126397666504, "grad_norm": 2.6333563327789307, "learning_rate": 2.093265206812652e-05, "loss": 1.4171, "step": 299300 }, { "epoch": 5.822070977151191, "grad_norm": 8.357742309570312, "learning_rate": 2.0922919708029197e-05, "loss": 1.6184, "step": 299400 }, { "epoch": 5.824015556635877, "grad_norm": 6.885205268859863, "learning_rate": 2.0913187347931874e-05, "loss": 1.4026, "step": 299500 }, { "epoch": 5.825960136120564, "grad_norm": 9.630010604858398, "learning_rate": 2.090345498783455e-05, "loss": 1.3151, "step": 299600 }, { "epoch": 5.82790471560525, "grad_norm": 8.115565299987793, "learning_rate": 2.0893722627737228e-05, "loss": 1.587, "step": 299700 }, { "epoch": 5.829849295089937, "grad_norm": 1.790964961051941, "learning_rate": 2.0883990267639902e-05, "loss": 1.4707, "step": 299800 }, { "epoch": 5.831793874574624, "grad_norm": 3.225213050842285, "learning_rate": 2.087425790754258e-05, "loss": 1.483, "step": 299900 }, { "epoch": 5.83373845405931, "grad_norm": 8.438333511352539, "learning_rate": 2.0864525547445256e-05, "loss": 1.4517, "step": 300000 }, { "epoch": 
5.83373845405931, "eval_accuracy": 0.5570222222222222, "eval_f1": 0.5359681498910464, "eval_loss": 1.1835412979125977, "eval_precision": 0.569591886073392, "eval_recall": 0.5570222222222222, "eval_runtime": 11719.3267, "eval_samples_per_second": 15.359, "eval_steps_per_second": 0.48, "step": 300000 }, { "epoch": 5.835683033543996, "grad_norm": 4.358470439910889, "learning_rate": 2.0854793187347933e-05, "loss": 1.402, "step": 300100 }, { "epoch": 5.837627613028682, "grad_norm": 4.333396911621094, "learning_rate": 2.084506082725061e-05, "loss": 1.5533, "step": 300200 }, { "epoch": 5.839572192513369, "grad_norm": 1.896389126777649, "learning_rate": 2.0835328467153287e-05, "loss": 1.4953, "step": 300300 }, { "epoch": 5.841516771998055, "grad_norm": 3.8063201904296875, "learning_rate": 2.082559610705596e-05, "loss": 1.3313, "step": 300400 }, { "epoch": 5.843461351482742, "grad_norm": 4.61654806137085, "learning_rate": 2.0815863746958638e-05, "loss": 1.4429, "step": 300500 }, { "epoch": 5.845405930967428, "grad_norm": 3.480034589767456, "learning_rate": 2.0806131386861315e-05, "loss": 1.5803, "step": 300600 }, { "epoch": 5.847350510452115, "grad_norm": 5.1354660987854, "learning_rate": 2.079639902676399e-05, "loss": 1.4542, "step": 300700 }, { "epoch": 5.849295089936801, "grad_norm": 6.001499176025391, "learning_rate": 2.078666666666667e-05, "loss": 1.5379, "step": 300800 }, { "epoch": 5.851239669421488, "grad_norm": 2.7201807498931885, "learning_rate": 2.0776934306569342e-05, "loss": 1.3794, "step": 300900 }, { "epoch": 5.8531842489061745, "grad_norm": 3.97676420211792, "learning_rate": 2.076720194647202e-05, "loss": 1.4071, "step": 301000 }, { "epoch": 5.8551288283908605, "grad_norm": 3.3889548778533936, "learning_rate": 2.0757469586374696e-05, "loss": 1.3947, "step": 301100 }, { "epoch": 5.857073407875546, "grad_norm": 4.081322193145752, "learning_rate": 2.0747737226277373e-05, "loss": 1.4729, "step": 301200 }, { "epoch": 5.859017987360233, "grad_norm": 
6.688270568847656, "learning_rate": 2.073800486618005e-05, "loss": 1.503, "step": 301300 }, { "epoch": 5.86096256684492, "grad_norm": 6.1161885261535645, "learning_rate": 2.0728272506082727e-05, "loss": 1.6636, "step": 301400 }, { "epoch": 5.862907146329606, "grad_norm": 7.752040863037109, "learning_rate": 2.07185401459854e-05, "loss": 1.4274, "step": 301500 }, { "epoch": 5.864851725814293, "grad_norm": 3.1994030475616455, "learning_rate": 2.0708905109489052e-05, "loss": 1.6194, "step": 301600 }, { "epoch": 5.866796305298979, "grad_norm": 2.9750773906707764, "learning_rate": 2.069917274939173e-05, "loss": 1.5134, "step": 301700 }, { "epoch": 5.868740884783666, "grad_norm": 3.987666368484497, "learning_rate": 2.0689440389294403e-05, "loss": 1.8035, "step": 301800 }, { "epoch": 5.870685464268352, "grad_norm": 20.50288963317871, "learning_rate": 2.067970802919708e-05, "loss": 1.5954, "step": 301900 }, { "epoch": 5.8726300437530385, "grad_norm": 25.590723037719727, "learning_rate": 2.066997566909976e-05, "loss": 1.4697, "step": 302000 }, { "epoch": 5.8745746232377245, "grad_norm": 4.702803134918213, "learning_rate": 2.0660243309002434e-05, "loss": 1.3928, "step": 302100 }, { "epoch": 5.876519202722411, "grad_norm": 4.452448844909668, "learning_rate": 2.065051094890511e-05, "loss": 1.3908, "step": 302200 }, { "epoch": 5.878463782207097, "grad_norm": 1.8155076503753662, "learning_rate": 2.0640778588807788e-05, "loss": 1.4921, "step": 302300 }, { "epoch": 5.880408361691784, "grad_norm": 10.16879653930664, "learning_rate": 2.063104622871046e-05, "loss": 1.4728, "step": 302400 }, { "epoch": 5.882352941176471, "grad_norm": 4.655290126800537, "learning_rate": 2.062131386861314e-05, "loss": 1.4199, "step": 302500 }, { "epoch": 5.884297520661157, "grad_norm": 4.60542631149292, "learning_rate": 2.0611581508515816e-05, "loss": 1.3494, "step": 302600 }, { "epoch": 5.886242100145844, "grad_norm": 4.656009674072266, "learning_rate": 2.0601849148418493e-05, "loss": 1.4626, "step": 
302700 }, { "epoch": 5.88818667963053, "grad_norm": 4.212295055389404, "learning_rate": 2.059211678832117e-05, "loss": 1.6382, "step": 302800 }, { "epoch": 5.890131259115217, "grad_norm": 4.177592754364014, "learning_rate": 2.0582384428223843e-05, "loss": 1.3654, "step": 302900 }, { "epoch": 5.892075838599903, "grad_norm": 7.126915454864502, "learning_rate": 2.057265206812652e-05, "loss": 1.5878, "step": 303000 }, { "epoch": 5.894020418084589, "grad_norm": 3.6068100929260254, "learning_rate": 2.05629197080292e-05, "loss": 1.4209, "step": 303100 }, { "epoch": 5.895964997569275, "grad_norm": 3.6560840606689453, "learning_rate": 2.0553187347931874e-05, "loss": 1.49, "step": 303200 }, { "epoch": 5.897909577053962, "grad_norm": 4.020359992980957, "learning_rate": 2.054345498783455e-05, "loss": 1.4969, "step": 303300 }, { "epoch": 5.899854156538648, "grad_norm": 3.2645716667175293, "learning_rate": 2.0533722627737228e-05, "loss": 1.3322, "step": 303400 }, { "epoch": 5.901798736023335, "grad_norm": 5.862416744232178, "learning_rate": 2.0523990267639902e-05, "loss": 1.4728, "step": 303500 }, { "epoch": 5.903743315508021, "grad_norm": 3.521505832672119, "learning_rate": 2.0514355231143553e-05, "loss": 1.5776, "step": 303600 }, { "epoch": 5.905687894992708, "grad_norm": 6.165040016174316, "learning_rate": 2.050462287104623e-05, "loss": 1.3923, "step": 303700 }, { "epoch": 5.907632474477394, "grad_norm": 2.4897730350494385, "learning_rate": 2.0494890510948904e-05, "loss": 1.4239, "step": 303800 }, { "epoch": 5.909577053962081, "grad_norm": 5.103913307189941, "learning_rate": 2.0485158150851584e-05, "loss": 1.5213, "step": 303900 }, { "epoch": 5.9115216334467675, "grad_norm": 5.871942043304443, "learning_rate": 2.047542579075426e-05, "loss": 1.3446, "step": 304000 }, { "epoch": 5.9134662129314535, "grad_norm": 5.656116485595703, "learning_rate": 2.0465693430656935e-05, "loss": 1.475, "step": 304100 }, { "epoch": 5.91541079241614, "grad_norm": 3.600043296813965, 
"learning_rate": 2.0455961070559612e-05, "loss": 1.5724, "step": 304200 }, { "epoch": 5.917355371900826, "grad_norm": 9.482864379882812, "learning_rate": 2.0446228710462285e-05, "loss": 1.4877, "step": 304300 }, { "epoch": 5.919299951385513, "grad_norm": 9.639269828796387, "learning_rate": 2.0436496350364966e-05, "loss": 1.3854, "step": 304400 }, { "epoch": 5.921244530870199, "grad_norm": 4.629130840301514, "learning_rate": 2.0426763990267643e-05, "loss": 1.3796, "step": 304500 }, { "epoch": 5.923189110354886, "grad_norm": 7.125763416290283, "learning_rate": 2.0417031630170316e-05, "loss": 1.6543, "step": 304600 }, { "epoch": 5.925133689839572, "grad_norm": 4.524581432342529, "learning_rate": 2.0407299270072993e-05, "loss": 1.3298, "step": 304700 }, { "epoch": 5.927078269324259, "grad_norm": 3.784674882888794, "learning_rate": 2.039756690997567e-05, "loss": 1.5343, "step": 304800 }, { "epoch": 5.929022848808945, "grad_norm": 1.8280987739562988, "learning_rate": 2.0387834549878344e-05, "loss": 1.4807, "step": 304900 }, { "epoch": 5.9309674282936315, "grad_norm": 3.944295644760132, "learning_rate": 2.0378102189781024e-05, "loss": 1.5457, "step": 305000 }, { "epoch": 5.9309674282936315, "eval_accuracy": 0.56175, "eval_f1": 0.5530176836589128, "eval_loss": 1.163524866104126, "eval_precision": 0.5737936997334955, "eval_recall": 0.5617500000000001, "eval_runtime": 11717.1261, "eval_samples_per_second": 15.362, "eval_steps_per_second": 0.48, "step": 305000 }, { "epoch": 5.932912007778318, "grad_norm": 4.496733665466309, "learning_rate": 2.03683698296837e-05, "loss": 1.6155, "step": 305100 }, { "epoch": 5.934856587263004, "grad_norm": 7.328239440917969, "learning_rate": 2.0358637469586375e-05, "loss": 1.5498, "step": 305200 }, { "epoch": 5.93680116674769, "grad_norm": 4.749730110168457, "learning_rate": 2.0348905109489052e-05, "loss": 1.6858, "step": 305300 }, { "epoch": 5.938745746232377, "grad_norm": 3.8981568813323975, "learning_rate": 2.0339172749391726e-05, "loss": 
1.5091, "step": 305400 }, { "epoch": 5.940690325717064, "grad_norm": 5.010552883148193, "learning_rate": 2.0329440389294406e-05, "loss": 1.5786, "step": 305500 }, { "epoch": 5.94263490520175, "grad_norm": 4.383106708526611, "learning_rate": 2.0319708029197083e-05, "loss": 1.525, "step": 305600 }, { "epoch": 5.944579484686437, "grad_norm": 6.8664751052856445, "learning_rate": 2.0309975669099757e-05, "loss": 1.4027, "step": 305700 }, { "epoch": 5.946524064171123, "grad_norm": 5.480989933013916, "learning_rate": 2.0300243309002434e-05, "loss": 1.3494, "step": 305800 }, { "epoch": 5.94846864365581, "grad_norm": 8.752619743347168, "learning_rate": 2.029051094890511e-05, "loss": 1.5539, "step": 305900 }, { "epoch": 5.950413223140496, "grad_norm": 4.708664894104004, "learning_rate": 2.0280778588807785e-05, "loss": 1.5052, "step": 306000 }, { "epoch": 5.952357802625182, "grad_norm": 4.090583801269531, "learning_rate": 2.0271046228710465e-05, "loss": 1.4663, "step": 306100 }, { "epoch": 5.954302382109868, "grad_norm": 4.031184673309326, "learning_rate": 2.0261313868613142e-05, "loss": 1.4976, "step": 306200 }, { "epoch": 5.956246961594555, "grad_norm": 2.8702776432037354, "learning_rate": 2.0251581508515816e-05, "loss": 1.5506, "step": 306300 }, { "epoch": 5.958191541079241, "grad_norm": 3.2854809761047363, "learning_rate": 2.0241849148418493e-05, "loss": 1.3962, "step": 306400 }, { "epoch": 5.960136120563928, "grad_norm": 3.0082104206085205, "learning_rate": 2.0232116788321166e-05, "loss": 1.3569, "step": 306500 }, { "epoch": 5.962080700048615, "grad_norm": 3.737938642501831, "learning_rate": 2.0222384428223847e-05, "loss": 1.52, "step": 306600 }, { "epoch": 5.964025279533301, "grad_norm": 3.3214964866638184, "learning_rate": 2.0212652068126524e-05, "loss": 1.5762, "step": 306700 }, { "epoch": 5.965969859017988, "grad_norm": 3.258768320083618, "learning_rate": 2.0202919708029197e-05, "loss": 1.6886, "step": 306800 }, { "epoch": 5.967914438502674, "grad_norm": 
3.842589855194092, "learning_rate": 2.0193187347931874e-05, "loss": 1.4658, "step": 306900 }, { "epoch": 5.9698590179873605, "grad_norm": 7.0361247062683105, "learning_rate": 2.018345498783455e-05, "loss": 1.3673, "step": 307000 }, { "epoch": 5.9718035974720465, "grad_norm": 3.958556652069092, "learning_rate": 2.0173722627737225e-05, "loss": 1.4029, "step": 307100 }, { "epoch": 5.973748176956733, "grad_norm": 5.821876525878906, "learning_rate": 2.0163990267639905e-05, "loss": 1.4299, "step": 307200 }, { "epoch": 5.975692756441419, "grad_norm": 6.117541790008545, "learning_rate": 2.0154257907542582e-05, "loss": 1.4105, "step": 307300 }, { "epoch": 5.977637335926106, "grad_norm": 3.7048325538635254, "learning_rate": 2.0144525547445256e-05, "loss": 1.311, "step": 307400 }, { "epoch": 5.979581915410792, "grad_norm": 8.271830558776855, "learning_rate": 2.0134793187347933e-05, "loss": 1.5121, "step": 307500 }, { "epoch": 5.981526494895479, "grad_norm": 4.967381000518799, "learning_rate": 2.0125158150851584e-05, "loss": 1.4801, "step": 307600 }, { "epoch": 5.983471074380166, "grad_norm": 3.275629758834839, "learning_rate": 2.0115425790754258e-05, "loss": 1.6551, "step": 307700 }, { "epoch": 5.985415653864852, "grad_norm": 3.680265426635742, "learning_rate": 2.0105693430656935e-05, "loss": 1.4095, "step": 307800 }, { "epoch": 5.987360233349538, "grad_norm": 4.194607734680176, "learning_rate": 2.0095961070559612e-05, "loss": 1.5264, "step": 307900 }, { "epoch": 5.989304812834225, "grad_norm": 2.856673240661621, "learning_rate": 2.008622871046229e-05, "loss": 1.5672, "step": 308000 }, { "epoch": 5.991249392318911, "grad_norm": 6.236642837524414, "learning_rate": 2.0076496350364966e-05, "loss": 1.4937, "step": 308100 }, { "epoch": 5.993193971803597, "grad_norm": 3.848942995071411, "learning_rate": 2.006676399026764e-05, "loss": 1.4876, "step": 308200 }, { "epoch": 5.995138551288284, "grad_norm": 3.415868043899536, "learning_rate": 2.0057031630170316e-05, "loss": 1.4181, 
"step": 308300 }, { "epoch": 5.99708313077297, "grad_norm": 4.060932636260986, "learning_rate": 2.0047299270072993e-05, "loss": 1.4992, "step": 308400 }, { "epoch": 5.999027710257657, "grad_norm": 4.6190338134765625, "learning_rate": 2.003756690997567e-05, "loss": 1.4384, "step": 308500 }, { "epoch": 6.000972289742343, "grad_norm": 5.350668430328369, "learning_rate": 2.0027834549878348e-05, "loss": 1.3852, "step": 308600 }, { "epoch": 6.00291686922703, "grad_norm": 2.4087729454040527, "learning_rate": 2.0018102189781025e-05, "loss": 1.3397, "step": 308700 }, { "epoch": 6.004861448711716, "grad_norm": 9.495200157165527, "learning_rate": 2.0008369829683698e-05, "loss": 1.4402, "step": 308800 }, { "epoch": 6.006806028196403, "grad_norm": 6.116739273071289, "learning_rate": 1.9998637469586375e-05, "loss": 1.4475, "step": 308900 }, { "epoch": 6.008750607681089, "grad_norm": 5.0628156661987305, "learning_rate": 1.9988905109489052e-05, "loss": 1.6567, "step": 309000 }, { "epoch": 6.010695187165775, "grad_norm": 4.693350315093994, "learning_rate": 1.997917274939173e-05, "loss": 1.3629, "step": 309100 }, { "epoch": 6.012639766650461, "grad_norm": 21.818355560302734, "learning_rate": 1.9969440389294406e-05, "loss": 1.5012, "step": 309200 }, { "epoch": 6.014584346135148, "grad_norm": 4.915663242340088, "learning_rate": 1.9959805352798054e-05, "loss": 1.4859, "step": 309300 }, { "epoch": 6.016528925619835, "grad_norm": 10.371393203735352, "learning_rate": 1.995007299270073e-05, "loss": 1.4598, "step": 309400 }, { "epoch": 6.018473505104521, "grad_norm": 2.7357895374298096, "learning_rate": 1.9940340632603408e-05, "loss": 1.5392, "step": 309500 }, { "epoch": 6.020418084589208, "grad_norm": 5.104417324066162, "learning_rate": 1.9930608272506085e-05, "loss": 1.3863, "step": 309600 }, { "epoch": 6.022362664073894, "grad_norm": 3.106505870819092, "learning_rate": 1.992087591240876e-05, "loss": 1.3727, "step": 309700 }, { "epoch": 6.024307243558581, "grad_norm": 6.8095011711120605, 
"learning_rate": 1.9911143552311436e-05, "loss": 1.5354, "step": 309800 }, { "epoch": 6.026251823043267, "grad_norm": 6.28358793258667, "learning_rate": 1.9901411192214113e-05, "loss": 1.4686, "step": 309900 }, { "epoch": 6.0281964025279535, "grad_norm": 5.077210903167725, "learning_rate": 1.989167883211679e-05, "loss": 1.4769, "step": 310000 }, { "epoch": 6.0281964025279535, "eval_accuracy": 0.5633444444444444, "eval_f1": 0.5564599329898219, "eval_loss": 1.1636440753936768, "eval_precision": 0.5669958420911608, "eval_recall": 0.5633444444444444, "eval_runtime": 11711.6676, "eval_samples_per_second": 15.369, "eval_steps_per_second": 0.48, "step": 310000 }, { "epoch": 6.0301409820126395, "grad_norm": 16.045427322387695, "learning_rate": 1.9881946472019467e-05, "loss": 1.3949, "step": 310100 }, { "epoch": 6.032085561497326, "grad_norm": 4.747559547424316, "learning_rate": 1.987221411192214e-05, "loss": 1.6476, "step": 310200 }, { "epoch": 6.034030140982012, "grad_norm": 4.244184494018555, "learning_rate": 1.9862481751824817e-05, "loss": 1.3194, "step": 310300 }, { "epoch": 6.035974720466699, "grad_norm": 3.196268081665039, "learning_rate": 1.9852749391727494e-05, "loss": 1.4159, "step": 310400 }, { "epoch": 6.037919299951385, "grad_norm": 10.664225578308105, "learning_rate": 1.984301703163017e-05, "loss": 1.3843, "step": 310500 }, { "epoch": 6.039863879436072, "grad_norm": 3.882715940475464, "learning_rate": 1.983328467153285e-05, "loss": 1.3964, "step": 310600 }, { "epoch": 6.041808458920759, "grad_norm": 6.820979595184326, "learning_rate": 1.9823552311435525e-05, "loss": 1.4735, "step": 310700 }, { "epoch": 6.043753038405445, "grad_norm": 4.800029754638672, "learning_rate": 1.98138199513382e-05, "loss": 1.3992, "step": 310800 }, { "epoch": 6.045697617890132, "grad_norm": 8.167525291442871, "learning_rate": 1.9804087591240876e-05, "loss": 1.6581, "step": 310900 }, { "epoch": 6.047642197374818, "grad_norm": 2.6004254817962646, "learning_rate": 1.9794355231143553e-05, 
"loss": 1.5593, "step": 311000 }, { "epoch": 6.049586776859504, "grad_norm": 5.836132049560547, "learning_rate": 1.978462287104623e-05, "loss": 1.4153, "step": 311100 }, { "epoch": 6.05153135634419, "grad_norm": 4.285826206207275, "learning_rate": 1.9774890510948907e-05, "loss": 1.3626, "step": 311200 }, { "epoch": 6.053475935828877, "grad_norm": 1.9920976161956787, "learning_rate": 1.976515815085158e-05, "loss": 1.5751, "step": 311300 }, { "epoch": 6.055420515313563, "grad_norm": 4.698296070098877, "learning_rate": 1.9755425790754258e-05, "loss": 1.4953, "step": 311400 }, { "epoch": 6.05736509479825, "grad_norm": 3.9542434215545654, "learning_rate": 1.9745693430656935e-05, "loss": 1.4548, "step": 311500 }, { "epoch": 6.059309674282936, "grad_norm": 4.047943592071533, "learning_rate": 1.9735961070559612e-05, "loss": 1.39, "step": 311600 }, { "epoch": 6.061254253767623, "grad_norm": 8.237183570861816, "learning_rate": 1.972622871046229e-05, "loss": 1.4602, "step": 311700 }, { "epoch": 6.063198833252309, "grad_norm": 1.9403748512268066, "learning_rate": 1.9716496350364966e-05, "loss": 1.4665, "step": 311800 }, { "epoch": 6.065143412736996, "grad_norm": 6.315776824951172, "learning_rate": 1.970676399026764e-05, "loss": 1.45, "step": 311900 }, { "epoch": 6.0670879922216825, "grad_norm": 3.036787748336792, "learning_rate": 1.9697031630170317e-05, "loss": 1.397, "step": 312000 }, { "epoch": 6.0690325717063685, "grad_norm": 12.917655944824219, "learning_rate": 1.9687299270072994e-05, "loss": 1.5465, "step": 312100 }, { "epoch": 6.070977151191055, "grad_norm": 6.828645706176758, "learning_rate": 1.967756690997567e-05, "loss": 1.5576, "step": 312200 }, { "epoch": 6.072921730675741, "grad_norm": 5.229411602020264, "learning_rate": 1.9667834549878348e-05, "loss": 1.4929, "step": 312300 }, { "epoch": 6.074866310160428, "grad_norm": 12.167957305908203, "learning_rate": 1.965810218978102e-05, "loss": 1.6184, "step": 312400 }, { "epoch": 6.076810889645114, "grad_norm": 
3.5662593841552734, "learning_rate": 1.9648369829683698e-05, "loss": 1.518, "step": 312500 }, { "epoch": 6.078755469129801, "grad_norm": 3.3929920196533203, "learning_rate": 1.9638637469586375e-05, "loss": 1.4576, "step": 312600 }, { "epoch": 6.080700048614487, "grad_norm": 4.429834842681885, "learning_rate": 1.9628905109489052e-05, "loss": 1.3158, "step": 312700 }, { "epoch": 6.082644628099174, "grad_norm": 2.8558926582336426, "learning_rate": 1.961917274939173e-05, "loss": 1.5766, "step": 312800 }, { "epoch": 6.08458920758386, "grad_norm": 16.17397689819336, "learning_rate": 1.9609440389294406e-05, "loss": 1.4455, "step": 312900 }, { "epoch": 6.0865337870685465, "grad_norm": 3.60321044921875, "learning_rate": 1.959970802919708e-05, "loss": 1.4175, "step": 313000 }, { "epoch": 6.0884783665532325, "grad_norm": 4.545898914337158, "learning_rate": 1.9589975669099757e-05, "loss": 1.3739, "step": 313100 }, { "epoch": 6.090422946037919, "grad_norm": 9.710282325744629, "learning_rate": 1.9580243309002434e-05, "loss": 1.4045, "step": 313200 }, { "epoch": 6.092367525522606, "grad_norm": 6.412103176116943, "learning_rate": 1.957060827250608e-05, "loss": 1.4468, "step": 313300 }, { "epoch": 6.094312105007292, "grad_norm": 3.2482750415802, "learning_rate": 1.9560875912408762e-05, "loss": 1.4889, "step": 313400 }, { "epoch": 6.096256684491979, "grad_norm": 4.788947105407715, "learning_rate": 1.955114355231144e-05, "loss": 1.7007, "step": 313500 }, { "epoch": 6.098201263976665, "grad_norm": 3.4273681640625, "learning_rate": 1.9541411192214113e-05, "loss": 1.357, "step": 313600 }, { "epoch": 6.100145843461352, "grad_norm": 5.393561363220215, "learning_rate": 1.953167883211679e-05, "loss": 1.4996, "step": 313700 }, { "epoch": 6.102090422946038, "grad_norm": 4.627309322357178, "learning_rate": 1.952204379562044e-05, "loss": 1.5413, "step": 313800 }, { "epoch": 6.104035002430725, "grad_norm": 3.894895076751709, "learning_rate": 1.9512311435523114e-05, "loss": 1.4352, "step": 313900 
}, { "epoch": 6.105979581915411, "grad_norm": 3.846920967102051, "learning_rate": 1.950257907542579e-05, "loss": 1.5199, "step": 314000 }, { "epoch": 6.107924161400097, "grad_norm": 3.5465757846832275, "learning_rate": 1.949284671532847e-05, "loss": 1.4115, "step": 314100 }, { "epoch": 6.109868740884783, "grad_norm": 4.613507270812988, "learning_rate": 1.9483114355231145e-05, "loss": 1.4517, "step": 314200 }, { "epoch": 6.11181332036947, "grad_norm": 6.56162691116333, "learning_rate": 1.9473381995133823e-05, "loss": 1.5402, "step": 314300 }, { "epoch": 6.113757899854156, "grad_norm": 3.2641592025756836, "learning_rate": 1.9463649635036496e-05, "loss": 1.337, "step": 314400 }, { "epoch": 6.115702479338843, "grad_norm": 6.7088727951049805, "learning_rate": 1.9453917274939173e-05, "loss": 1.492, "step": 314500 }, { "epoch": 6.117647058823529, "grad_norm": 2.4839587211608887, "learning_rate": 1.944418491484185e-05, "loss": 1.4608, "step": 314600 }, { "epoch": 6.119591638308216, "grad_norm": 2.6403870582580566, "learning_rate": 1.9434452554744527e-05, "loss": 1.4672, "step": 314700 }, { "epoch": 6.121536217792903, "grad_norm": 4.043313503265381, "learning_rate": 1.9424720194647204e-05, "loss": 1.462, "step": 314800 }, { "epoch": 6.123480797277589, "grad_norm": 7.226438522338867, "learning_rate": 1.941498783454988e-05, "loss": 1.5077, "step": 314900 }, { "epoch": 6.1254253767622755, "grad_norm": 6.230031967163086, "learning_rate": 1.9405255474452555e-05, "loss": 1.3975, "step": 315000 }, { "epoch": 6.1254253767622755, "eval_accuracy": 0.5596, "eval_f1": 0.5443459559632018, "eval_loss": 1.1785250902175903, "eval_precision": 0.5683913121411203, "eval_recall": 0.5596, "eval_runtime": 11721.5636, "eval_samples_per_second": 15.356, "eval_steps_per_second": 0.48, "step": 315000 }, { "epoch": 6.1273699562469615, "grad_norm": 3.480191946029663, "learning_rate": 1.9395523114355232e-05, "loss": 1.5452, "step": 315100 }, { "epoch": 6.129314535731648, "grad_norm": 3.818809747695923, 
"learning_rate": 1.938579075425791e-05, "loss": 1.4611, "step": 315200 }, { "epoch": 6.131259115216334, "grad_norm": 3.5162010192871094, "learning_rate": 1.9376058394160586e-05, "loss": 1.4199, "step": 315300 }, { "epoch": 6.133203694701021, "grad_norm": 3.187256336212158, "learning_rate": 1.9366326034063263e-05, "loss": 1.4758, "step": 315400 }, { "epoch": 6.135148274185707, "grad_norm": 4.672565460205078, "learning_rate": 1.9356593673965937e-05, "loss": 1.4139, "step": 315500 }, { "epoch": 6.137092853670394, "grad_norm": 5.792416095733643, "learning_rate": 1.9346861313868614e-05, "loss": 1.4549, "step": 315600 }, { "epoch": 6.13903743315508, "grad_norm": 3.168485403060913, "learning_rate": 1.933712895377129e-05, "loss": 1.4716, "step": 315700 }, { "epoch": 6.140982012639767, "grad_norm": 1.3486725091934204, "learning_rate": 1.9327396593673964e-05, "loss": 1.4219, "step": 315800 }, { "epoch": 6.142926592124453, "grad_norm": 0.7734708786010742, "learning_rate": 1.9317664233576645e-05, "loss": 1.4565, "step": 315900 }, { "epoch": 6.1448711716091395, "grad_norm": 3.580396890640259, "learning_rate": 1.930793187347932e-05, "loss": 1.5072, "step": 316000 }, { "epoch": 6.146815751093826, "grad_norm": 5.029633522033691, "learning_rate": 1.9298199513381995e-05, "loss": 1.4557, "step": 316100 }, { "epoch": 6.148760330578512, "grad_norm": 4.151611328125, "learning_rate": 1.9288467153284672e-05, "loss": 1.5537, "step": 316200 }, { "epoch": 6.150704910063199, "grad_norm": 3.4311344623565674, "learning_rate": 1.927873479318735e-05, "loss": 1.5262, "step": 316300 }, { "epoch": 6.152649489547885, "grad_norm": 2.5758349895477295, "learning_rate": 1.9269002433090026e-05, "loss": 1.4774, "step": 316400 }, { "epoch": 6.154594069032572, "grad_norm": 2.979952096939087, "learning_rate": 1.9259270072992703e-05, "loss": 1.5383, "step": 316500 }, { "epoch": 6.156538648517258, "grad_norm": 5.082278728485107, "learning_rate": 1.9249537712895377e-05, "loss": 1.5438, "step": 316600 }, { 
"epoch": 6.158483228001945, "grad_norm": 8.53520393371582, "learning_rate": 1.9239805352798054e-05, "loss": 1.417, "step": 316700 }, { "epoch": 6.160427807486631, "grad_norm": 7.569393634796143, "learning_rate": 1.923007299270073e-05, "loss": 1.4948, "step": 316800 }, { "epoch": 6.162372386971318, "grad_norm": 2.921064853668213, "learning_rate": 1.9220340632603405e-05, "loss": 1.387, "step": 316900 }, { "epoch": 6.164316966456004, "grad_norm": 5.178424835205078, "learning_rate": 1.9210608272506085e-05, "loss": 1.3901, "step": 317000 }, { "epoch": 6.16626154594069, "grad_norm": 5.0535888671875, "learning_rate": 1.9200875912408762e-05, "loss": 1.5991, "step": 317100 }, { "epoch": 6.168206125425376, "grad_norm": 2.6345207691192627, "learning_rate": 1.9191143552311436e-05, "loss": 1.4488, "step": 317200 }, { "epoch": 6.170150704910063, "grad_norm": 3.6427628993988037, "learning_rate": 1.9181411192214113e-05, "loss": 1.4389, "step": 317300 }, { "epoch": 6.17209528439475, "grad_norm": 5.04641580581665, "learning_rate": 1.917167883211679e-05, "loss": 1.5143, "step": 317400 }, { "epoch": 6.174039863879436, "grad_norm": 3.6857101917266846, "learning_rate": 1.9161946472019467e-05, "loss": 1.3954, "step": 317500 }, { "epoch": 6.175984443364123, "grad_norm": 3.692680835723877, "learning_rate": 1.9152214111922144e-05, "loss": 1.4569, "step": 317600 }, { "epoch": 6.177929022848809, "grad_norm": 3.6057026386260986, "learning_rate": 1.9142481751824817e-05, "loss": 1.4758, "step": 317700 }, { "epoch": 6.179873602333496, "grad_norm": 6.3172430992126465, "learning_rate": 1.913284671532847e-05, "loss": 1.3429, "step": 317800 }, { "epoch": 6.181818181818182, "grad_norm": 3.3836917877197266, "learning_rate": 1.9123114355231146e-05, "loss": 1.5908, "step": 317900 }, { "epoch": 6.1837627613028685, "grad_norm": 3.6280856132507324, "learning_rate": 1.9113381995133823e-05, "loss": 1.4254, "step": 318000 }, { "epoch": 6.1857073407875545, "grad_norm": 8.846024513244629, "learning_rate": 
1.9103649635036496e-05, "loss": 1.4657, "step": 318100 }, { "epoch": 6.187651920272241, "grad_norm": 6.840804576873779, "learning_rate": 1.9093917274939173e-05, "loss": 1.2984, "step": 318200 }, { "epoch": 6.189596499756927, "grad_norm": 3.867169141769409, "learning_rate": 1.908418491484185e-05, "loss": 1.4072, "step": 318300 }, { "epoch": 6.191541079241614, "grad_norm": 5.015232086181641, "learning_rate": 1.9074452554744527e-05, "loss": 1.3633, "step": 318400 }, { "epoch": 6.1934856587263, "grad_norm": 5.513073444366455, "learning_rate": 1.9064720194647204e-05, "loss": 1.4674, "step": 318500 }, { "epoch": 6.195430238210987, "grad_norm": 4.115925312042236, "learning_rate": 1.9054987834549878e-05, "loss": 1.4307, "step": 318600 }, { "epoch": 6.197374817695673, "grad_norm": 7.280549049377441, "learning_rate": 1.9045255474452555e-05, "loss": 1.3774, "step": 318700 }, { "epoch": 6.19931939718036, "grad_norm": 4.222359657287598, "learning_rate": 1.9035523114355232e-05, "loss": 1.4399, "step": 318800 }, { "epoch": 6.201263976665047, "grad_norm": 4.2943830490112305, "learning_rate": 1.902579075425791e-05, "loss": 1.4404, "step": 318900 }, { "epoch": 6.2032085561497325, "grad_norm": 4.500814437866211, "learning_rate": 1.9016058394160586e-05, "loss": 1.5883, "step": 319000 }, { "epoch": 6.205153135634419, "grad_norm": 8.573968887329102, "learning_rate": 1.9006326034063263e-05, "loss": 1.4392, "step": 319100 }, { "epoch": 6.207097715119105, "grad_norm": 3.281327724456787, "learning_rate": 1.8996593673965937e-05, "loss": 1.5497, "step": 319200 }, { "epoch": 6.209042294603792, "grad_norm": 3.1374824047088623, "learning_rate": 1.8986861313868614e-05, "loss": 1.4453, "step": 319300 }, { "epoch": 6.210986874088478, "grad_norm": 4.742597579956055, "learning_rate": 1.897712895377129e-05, "loss": 1.4873, "step": 319400 }, { "epoch": 6.212931453573165, "grad_norm": 2.7687182426452637, "learning_rate": 1.8967396593673968e-05, "loss": 1.5082, "step": 319500 }, { "epoch": 
6.214876033057851, "grad_norm": 6.333176136016846, "learning_rate": 1.8957664233576645e-05, "loss": 1.4187, "step": 319600 }, { "epoch": 6.216820612542538, "grad_norm": 5.05058479309082, "learning_rate": 1.894793187347932e-05, "loss": 1.3786, "step": 319700 }, { "epoch": 6.218765192027224, "grad_norm": 5.887439250946045, "learning_rate": 1.893829683698297e-05, "loss": 1.417, "step": 319800 }, { "epoch": 6.220709771511911, "grad_norm": 5.484090805053711, "learning_rate": 1.8928564476885646e-05, "loss": 1.3957, "step": 319900 }, { "epoch": 6.222654350996597, "grad_norm": 6.095386505126953, "learning_rate": 1.891883211678832e-05, "loss": 1.6069, "step": 320000 }, { "epoch": 6.222654350996597, "eval_accuracy": 0.5633611111111111, "eval_f1": 0.5541101785603595, "eval_loss": 1.1684919595718384, "eval_precision": 0.563190115444196, "eval_recall": 0.5633611111111111, "eval_runtime": 11719.4936, "eval_samples_per_second": 15.359, "eval_steps_per_second": 0.48, "step": 320000 }, { "epoch": 6.224598930481283, "grad_norm": 4.729607582092285, "learning_rate": 1.8909099756690997e-05, "loss": 1.5094, "step": 320100 }, { "epoch": 6.22654350996597, "grad_norm": 4.298061847686768, "learning_rate": 1.8899367396593674e-05, "loss": 1.7356, "step": 320200 }, { "epoch": 6.228488089450656, "grad_norm": 3.123292922973633, "learning_rate": 1.888963503649635e-05, "loss": 1.5314, "step": 320300 }, { "epoch": 6.230432668935343, "grad_norm": 3.4583261013031006, "learning_rate": 1.8879902676399028e-05, "loss": 1.5312, "step": 320400 }, { "epoch": 6.232377248420029, "grad_norm": 2.6702096462249756, "learning_rate": 1.8870170316301705e-05, "loss": 1.5627, "step": 320500 }, { "epoch": 6.234321827904716, "grad_norm": 4.060761451721191, "learning_rate": 1.886043795620438e-05, "loss": 1.3561, "step": 320600 }, { "epoch": 6.236266407389402, "grad_norm": 2.151110887527466, "learning_rate": 1.8850705596107056e-05, "loss": 1.4464, "step": 320700 }, { "epoch": 6.238210986874089, "grad_norm": 
3.668806791305542, "learning_rate": 1.8840973236009736e-05, "loss": 1.4056, "step": 320800 }, { "epoch": 6.240155566358775, "grad_norm": 3.387866258621216, "learning_rate": 1.883124087591241e-05, "loss": 1.4274, "step": 320900 }, { "epoch": 6.2421001458434615, "grad_norm": 3.1053788661956787, "learning_rate": 1.8821508515815087e-05, "loss": 1.3961, "step": 321000 }, { "epoch": 6.2440447253281475, "grad_norm": 4.321255207061768, "learning_rate": 1.881177615571776e-05, "loss": 1.4705, "step": 321100 }, { "epoch": 6.245989304812834, "grad_norm": 10.237278938293457, "learning_rate": 1.8802043795620438e-05, "loss": 1.3709, "step": 321200 }, { "epoch": 6.24793388429752, "grad_norm": 4.130396366119385, "learning_rate": 1.8792311435523115e-05, "loss": 1.3846, "step": 321300 }, { "epoch": 6.249878463782207, "grad_norm": 4.296258926391602, "learning_rate": 1.878257907542579e-05, "loss": 1.4023, "step": 321400 }, { "epoch": 6.251823043266894, "grad_norm": 3.6013894081115723, "learning_rate": 1.877284671532847e-05, "loss": 1.3515, "step": 321500 }, { "epoch": 6.25376762275158, "grad_norm": 5.470593452453613, "learning_rate": 1.8763114355231146e-05, "loss": 1.4714, "step": 321600 }, { "epoch": 6.255712202236267, "grad_norm": 25.212718963623047, "learning_rate": 1.875338199513382e-05, "loss": 1.4518, "step": 321700 }, { "epoch": 6.257656781720953, "grad_norm": 4.303997993469238, "learning_rate": 1.874374695863747e-05, "loss": 1.3972, "step": 321800 }, { "epoch": 6.25960136120564, "grad_norm": 15.539963722229004, "learning_rate": 1.8734014598540147e-05, "loss": 1.4891, "step": 321900 }, { "epoch": 6.2615459406903256, "grad_norm": 3.0610439777374268, "learning_rate": 1.872428223844282e-05, "loss": 1.4337, "step": 322000 }, { "epoch": 6.263490520175012, "grad_norm": 3.567678689956665, "learning_rate": 1.87145498783455e-05, "loss": 1.4541, "step": 322100 }, { "epoch": 6.265435099659698, "grad_norm": 2.320155382156372, "learning_rate": 1.870481751824818e-05, "loss": 1.5213, "step": 
322200 }, { "epoch": 6.267379679144385, "grad_norm": 3.4761383533477783, "learning_rate": 1.8695085158150852e-05, "loss": 1.6023, "step": 322300 }, { "epoch": 6.269324258629071, "grad_norm": 3.7232134342193604, "learning_rate": 1.868535279805353e-05, "loss": 1.5833, "step": 322400 }, { "epoch": 6.271268838113758, "grad_norm": 3.4247965812683105, "learning_rate": 1.8675620437956206e-05, "loss": 1.391, "step": 322500 }, { "epoch": 6.273213417598444, "grad_norm": 5.943592071533203, "learning_rate": 1.866588807785888e-05, "loss": 1.4453, "step": 322600 }, { "epoch": 6.275157997083131, "grad_norm": 4.904018878936768, "learning_rate": 1.865615571776156e-05, "loss": 1.5247, "step": 322700 }, { "epoch": 6.277102576567817, "grad_norm": 7.191188812255859, "learning_rate": 1.8646423357664234e-05, "loss": 1.4007, "step": 322800 }, { "epoch": 6.279047156052504, "grad_norm": 4.9659271240234375, "learning_rate": 1.863669099756691e-05, "loss": 1.4296, "step": 322900 }, { "epoch": 6.2809917355371905, "grad_norm": 3.9912312030792236, "learning_rate": 1.8626958637469588e-05, "loss": 1.4879, "step": 323000 }, { "epoch": 6.282936315021876, "grad_norm": 7.168257236480713, "learning_rate": 1.861722627737226e-05, "loss": 1.5228, "step": 323100 }, { "epoch": 6.284880894506563, "grad_norm": 4.867827892303467, "learning_rate": 1.8607493917274942e-05, "loss": 1.6904, "step": 323200 }, { "epoch": 6.286825473991249, "grad_norm": 4.283023357391357, "learning_rate": 1.859776155717762e-05, "loss": 1.2988, "step": 323300 }, { "epoch": 6.288770053475936, "grad_norm": 6.246048450469971, "learning_rate": 1.8588029197080292e-05, "loss": 1.418, "step": 323400 }, { "epoch": 6.290714632960622, "grad_norm": 9.661216735839844, "learning_rate": 1.857829683698297e-05, "loss": 1.3637, "step": 323500 }, { "epoch": 6.292659212445309, "grad_norm": 2.992645025253296, "learning_rate": 1.8568564476885646e-05, "loss": 1.5127, "step": 323600 }, { "epoch": 6.294603791929995, "grad_norm": 2.260132074356079, 
"learning_rate": 1.855883211678832e-05, "loss": 1.4907, "step": 323700 }, { "epoch": 6.296548371414682, "grad_norm": 4.363528728485107, "learning_rate": 1.854919708029197e-05, "loss": 1.5121, "step": 323800 }, { "epoch": 6.298492950899368, "grad_norm": 2.610535144805908, "learning_rate": 1.8539464720194648e-05, "loss": 1.5826, "step": 323900 }, { "epoch": 6.3004375303840545, "grad_norm": 14.684494972229004, "learning_rate": 1.8529732360097325e-05, "loss": 1.636, "step": 324000 }, { "epoch": 6.3023821098687405, "grad_norm": 3.0857560634613037, "learning_rate": 1.8520000000000002e-05, "loss": 1.3687, "step": 324100 }, { "epoch": 6.304326689353427, "grad_norm": 3.3324716091156006, "learning_rate": 1.851026763990268e-05, "loss": 1.33, "step": 324200 }, { "epoch": 6.306271268838113, "grad_norm": 3.9890902042388916, "learning_rate": 1.8500535279805353e-05, "loss": 1.5765, "step": 324300 }, { "epoch": 6.3082158483228, "grad_norm": 6.690783500671387, "learning_rate": 1.849080291970803e-05, "loss": 1.3767, "step": 324400 }, { "epoch": 6.310160427807487, "grad_norm": 3.3850648403167725, "learning_rate": 1.8481070559610707e-05, "loss": 1.4216, "step": 324500 }, { "epoch": 6.312105007292173, "grad_norm": 5.3242950439453125, "learning_rate": 1.8471338199513384e-05, "loss": 1.4392, "step": 324600 }, { "epoch": 6.31404958677686, "grad_norm": 6.471696853637695, "learning_rate": 1.846160583941606e-05, "loss": 1.5352, "step": 324700 }, { "epoch": 6.315994166261546, "grad_norm": 3.9814276695251465, "learning_rate": 1.8451873479318735e-05, "loss": 1.684, "step": 324800 }, { "epoch": 6.317938745746233, "grad_norm": 4.636258125305176, "learning_rate": 1.844214111922141e-05, "loss": 1.4911, "step": 324900 }, { "epoch": 6.319883325230919, "grad_norm": 7.094061851501465, "learning_rate": 1.843240875912409e-05, "loss": 1.3608, "step": 325000 }, { "epoch": 6.319883325230919, "eval_accuracy": 0.56725, "eval_f1": 0.5580928278141694, "eval_loss": 1.1589263677597046, "eval_precision": 
0.5600027645379897, "eval_recall": 0.56725, "eval_runtime": 11719.114, "eval_samples_per_second": 15.36, "eval_steps_per_second": 0.48, "step": 325000 }, { "epoch": 6.321827904715605, "grad_norm": 2.700863838195801, "learning_rate": 1.8422676399026766e-05, "loss": 1.3251, "step": 325100 }, { "epoch": 6.323772484200291, "grad_norm": 13.095878601074219, "learning_rate": 1.8412944038929443e-05, "loss": 1.4106, "step": 325200 }, { "epoch": 6.325717063684978, "grad_norm": 6.026397705078125, "learning_rate": 1.840321167883212e-05, "loss": 1.4413, "step": 325300 }, { "epoch": 6.327661643169664, "grad_norm": 5.294804096221924, "learning_rate": 1.8393479318734793e-05, "loss": 1.5506, "step": 325400 }, { "epoch": 6.329606222654351, "grad_norm": 3.2760119438171387, "learning_rate": 1.838374695863747e-05, "loss": 1.4018, "step": 325500 }, { "epoch": 6.331550802139038, "grad_norm": 3.6233794689178467, "learning_rate": 1.8374014598540147e-05, "loss": 1.5359, "step": 325600 }, { "epoch": 6.333495381623724, "grad_norm": 4.903733730316162, "learning_rate": 1.8364282238442824e-05, "loss": 1.496, "step": 325700 }, { "epoch": 6.335439961108411, "grad_norm": 5.383787631988525, "learning_rate": 1.83545498783455e-05, "loss": 1.4162, "step": 325800 }, { "epoch": 6.337384540593097, "grad_norm": 4.269700527191162, "learning_rate": 1.834491484184915e-05, "loss": 1.5169, "step": 325900 }, { "epoch": 6.3393291200777835, "grad_norm": 13.533833503723145, "learning_rate": 1.8335182481751826e-05, "loss": 1.5429, "step": 326000 }, { "epoch": 6.3412736995624694, "grad_norm": 6.302928447723389, "learning_rate": 1.8325450121654503e-05, "loss": 1.44, "step": 326100 }, { "epoch": 6.343218279047156, "grad_norm": 4.290900707244873, "learning_rate": 1.8315815085158154e-05, "loss": 1.4061, "step": 326200 }, { "epoch": 6.345162858531842, "grad_norm": 3.0899057388305664, "learning_rate": 1.8306082725060828e-05, "loss": 1.4328, "step": 326300 }, { "epoch": 6.347107438016529, "grad_norm": 8.438668251037598, 
"learning_rate": 1.8296350364963505e-05, "loss": 1.3774, "step": 326400 }, { "epoch": 6.349052017501215, "grad_norm": 3.9385383129119873, "learning_rate": 1.8286618004866182e-05, "loss": 1.6306, "step": 326500 }, { "epoch": 6.350996596985902, "grad_norm": 7.61837911605835, "learning_rate": 1.8276885644768856e-05, "loss": 1.5477, "step": 326600 }, { "epoch": 6.352941176470588, "grad_norm": 7.709361553192139, "learning_rate": 1.8267153284671536e-05, "loss": 1.4942, "step": 326700 }, { "epoch": 6.354885755955275, "grad_norm": 3.870912790298462, "learning_rate": 1.825742092457421e-05, "loss": 1.4777, "step": 326800 }, { "epoch": 6.356830335439961, "grad_norm": 3.4782419204711914, "learning_rate": 1.8247688564476887e-05, "loss": 1.7029, "step": 326900 }, { "epoch": 6.3587749149246475, "grad_norm": 3.6134583950042725, "learning_rate": 1.8237956204379564e-05, "loss": 1.3732, "step": 327000 }, { "epoch": 6.360719494409334, "grad_norm": 2.942739963531494, "learning_rate": 1.8228223844282237e-05, "loss": 1.583, "step": 327100 }, { "epoch": 6.36266407389402, "grad_norm": 5.727865695953369, "learning_rate": 1.8218491484184914e-05, "loss": 1.4764, "step": 327200 }, { "epoch": 6.364608653378707, "grad_norm": 2.9978928565979004, "learning_rate": 1.8208759124087595e-05, "loss": 1.4576, "step": 327300 }, { "epoch": 6.366553232863393, "grad_norm": 4.862885475158691, "learning_rate": 1.8199026763990268e-05, "loss": 1.4077, "step": 327400 }, { "epoch": 6.36849781234808, "grad_norm": 8.506243705749512, "learning_rate": 1.8189294403892945e-05, "loss": 1.5823, "step": 327500 }, { "epoch": 6.370442391832766, "grad_norm": 3.633310079574585, "learning_rate": 1.8179562043795622e-05, "loss": 1.5548, "step": 327600 }, { "epoch": 6.372386971317453, "grad_norm": 14.865370750427246, "learning_rate": 1.8169829683698296e-05, "loss": 1.4591, "step": 327700 }, { "epoch": 6.374331550802139, "grad_norm": 2.1714751720428467, "learning_rate": 1.8160097323600976e-05, "loss": 1.508, "step": 327800 }, { 
"epoch": 6.376276130286826, "grad_norm": 2.592644214630127, "learning_rate": 1.815036496350365e-05, "loss": 1.4935, "step": 327900 }, { "epoch": 6.378220709771512, "grad_norm": 2.9890706539154053, "learning_rate": 1.8140632603406327e-05, "loss": 1.3911, "step": 328000 }, { "epoch": 6.380165289256198, "grad_norm": 3.924994707107544, "learning_rate": 1.8130900243309004e-05, "loss": 1.3515, "step": 328100 }, { "epoch": 6.382109868740884, "grad_norm": 5.209507465362549, "learning_rate": 1.8121167883211678e-05, "loss": 1.4534, "step": 328200 }, { "epoch": 6.384054448225571, "grad_norm": 6.166680812835693, "learning_rate": 1.8111435523114355e-05, "loss": 1.4525, "step": 328300 }, { "epoch": 6.385999027710258, "grad_norm": 2.0258662700653076, "learning_rate": 1.8101703163017035e-05, "loss": 1.475, "step": 328400 }, { "epoch": 6.387943607194944, "grad_norm": 5.197487831115723, "learning_rate": 1.809197080291971e-05, "loss": 1.4274, "step": 328500 }, { "epoch": 6.389888186679631, "grad_norm": 4.809579372406006, "learning_rate": 1.8082238442822386e-05, "loss": 1.5105, "step": 328600 }, { "epoch": 6.391832766164317, "grad_norm": 2.73152232170105, "learning_rate": 1.8072506082725063e-05, "loss": 1.3838, "step": 328700 }, { "epoch": 6.393777345649004, "grad_norm": 8.15272331237793, "learning_rate": 1.8062773722627736e-05, "loss": 1.5323, "step": 328800 }, { "epoch": 6.39572192513369, "grad_norm": 5.3758111000061035, "learning_rate": 1.8053041362530417e-05, "loss": 1.5679, "step": 328900 }, { "epoch": 6.3976665046183765, "grad_norm": 3.4386982917785645, "learning_rate": 1.804330900243309e-05, "loss": 1.3601, "step": 329000 }, { "epoch": 6.3996110841030625, "grad_norm": 5.464317798614502, "learning_rate": 1.8033576642335767e-05, "loss": 1.4125, "step": 329100 }, { "epoch": 6.401555663587749, "grad_norm": 4.890077114105225, "learning_rate": 1.8023844282238444e-05, "loss": 1.6096, "step": 329200 }, { "epoch": 6.403500243072435, "grad_norm": 3.8405470848083496, "learning_rate": 
1.8014111922141118e-05, "loss": 1.3999, "step": 329300 }, { "epoch": 6.405444822557122, "grad_norm": 3.627624988555908, "learning_rate": 1.8004379562043795e-05, "loss": 1.5145, "step": 329400 }, { "epoch": 6.407389402041808, "grad_norm": 5.940077781677246, "learning_rate": 1.7994647201946476e-05, "loss": 1.5194, "step": 329500 }, { "epoch": 6.409333981526495, "grad_norm": 3.9798583984375, "learning_rate": 1.798491484184915e-05, "loss": 1.4971, "step": 329600 }, { "epoch": 6.411278561011182, "grad_norm": 2.8324646949768066, "learning_rate": 1.79752798053528e-05, "loss": 1.562, "step": 329700 }, { "epoch": 6.413223140495868, "grad_norm": 4.713772296905518, "learning_rate": 1.7965547445255477e-05, "loss": 1.3959, "step": 329800 }, { "epoch": 6.415167719980555, "grad_norm": 3.7149033546447754, "learning_rate": 1.795581508515815e-05, "loss": 1.5866, "step": 329900 }, { "epoch": 6.4171122994652405, "grad_norm": 8.004486083984375, "learning_rate": 1.7946082725060828e-05, "loss": 1.5021, "step": 330000 }, { "epoch": 6.4171122994652405, "eval_accuracy": 0.5576388888888889, "eval_f1": 0.5434826268018718, "eval_loss": 1.1798685789108276, "eval_precision": 0.5561496879985319, "eval_recall": 0.5576388888888889, "eval_runtime": 11711.0828, "eval_samples_per_second": 15.37, "eval_steps_per_second": 0.48, "step": 330000 }, { "epoch": 6.419056878949927, "grad_norm": 2.6609749794006348, "learning_rate": 1.7936350364963505e-05, "loss": 1.4048, "step": 330100 }, { "epoch": 6.421001458434613, "grad_norm": 5.0934271812438965, "learning_rate": 1.7926618004866182e-05, "loss": 1.456, "step": 330200 }, { "epoch": 6.4229460379193, "grad_norm": 3.591728925704956, "learning_rate": 1.791688564476886e-05, "loss": 1.3673, "step": 330300 }, { "epoch": 6.424890617403986, "grad_norm": 5.462815761566162, "learning_rate": 1.7907153284671536e-05, "loss": 1.5172, "step": 330400 }, { "epoch": 6.426835196888673, "grad_norm": 3.7901976108551025, "learning_rate": 1.789742092457421e-05, "loss": 1.4558, 
"step": 330500 }, { "epoch": 6.428779776373359, "grad_norm": 11.196703910827637, "learning_rate": 1.7887688564476887e-05, "loss": 1.5534, "step": 330600 }, { "epoch": 6.430724355858046, "grad_norm": 3.977976083755493, "learning_rate": 1.787795620437956e-05, "loss": 1.6568, "step": 330700 }, { "epoch": 6.432668935342732, "grad_norm": 1.8892567157745361, "learning_rate": 1.786822384428224e-05, "loss": 1.6632, "step": 330800 }, { "epoch": 6.434613514827419, "grad_norm": 3.3128931522369385, "learning_rate": 1.7858491484184918e-05, "loss": 1.6218, "step": 330900 }, { "epoch": 6.436558094312105, "grad_norm": 4.3868327140808105, "learning_rate": 1.784875912408759e-05, "loss": 1.5248, "step": 331000 }, { "epoch": 6.438502673796791, "grad_norm": 3.0118391513824463, "learning_rate": 1.783902676399027e-05, "loss": 1.5576, "step": 331100 }, { "epoch": 6.440447253281478, "grad_norm": 8.080787658691406, "learning_rate": 1.7829294403892945e-05, "loss": 1.5306, "step": 331200 }, { "epoch": 6.442391832766164, "grad_norm": 12.02663516998291, "learning_rate": 1.781956204379562e-05, "loss": 1.4924, "step": 331300 }, { "epoch": 6.444336412250851, "grad_norm": 18.309213638305664, "learning_rate": 1.78098296836983e-05, "loss": 1.5761, "step": 331400 }, { "epoch": 6.446280991735537, "grad_norm": 5.691696643829346, "learning_rate": 1.7800097323600973e-05, "loss": 1.4572, "step": 331500 }, { "epoch": 6.448225571220224, "grad_norm": 9.501502990722656, "learning_rate": 1.779036496350365e-05, "loss": 1.4607, "step": 331600 }, { "epoch": 6.45017015070491, "grad_norm": 3.5113320350646973, "learning_rate": 1.7780632603406327e-05, "loss": 1.5919, "step": 331700 }, { "epoch": 6.452114730189597, "grad_norm": 3.375302791595459, "learning_rate": 1.7770900243309e-05, "loss": 1.5204, "step": 331800 }, { "epoch": 6.454059309674283, "grad_norm": 6.950946807861328, "learning_rate": 1.776116788321168e-05, "loss": 1.4458, "step": 331900 }, { "epoch": 6.4560038891589695, "grad_norm": 3.9952073097229004, 
"learning_rate": 1.7751435523114358e-05, "loss": 1.3093, "step": 332000 }, { "epoch": 6.4579484686436555, "grad_norm": 3.7535219192504883, "learning_rate": 1.7741703163017032e-05, "loss": 1.4115, "step": 332100 }, { "epoch": 6.459893048128342, "grad_norm": 6.246533393859863, "learning_rate": 1.773197080291971e-05, "loss": 1.5079, "step": 332200 }, { "epoch": 6.461837627613028, "grad_norm": 5.043905735015869, "learning_rate": 1.7722238442822386e-05, "loss": 1.4254, "step": 332300 }, { "epoch": 6.463782207097715, "grad_norm": 4.572514533996582, "learning_rate": 1.771250608272506e-05, "loss": 1.5916, "step": 332400 }, { "epoch": 6.465726786582402, "grad_norm": 2.78344988822937, "learning_rate": 1.770277372262774e-05, "loss": 1.5773, "step": 332500 }, { "epoch": 6.467671366067088, "grad_norm": 5.235777854919434, "learning_rate": 1.7693041362530413e-05, "loss": 1.3315, "step": 332600 }, { "epoch": 6.469615945551775, "grad_norm": 3.218252658843994, "learning_rate": 1.768330900243309e-05, "loss": 1.5472, "step": 332700 }, { "epoch": 6.471560525036461, "grad_norm": 6.704249382019043, "learning_rate": 1.7673576642335768e-05, "loss": 1.404, "step": 332800 }, { "epoch": 6.473505104521148, "grad_norm": 1.0339585542678833, "learning_rate": 1.766384428223844e-05, "loss": 1.3682, "step": 332900 }, { "epoch": 6.4754496840058335, "grad_norm": 4.811272621154785, "learning_rate": 1.765411192214112e-05, "loss": 1.3941, "step": 333000 }, { "epoch": 6.47739426349052, "grad_norm": 3.7223634719848633, "learning_rate": 1.76443795620438e-05, "loss": 1.3849, "step": 333100 }, { "epoch": 6.479338842975206, "grad_norm": 10.399712562561035, "learning_rate": 1.7634647201946472e-05, "loss": 1.3881, "step": 333200 }, { "epoch": 6.481283422459893, "grad_norm": 3.136148691177368, "learning_rate": 1.762491484184915e-05, "loss": 1.353, "step": 333300 }, { "epoch": 6.483228001944579, "grad_norm": 5.214229583740234, "learning_rate": 1.7615182481751826e-05, "loss": 1.4209, "step": 333400 }, { "epoch": 
6.485172581429266, "grad_norm": 3.1538076400756836, "learning_rate": 1.76054501216545e-05, "loss": 1.4726, "step": 333500 }, { "epoch": 6.487117160913952, "grad_norm": 3.081408739089966, "learning_rate": 1.759581508515815e-05, "loss": 1.5135, "step": 333600 }, { "epoch": 6.489061740398639, "grad_norm": 5.243956089019775, "learning_rate": 1.7586082725060828e-05, "loss": 1.4828, "step": 333700 }, { "epoch": 6.491006319883326, "grad_norm": 2.6874594688415527, "learning_rate": 1.7576350364963505e-05, "loss": 1.3674, "step": 333800 }, { "epoch": 6.492950899368012, "grad_norm": 5.550246715545654, "learning_rate": 1.7566618004866182e-05, "loss": 1.6076, "step": 333900 }, { "epoch": 6.4948954788526985, "grad_norm": 4.1433892250061035, "learning_rate": 1.755688564476886e-05, "loss": 1.4392, "step": 334000 }, { "epoch": 6.496840058337384, "grad_norm": 2.2852768898010254, "learning_rate": 1.7547153284671533e-05, "loss": 1.3678, "step": 334100 }, { "epoch": 6.498784637822071, "grad_norm": 2.6213388442993164, "learning_rate": 1.753742092457421e-05, "loss": 1.3752, "step": 334200 }, { "epoch": 6.500729217306757, "grad_norm": 4.162505626678467, "learning_rate": 1.7527688564476887e-05, "loss": 1.5656, "step": 334300 }, { "epoch": 6.502673796791444, "grad_norm": 2.4854419231414795, "learning_rate": 1.7517956204379564e-05, "loss": 1.4469, "step": 334400 }, { "epoch": 6.50461837627613, "grad_norm": 4.294705390930176, "learning_rate": 1.750822384428224e-05, "loss": 1.3801, "step": 334500 }, { "epoch": 6.506562955760817, "grad_norm": 3.2983334064483643, "learning_rate": 1.7498491484184914e-05, "loss": 1.551, "step": 334600 }, { "epoch": 6.508507535245503, "grad_norm": 6.3215203285217285, "learning_rate": 1.748875912408759e-05, "loss": 1.3941, "step": 334700 }, { "epoch": 6.51045211473019, "grad_norm": 13.326635360717773, "learning_rate": 1.747902676399027e-05, "loss": 1.5555, "step": 334800 }, { "epoch": 6.512396694214876, "grad_norm": 3.295361280441284, "learning_rate": 
1.7469294403892945e-05, "loss": 1.5386, "step": 334900 }, { "epoch": 6.5143412736995625, "grad_norm": 4.825199604034424, "learning_rate": 1.7459562043795622e-05, "loss": 1.6022, "step": 335000 }, { "epoch": 6.5143412736995625, "eval_accuracy": 0.5617277777777778, "eval_f1": 0.550425035616443, "eval_loss": 1.1721656322479248, "eval_precision": 0.5579114859218491, "eval_recall": 0.5617277777777778, "eval_runtime": 11692.1621, "eval_samples_per_second": 15.395, "eval_steps_per_second": 0.481, "step": 335000 }, { "epoch": 6.5162858531842485, "grad_norm": 3.483297824859619, "learning_rate": 1.74498296836983e-05, "loss": 1.5153, "step": 335100 }, { "epoch": 6.518230432668935, "grad_norm": 4.646646499633789, "learning_rate": 1.7440097323600973e-05, "loss": 1.3967, "step": 335200 }, { "epoch": 6.520175012153622, "grad_norm": 6.400533199310303, "learning_rate": 1.743036496350365e-05, "loss": 1.5921, "step": 335300 }, { "epoch": 6.522119591638308, "grad_norm": 2.6917943954467773, "learning_rate": 1.7420632603406327e-05, "loss": 1.3792, "step": 335400 }, { "epoch": 6.524064171122995, "grad_norm": 1.328212022781372, "learning_rate": 1.7410900243309004e-05, "loss": 1.55, "step": 335500 }, { "epoch": 6.526008750607681, "grad_norm": 5.014604568481445, "learning_rate": 1.740116788321168e-05, "loss": 1.5901, "step": 335600 }, { "epoch": 6.527953330092368, "grad_norm": 6.698209285736084, "learning_rate": 1.7391435523114355e-05, "loss": 1.4781, "step": 335700 }, { "epoch": 6.529897909577054, "grad_norm": 3.1309876441955566, "learning_rate": 1.7381703163017032e-05, "loss": 1.5735, "step": 335800 }, { "epoch": 6.531842489061741, "grad_norm": 4.271028995513916, "learning_rate": 1.737197080291971e-05, "loss": 1.4074, "step": 335900 }, { "epoch": 6.5337870685464265, "grad_norm": 4.41569185256958, "learning_rate": 1.7362238442822386e-05, "loss": 1.3867, "step": 336000 }, { "epoch": 6.535731648031113, "grad_norm": 5.331923007965088, "learning_rate": 1.7352506082725063e-05, "loss": 1.4498, 
"step": 336100 }, { "epoch": 6.537676227515799, "grad_norm": 4.839564323425293, "learning_rate": 1.734277372262774e-05, "loss": 1.4848, "step": 336200 }, { "epoch": 6.539620807000486, "grad_norm": 6.165761470794678, "learning_rate": 1.7333041362530414e-05, "loss": 1.5712, "step": 336300 }, { "epoch": 6.541565386485173, "grad_norm": 5.009347438812256, "learning_rate": 1.732330900243309e-05, "loss": 1.5043, "step": 336400 }, { "epoch": 6.543509965969859, "grad_norm": 6.3258209228515625, "learning_rate": 1.7313576642335768e-05, "loss": 1.5121, "step": 336500 }, { "epoch": 6.545454545454545, "grad_norm": 2.7236430644989014, "learning_rate": 1.7303844282238445e-05, "loss": 1.3286, "step": 336600 }, { "epoch": 6.547399124939232, "grad_norm": 3.588529348373413, "learning_rate": 1.729411192214112e-05, "loss": 1.5076, "step": 336700 }, { "epoch": 6.549343704423919, "grad_norm": 3.3619768619537354, "learning_rate": 1.7284379562043795e-05, "loss": 1.5602, "step": 336800 }, { "epoch": 6.551288283908605, "grad_norm": 10.99946117401123, "learning_rate": 1.7274647201946472e-05, "loss": 1.439, "step": 336900 }, { "epoch": 6.5532328633932915, "grad_norm": 4.8366804122924805, "learning_rate": 1.726491484184915e-05, "loss": 1.6776, "step": 337000 }, { "epoch": 6.555177442877977, "grad_norm": 10.986165046691895, "learning_rate": 1.7255182481751826e-05, "loss": 1.4525, "step": 337100 }, { "epoch": 6.557122022362664, "grad_norm": 2.866612434387207, "learning_rate": 1.7245450121654503e-05, "loss": 1.4828, "step": 337200 }, { "epoch": 6.55906660184735, "grad_norm": 5.182183265686035, "learning_rate": 1.723571776155718e-05, "loss": 1.4712, "step": 337300 }, { "epoch": 6.561011181332037, "grad_norm": 5.299014091491699, "learning_rate": 1.7226082725060828e-05, "loss": 1.5102, "step": 337400 }, { "epoch": 6.562955760816723, "grad_norm": 3.698068618774414, "learning_rate": 1.7216350364963505e-05, "loss": 1.6026, "step": 337500 }, { "epoch": 6.56490034030141, "grad_norm": 3.4090359210968018, 
"learning_rate": 1.7206618004866182e-05, "loss": 1.5523, "step": 337600 }, { "epoch": 6.566844919786096, "grad_norm": 11.455697059631348, "learning_rate": 1.7196885644768856e-05, "loss": 1.4182, "step": 337700 }, { "epoch": 6.568789499270783, "grad_norm": 6.9990458488464355, "learning_rate": 1.7187153284671533e-05, "loss": 1.4352, "step": 337800 }, { "epoch": 6.5707340787554696, "grad_norm": 34.071693420410156, "learning_rate": 1.717742092457421e-05, "loss": 1.5181, "step": 337900 }, { "epoch": 6.5726786582401555, "grad_norm": 3.4173805713653564, "learning_rate": 1.7167688564476887e-05, "loss": 1.3663, "step": 338000 }, { "epoch": 6.574623237724842, "grad_norm": 2.3222148418426514, "learning_rate": 1.7157956204379564e-05, "loss": 1.4591, "step": 338100 }, { "epoch": 6.576567817209528, "grad_norm": 3.1245806217193604, "learning_rate": 1.7148223844282237e-05, "loss": 1.4121, "step": 338200 }, { "epoch": 6.578512396694215, "grad_norm": 3.5700721740722656, "learning_rate": 1.7138491484184914e-05, "loss": 1.386, "step": 338300 }, { "epoch": 6.580456976178901, "grad_norm": 4.116962909698486, "learning_rate": 1.712875912408759e-05, "loss": 1.4302, "step": 338400 }, { "epoch": 6.582401555663588, "grad_norm": 7.680058002471924, "learning_rate": 1.711902676399027e-05, "loss": 1.4046, "step": 338500 }, { "epoch": 6.584346135148274, "grad_norm": 6.593456268310547, "learning_rate": 1.7109294403892945e-05, "loss": 1.3969, "step": 338600 }, { "epoch": 6.586290714632961, "grad_norm": 5.30784273147583, "learning_rate": 1.7099562043795622e-05, "loss": 1.4895, "step": 338700 }, { "epoch": 6.588235294117647, "grad_norm": 12.807516098022461, "learning_rate": 1.7089829683698296e-05, "loss": 1.5716, "step": 338800 }, { "epoch": 6.590179873602334, "grad_norm": 4.735255718231201, "learning_rate": 1.7080097323600973e-05, "loss": 1.2893, "step": 338900 }, { "epoch": 6.5921244530870196, "grad_norm": 5.847226142883301, "learning_rate": 1.707036496350365e-05, "loss": 1.476, "step": 339000 }, { 
"epoch": 6.594069032571706, "grad_norm": 3.642072916030884, "learning_rate": 1.7060632603406327e-05, "loss": 1.4767, "step": 339100 }, { "epoch": 6.596013612056392, "grad_norm": 6.403225421905518, "learning_rate": 1.7050900243309004e-05, "loss": 1.6147, "step": 339200 }, { "epoch": 6.597958191541079, "grad_norm": 4.9601640701293945, "learning_rate": 1.7041167883211678e-05, "loss": 1.4141, "step": 339300 }, { "epoch": 6.599902771025766, "grad_norm": 5.796444416046143, "learning_rate": 1.7031435523114355e-05, "loss": 1.5965, "step": 339400 }, { "epoch": 6.601847350510452, "grad_norm": 7.419425964355469, "learning_rate": 1.7021703163017032e-05, "loss": 1.4093, "step": 339500 }, { "epoch": 6.603791929995139, "grad_norm": 4.677413463592529, "learning_rate": 1.701197080291971e-05, "loss": 1.397, "step": 339600 }, { "epoch": 6.605736509479825, "grad_norm": 5.550114154815674, "learning_rate": 1.7002238442822386e-05, "loss": 1.521, "step": 339700 }, { "epoch": 6.607681088964512, "grad_norm": 3.383683443069458, "learning_rate": 1.6992506082725063e-05, "loss": 1.316, "step": 339800 }, { "epoch": 6.609625668449198, "grad_norm": 2.2930819988250732, "learning_rate": 1.6982773722627737e-05, "loss": 1.4342, "step": 339900 }, { "epoch": 6.6115702479338845, "grad_norm": 4.438930511474609, "learning_rate": 1.6973041362530414e-05, "loss": 1.5354, "step": 340000 }, { "epoch": 6.6115702479338845, "eval_accuracy": 0.56435, "eval_f1": 0.5540698659975835, "eval_loss": 1.1631097793579102, "eval_precision": 0.5667549680241949, "eval_recall": 0.56435, "eval_runtime": 11662.0991, "eval_samples_per_second": 15.435, "eval_steps_per_second": 0.482, "step": 340000 }, { "epoch": 6.61351482741857, "grad_norm": 6.308751106262207, "learning_rate": 1.696330900243309e-05, "loss": 1.3571, "step": 340100 }, { "epoch": 6.615459406903257, "grad_norm": 4.354719161987305, "learning_rate": 1.6953576642335768e-05, "loss": 1.5734, "step": 340200 }, { "epoch": 6.617403986387943, "grad_norm": 2.6351795196533203, 
"learning_rate": 1.6943844282238445e-05, "loss": 1.5163, "step": 340300 }, { "epoch": 6.61934856587263, "grad_norm": 4.985274791717529, "learning_rate": 1.6934111922141118e-05, "loss": 1.3956, "step": 340400 }, { "epoch": 6.621293145357317, "grad_norm": 4.434135913848877, "learning_rate": 1.6924379562043795e-05, "loss": 1.4153, "step": 340500 }, { "epoch": 6.623237724842003, "grad_norm": 3.725522518157959, "learning_rate": 1.6914647201946472e-05, "loss": 1.4067, "step": 340600 }, { "epoch": 6.625182304326689, "grad_norm": 6.436718463897705, "learning_rate": 1.690491484184915e-05, "loss": 1.4348, "step": 340700 }, { "epoch": 6.627126883811376, "grad_norm": 2.1100080013275146, "learning_rate": 1.6895182481751826e-05, "loss": 1.4665, "step": 340800 }, { "epoch": 6.629071463296063, "grad_norm": 10.948493003845215, "learning_rate": 1.6885450121654503e-05, "loss": 1.545, "step": 340900 }, { "epoch": 6.6310160427807485, "grad_norm": 9.55878734588623, "learning_rate": 1.6875717761557177e-05, "loss": 1.4269, "step": 341000 }, { "epoch": 6.632960622265435, "grad_norm": 7.7770490646362305, "learning_rate": 1.6865985401459854e-05, "loss": 1.3564, "step": 341100 }, { "epoch": 6.634905201750121, "grad_norm": 4.3352437019348145, "learning_rate": 1.685625304136253e-05, "loss": 1.3302, "step": 341200 }, { "epoch": 6.636849781234808, "grad_norm": 6.935305118560791, "learning_rate": 1.6846520681265208e-05, "loss": 1.4178, "step": 341300 }, { "epoch": 6.638794360719494, "grad_norm": 5.156348705291748, "learning_rate": 1.6836885644768856e-05, "loss": 1.4164, "step": 341400 }, { "epoch": 6.640738940204181, "grad_norm": 4.420572757720947, "learning_rate": 1.6827153284671536e-05, "loss": 1.3803, "step": 341500 }, { "epoch": 6.642683519688867, "grad_norm": 6.000495433807373, "learning_rate": 1.681742092457421e-05, "loss": 1.5775, "step": 341600 }, { "epoch": 6.644628099173554, "grad_norm": 6.442324638366699, "learning_rate": 1.6807688564476887e-05, "loss": 1.4531, "step": 341700 }, { 
"epoch": 6.64657267865824, "grad_norm": 4.09006404876709, "learning_rate": 1.6797956204379564e-05, "loss": 1.6233, "step": 341800 }, { "epoch": 6.648517258142927, "grad_norm": 5.611737251281738, "learning_rate": 1.6788223844282237e-05, "loss": 1.4366, "step": 341900 }, { "epoch": 6.6504618376276134, "grad_norm": 6.341790199279785, "learning_rate": 1.6778491484184914e-05, "loss": 1.4899, "step": 342000 }, { "epoch": 6.652406417112299, "grad_norm": 2.507185220718384, "learning_rate": 1.676875912408759e-05, "loss": 1.4488, "step": 342100 }, { "epoch": 6.654350996596986, "grad_norm": 21.71469497680664, "learning_rate": 1.675902676399027e-05, "loss": 1.5519, "step": 342200 }, { "epoch": 6.656295576081672, "grad_norm": 4.010864734649658, "learning_rate": 1.6749294403892946e-05, "loss": 1.4279, "step": 342300 }, { "epoch": 6.658240155566359, "grad_norm": 6.308621883392334, "learning_rate": 1.673956204379562e-05, "loss": 1.4199, "step": 342400 }, { "epoch": 6.660184735051045, "grad_norm": 2.805286169052124, "learning_rate": 1.6729829683698296e-05, "loss": 1.3855, "step": 342500 }, { "epoch": 6.662129314535732, "grad_norm": 3.0020692348480225, "learning_rate": 1.6720097323600977e-05, "loss": 1.2867, "step": 342600 }, { "epoch": 6.664073894020418, "grad_norm": 6.344849109649658, "learning_rate": 1.671036496350365e-05, "loss": 1.4753, "step": 342700 }, { "epoch": 6.666018473505105, "grad_norm": 3.46720290184021, "learning_rate": 1.6700632603406327e-05, "loss": 1.408, "step": 342800 }, { "epoch": 6.667963052989791, "grad_norm": 2.506078004837036, "learning_rate": 1.6690900243309004e-05, "loss": 1.5002, "step": 342900 }, { "epoch": 6.6699076324744775, "grad_norm": 4.867034912109375, "learning_rate": 1.6681167883211678e-05, "loss": 1.3624, "step": 343000 }, { "epoch": 6.6718522119591634, "grad_norm": 9.229085922241211, "learning_rate": 1.6671435523114355e-05, "loss": 1.63, "step": 343100 }, { "epoch": 6.67379679144385, "grad_norm": 6.985474586486816, "learning_rate": 
1.6661703163017032e-05, "loss": 1.6616, "step": 343200 }, { "epoch": 6.675741370928536, "grad_norm": 14.7839994430542, "learning_rate": 1.665197080291971e-05, "loss": 1.5392, "step": 343300 }, { "epoch": 6.677685950413223, "grad_norm": 5.303781986236572, "learning_rate": 1.6642238442822386e-05, "loss": 1.457, "step": 343400 }, { "epoch": 6.67963052989791, "grad_norm": 6.053272724151611, "learning_rate": 1.6632603406326037e-05, "loss": 1.4926, "step": 343500 }, { "epoch": 6.681575109382596, "grad_norm": 14.258841514587402, "learning_rate": 1.662287104622871e-05, "loss": 1.5883, "step": 343600 }, { "epoch": 6.683519688867283, "grad_norm": 6.282293796539307, "learning_rate": 1.6613138686131388e-05, "loss": 1.3948, "step": 343700 }, { "epoch": 6.685464268351969, "grad_norm": 9.199627876281738, "learning_rate": 1.660340632603406e-05, "loss": 1.5522, "step": 343800 }, { "epoch": 6.687408847836656, "grad_norm": 8.75355339050293, "learning_rate": 1.6593673965936742e-05, "loss": 1.554, "step": 343900 }, { "epoch": 6.6893534273213415, "grad_norm": 7.012129306793213, "learning_rate": 1.658394160583942e-05, "loss": 1.6001, "step": 344000 }, { "epoch": 6.691298006806028, "grad_norm": 3.815718173980713, "learning_rate": 1.6574209245742092e-05, "loss": 1.3876, "step": 344100 }, { "epoch": 6.693242586290714, "grad_norm": 4.201666355133057, "learning_rate": 1.656447688564477e-05, "loss": 1.5227, "step": 344200 }, { "epoch": 6.695187165775401, "grad_norm": 7.222345352172852, "learning_rate": 1.6554744525547446e-05, "loss": 1.3518, "step": 344300 }, { "epoch": 6.697131745260087, "grad_norm": 6.64124059677124, "learning_rate": 1.654501216545012e-05, "loss": 1.5711, "step": 344400 }, { "epoch": 6.699076324744774, "grad_norm": 5.359579086303711, "learning_rate": 1.65352798053528e-05, "loss": 1.587, "step": 344500 }, { "epoch": 6.701020904229461, "grad_norm": 4.226009845733643, "learning_rate": 1.6525547445255477e-05, "loss": 1.3944, "step": 344600 }, { "epoch": 6.702965483714147, 
"grad_norm": 8.229913711547852, "learning_rate": 1.651581508515815e-05, "loss": 1.4328, "step": 344700 }, { "epoch": 6.704910063198833, "grad_norm": 2.106302499771118, "learning_rate": 1.6506082725060828e-05, "loss": 1.5257, "step": 344800 }, { "epoch": 6.70685464268352, "grad_norm": 2.9660022258758545, "learning_rate": 1.6496350364963502e-05, "loss": 1.4159, "step": 344900 }, { "epoch": 6.7087992221682065, "grad_norm": 7.217944145202637, "learning_rate": 1.6486618004866182e-05, "loss": 1.4264, "step": 345000 }, { "epoch": 6.7087992221682065, "eval_accuracy": 0.5626444444444444, "eval_f1": 0.5484271304409575, "eval_loss": 1.1693283319473267, "eval_precision": 0.5640365916009155, "eval_recall": 0.5626444444444445, "eval_runtime": 11717.1636, "eval_samples_per_second": 15.362, "eval_steps_per_second": 0.48, "step": 345000 }, { "epoch": 6.710743801652892, "grad_norm": 4.608566761016846, "learning_rate": 1.647688564476886e-05, "loss": 1.3899, "step": 345100 }, { "epoch": 6.712688381137579, "grad_norm": 5.415187835693359, "learning_rate": 1.6467153284671533e-05, "loss": 1.5599, "step": 345200 }, { "epoch": 6.714632960622265, "grad_norm": 6.185451984405518, "learning_rate": 1.645742092457421e-05, "loss": 1.4563, "step": 345300 }, { "epoch": 6.716577540106952, "grad_norm": 3.846397638320923, "learning_rate": 1.6447688564476887e-05, "loss": 1.4619, "step": 345400 }, { "epoch": 6.718522119591638, "grad_norm": 1.9868512153625488, "learning_rate": 1.6438053527980535e-05, "loss": 1.3234, "step": 345500 }, { "epoch": 6.720466699076325, "grad_norm": 3.262474536895752, "learning_rate": 1.642832116788321e-05, "loss": 1.565, "step": 345600 }, { "epoch": 6.722411278561011, "grad_norm": 3.690235137939453, "learning_rate": 1.641858880778589e-05, "loss": 1.379, "step": 345700 }, { "epoch": 6.724355858045698, "grad_norm": 6.004395961761475, "learning_rate": 1.6408856447688566e-05, "loss": 1.5487, "step": 345800 }, { "epoch": 6.726300437530384, "grad_norm": 5.085541248321533, 
"learning_rate": 1.6399124087591243e-05, "loss": 1.4018, "step": 345900 }, { "epoch": 6.7282450170150705, "grad_norm": 8.185060501098633, "learning_rate": 1.638939172749392e-05, "loss": 1.5554, "step": 346000 }, { "epoch": 6.730189596499757, "grad_norm": 6.570384979248047, "learning_rate": 1.6379659367396593e-05, "loss": 1.3807, "step": 346100 }, { "epoch": 6.732134175984443, "grad_norm": 6.303893089294434, "learning_rate": 1.636992700729927e-05, "loss": 1.4999, "step": 346200 }, { "epoch": 6.73407875546913, "grad_norm": 3.841564178466797, "learning_rate": 1.6360194647201947e-05, "loss": 1.4231, "step": 346300 }, { "epoch": 6.736023334953816, "grad_norm": 5.237590789794922, "learning_rate": 1.6350462287104624e-05, "loss": 1.4526, "step": 346400 }, { "epoch": 6.737967914438503, "grad_norm": 8.058945655822754, "learning_rate": 1.63407299270073e-05, "loss": 1.5816, "step": 346500 }, { "epoch": 6.739912493923189, "grad_norm": 6.059102535247803, "learning_rate": 1.633109489051095e-05, "loss": 1.5959, "step": 346600 }, { "epoch": 6.741857073407876, "grad_norm": 26.284774780273438, "learning_rate": 1.6321362530413626e-05, "loss": 1.505, "step": 346700 }, { "epoch": 6.743801652892562, "grad_norm": 3.7712631225585938, "learning_rate": 1.6311630170316303e-05, "loss": 1.2947, "step": 346800 }, { "epoch": 6.745746232377249, "grad_norm": 2.9632151126861572, "learning_rate": 1.630189781021898e-05, "loss": 1.3854, "step": 346900 }, { "epoch": 6.7476908118619345, "grad_norm": 4.552134990692139, "learning_rate": 1.6292165450121654e-05, "loss": 1.7654, "step": 347000 }, { "epoch": 6.749635391346621, "grad_norm": 6.002095699310303, "learning_rate": 1.628243309002433e-05, "loss": 1.3629, "step": 347100 }, { "epoch": 6.751579970831307, "grad_norm": 9.015872955322266, "learning_rate": 1.6272700729927008e-05, "loss": 1.5527, "step": 347200 }, { "epoch": 6.753524550315994, "grad_norm": 8.625974655151367, "learning_rate": 1.6262968369829685e-05, "loss": 1.645, "step": 347300 }, { "epoch": 
6.75546912980068, "grad_norm": 4.3711042404174805, "learning_rate": 1.6253236009732362e-05, "loss": 1.391, "step": 347400 }, { "epoch": 6.757413709285367, "grad_norm": 2.224710464477539, "learning_rate": 1.6243503649635035e-05, "loss": 1.3881, "step": 347500 }, { "epoch": 6.759358288770054, "grad_norm": 5.197736740112305, "learning_rate": 1.6233771289537712e-05, "loss": 1.3868, "step": 347600 }, { "epoch": 6.76130286825474, "grad_norm": 7.163862228393555, "learning_rate": 1.622403892944039e-05, "loss": 1.3672, "step": 347700 }, { "epoch": 6.763247447739427, "grad_norm": 15.33387565612793, "learning_rate": 1.6214306569343066e-05, "loss": 1.4202, "step": 347800 }, { "epoch": 6.765192027224113, "grad_norm": 3.8585872650146484, "learning_rate": 1.6204574209245743e-05, "loss": 1.4322, "step": 347900 }, { "epoch": 6.7671366067087995, "grad_norm": 4.453078746795654, "learning_rate": 1.619484184914842e-05, "loss": 1.6116, "step": 348000 }, { "epoch": 6.769081186193485, "grad_norm": 2.7331926822662354, "learning_rate": 1.6185109489051094e-05, "loss": 1.4231, "step": 348100 }, { "epoch": 6.771025765678172, "grad_norm": 8.203137397766113, "learning_rate": 1.617537712895377e-05, "loss": 1.4525, "step": 348200 }, { "epoch": 6.772970345162858, "grad_norm": 20.03078842163086, "learning_rate": 1.6165644768856448e-05, "loss": 1.6403, "step": 348300 }, { "epoch": 6.774914924647545, "grad_norm": 4.198240280151367, "learning_rate": 1.6155912408759125e-05, "loss": 1.4842, "step": 348400 }, { "epoch": 6.776859504132231, "grad_norm": 4.239771842956543, "learning_rate": 1.6146180048661802e-05, "loss": 1.4975, "step": 348500 }, { "epoch": 6.778804083616918, "grad_norm": 4.72540807723999, "learning_rate": 1.6136447688564476e-05, "loss": 1.4638, "step": 348600 }, { "epoch": 6.780748663101605, "grad_norm": 3.382838487625122, "learning_rate": 1.6126715328467153e-05, "loss": 1.5703, "step": 348700 }, { "epoch": 6.782693242586291, "grad_norm": 3.3850011825561523, "learning_rate": 
1.6117080291970804e-05, "loss": 1.4044, "step": 348800 }, { "epoch": 6.784637822070977, "grad_norm": 8.305487632751465, "learning_rate": 1.6107347931873478e-05, "loss": 1.485, "step": 348900 }, { "epoch": 6.7865824015556635, "grad_norm": 8.633224487304688, "learning_rate": 1.6097615571776155e-05, "loss": 1.4445, "step": 349000 }, { "epoch": 6.78852698104035, "grad_norm": 11.536341667175293, "learning_rate": 1.6087883211678835e-05, "loss": 1.5045, "step": 349100 }, { "epoch": 6.790471560525036, "grad_norm": 3.862670660018921, "learning_rate": 1.607815085158151e-05, "loss": 1.4885, "step": 349200 }, { "epoch": 6.792416140009723, "grad_norm": 3.1610517501831055, "learning_rate": 1.6068418491484186e-05, "loss": 1.5247, "step": 349300 }, { "epoch": 6.794360719494409, "grad_norm": 16.031583786010742, "learning_rate": 1.6058686131386863e-05, "loss": 1.5017, "step": 349400 }, { "epoch": 6.796305298979096, "grad_norm": 6.776891708374023, "learning_rate": 1.6048953771289536e-05, "loss": 1.4358, "step": 349500 }, { "epoch": 6.798249878463782, "grad_norm": 4.1183671951293945, "learning_rate": 1.6039221411192217e-05, "loss": 1.5545, "step": 349600 }, { "epoch": 6.800194457948469, "grad_norm": 9.317232131958008, "learning_rate": 1.6029489051094894e-05, "loss": 1.5665, "step": 349700 }, { "epoch": 6.802139037433155, "grad_norm": 13.820940017700195, "learning_rate": 1.6019756690997567e-05, "loss": 1.5295, "step": 349800 }, { "epoch": 6.804083616917842, "grad_norm": 3.1361641883850098, "learning_rate": 1.6010024330900244e-05, "loss": 1.4257, "step": 349900 }, { "epoch": 6.8060281964025275, "grad_norm": 3.379857301712036, "learning_rate": 1.6000291970802918e-05, "loss": 1.5207, "step": 350000 }, { "epoch": 6.8060281964025275, "eval_accuracy": 0.5582833333333334, "eval_f1": 0.5400702888684698, "eval_loss": 1.178115963935852, "eval_precision": 0.5667581246772343, "eval_recall": 0.5582833333333334, "eval_runtime": 11712.7461, "eval_samples_per_second": 15.368, "eval_steps_per_second": 
0.48, "step": 350000 }, { "epoch": 6.807972775887214, "grad_norm": 4.648892402648926, "learning_rate": 1.5990559610705595e-05, "loss": 1.3634, "step": 350100 }, { "epoch": 6.809917355371901, "grad_norm": 5.25177001953125, "learning_rate": 1.5980827250608275e-05, "loss": 1.3766, "step": 350200 }, { "epoch": 6.811861934856587, "grad_norm": 4.799590110778809, "learning_rate": 1.597109489051095e-05, "loss": 1.4822, "step": 350300 }, { "epoch": 6.813806514341274, "grad_norm": 6.5947723388671875, "learning_rate": 1.5961362530413626e-05, "loss": 1.4096, "step": 350400 }, { "epoch": 6.81575109382596, "grad_norm": 20.037906646728516, "learning_rate": 1.5951630170316303e-05, "loss": 1.4635, "step": 350500 }, { "epoch": 6.817695673310647, "grad_norm": 9.130476951599121, "learning_rate": 1.5941897810218977e-05, "loss": 1.6884, "step": 350600 }, { "epoch": 6.819640252795333, "grad_norm": 5.91204309463501, "learning_rate": 1.5932165450121657e-05, "loss": 1.5821, "step": 350700 }, { "epoch": 6.82158483228002, "grad_norm": 4.006198883056641, "learning_rate": 1.5922433090024334e-05, "loss": 1.5433, "step": 350800 }, { "epoch": 6.823529411764706, "grad_norm": 4.063616752624512, "learning_rate": 1.5912700729927008e-05, "loss": 1.741, "step": 350900 }, { "epoch": 6.8254739912493925, "grad_norm": 4.237301826477051, "learning_rate": 1.5902968369829685e-05, "loss": 1.5429, "step": 351000 }, { "epoch": 6.827418570734078, "grad_norm": 3.1281614303588867, "learning_rate": 1.589323600973236e-05, "loss": 1.6055, "step": 351100 }, { "epoch": 6.829363150218765, "grad_norm": 1.737205982208252, "learning_rate": 1.5883503649635035e-05, "loss": 1.4584, "step": 351200 }, { "epoch": 6.831307729703452, "grad_norm": 5.509426593780518, "learning_rate": 1.5873771289537716e-05, "loss": 1.4545, "step": 351300 }, { "epoch": 6.833252309188138, "grad_norm": 7.2582879066467285, "learning_rate": 1.586403892944039e-05, "loss": 1.3067, "step": 351400 }, { "epoch": 6.835196888672824, "grad_norm": 
2.4553821086883545, "learning_rate": 1.5854306569343067e-05, "loss": 1.4794, "step": 351500 }, { "epoch": 6.837141468157511, "grad_norm": 3.715660810470581, "learning_rate": 1.5844671532846718e-05, "loss": 1.4567, "step": 351600 }, { "epoch": 6.839086047642198, "grad_norm": 6.250126361846924, "learning_rate": 1.583493917274939e-05, "loss": 1.414, "step": 351700 }, { "epoch": 6.841030627126884, "grad_norm": 7.588212490081787, "learning_rate": 1.5825206812652068e-05, "loss": 1.3683, "step": 351800 }, { "epoch": 6.8429752066115705, "grad_norm": 5.124974727630615, "learning_rate": 1.5815474452554745e-05, "loss": 1.5665, "step": 351900 }, { "epoch": 6.8449197860962565, "grad_norm": 4.8305559158325195, "learning_rate": 1.5805742092457422e-05, "loss": 1.3891, "step": 352000 }, { "epoch": 6.846864365580943, "grad_norm": 2.889914035797119, "learning_rate": 1.57960097323601e-05, "loss": 1.4494, "step": 352100 }, { "epoch": 6.848808945065629, "grad_norm": 5.933715343475342, "learning_rate": 1.5786277372262776e-05, "loss": 1.4873, "step": 352200 }, { "epoch": 6.850753524550316, "grad_norm": 6.94621467590332, "learning_rate": 1.577654501216545e-05, "loss": 1.3721, "step": 352300 }, { "epoch": 6.852698104035002, "grad_norm": 5.081608295440674, "learning_rate": 1.5766812652068127e-05, "loss": 1.3888, "step": 352400 }, { "epoch": 6.854642683519689, "grad_norm": 4.843266487121582, "learning_rate": 1.5757080291970804e-05, "loss": 1.3587, "step": 352500 }, { "epoch": 6.856587263004375, "grad_norm": 9.979722023010254, "learning_rate": 1.574734793187348e-05, "loss": 1.429, "step": 352600 }, { "epoch": 6.858531842489062, "grad_norm": 3.252609968185425, "learning_rate": 1.5737615571776158e-05, "loss": 1.4875, "step": 352700 }, { "epoch": 6.860476421973749, "grad_norm": 3.5106687545776367, "learning_rate": 1.572788321167883e-05, "loss": 1.5906, "step": 352800 }, { "epoch": 6.862421001458435, "grad_norm": 7.1923441886901855, "learning_rate": 1.571815085158151e-05, "loss": 1.528, "step": 
352900 }, { "epoch": 6.864365580943121, "grad_norm": 2.9241271018981934, "learning_rate": 1.5708418491484186e-05, "loss": 1.4008, "step": 353000 }, { "epoch": 6.866310160427807, "grad_norm": 2.834791660308838, "learning_rate": 1.5698686131386863e-05, "loss": 1.6115, "step": 353100 }, { "epoch": 6.868254739912494, "grad_norm": 6.6368088722229, "learning_rate": 1.568895377128954e-05, "loss": 1.585, "step": 353200 }, { "epoch": 6.87019931939718, "grad_norm": 4.346223831176758, "learning_rate": 1.5679221411192217e-05, "loss": 1.4002, "step": 353300 }, { "epoch": 6.872143898881867, "grad_norm": 4.790260314941406, "learning_rate": 1.566948905109489e-05, "loss": 1.5126, "step": 353400 }, { "epoch": 6.874088478366553, "grad_norm": 4.492265701293945, "learning_rate": 1.5659756690997567e-05, "loss": 1.4076, "step": 353500 }, { "epoch": 6.87603305785124, "grad_norm": 3.4243907928466797, "learning_rate": 1.5650024330900244e-05, "loss": 1.392, "step": 353600 }, { "epoch": 6.877977637335926, "grad_norm": 3.2975258827209473, "learning_rate": 1.564029197080292e-05, "loss": 1.3924, "step": 353700 }, { "epoch": 6.879922216820613, "grad_norm": 5.607815742492676, "learning_rate": 1.56305596107056e-05, "loss": 1.5474, "step": 353800 }, { "epoch": 6.881866796305299, "grad_norm": 8.003015518188477, "learning_rate": 1.5620827250608272e-05, "loss": 1.4296, "step": 353900 }, { "epoch": 6.8838113757899855, "grad_norm": 6.574801445007324, "learning_rate": 1.561109489051095e-05, "loss": 1.355, "step": 354000 }, { "epoch": 6.885755955274671, "grad_norm": 5.306797027587891, "learning_rate": 1.5601362530413626e-05, "loss": 1.2645, "step": 354100 }, { "epoch": 6.887700534759358, "grad_norm": 3.0872364044189453, "learning_rate": 1.55916301703163e-05, "loss": 1.4532, "step": 354200 }, { "epoch": 6.889645114244045, "grad_norm": 7.314919471740723, "learning_rate": 1.558189781021898e-05, "loss": 1.5081, "step": 354300 }, { "epoch": 6.891589693728731, "grad_norm": 6.223499298095703, "learning_rate": 
1.5572165450121657e-05, "loss": 1.3824, "step": 354400 }, { "epoch": 6.893534273213418, "grad_norm": 3.410200834274292, "learning_rate": 1.556243309002433e-05, "loss": 1.642, "step": 354500 }, { "epoch": 6.895478852698104, "grad_norm": 4.695956230163574, "learning_rate": 1.5552700729927008e-05, "loss": 1.6078, "step": 354600 }, { "epoch": 6.897423432182791, "grad_norm": 5.931009292602539, "learning_rate": 1.5542968369829685e-05, "loss": 1.5324, "step": 354700 }, { "epoch": 6.899368011667477, "grad_norm": 6.152247428894043, "learning_rate": 1.5533236009732362e-05, "loss": 1.3863, "step": 354800 }, { "epoch": 6.9013125911521636, "grad_norm": 2.1513333320617676, "learning_rate": 1.552350364963504e-05, "loss": 1.5216, "step": 354900 }, { "epoch": 6.9032571706368495, "grad_norm": 3.0146796703338623, "learning_rate": 1.5513771289537713e-05, "loss": 1.441, "step": 355000 }, { "epoch": 6.9032571706368495, "eval_accuracy": 0.5581, "eval_f1": 0.5495529506487198, "eval_loss": 1.1745611429214478, "eval_precision": 0.5666457862538422, "eval_recall": 0.5581, "eval_runtime": 11724.9578, "eval_samples_per_second": 15.352, "eval_steps_per_second": 0.48, "step": 355000 }, { "epoch": 6.905201750121536, "grad_norm": 4.306898593902588, "learning_rate": 1.5504136253041364e-05, "loss": 1.4467, "step": 355100 }, { "epoch": 6.907146329606222, "grad_norm": 5.671085834503174, "learning_rate": 1.549440389294404e-05, "loss": 1.5155, "step": 355200 }, { "epoch": 6.909090909090909, "grad_norm": 5.4951677322387695, "learning_rate": 1.5484671532846718e-05, "loss": 1.478, "step": 355300 }, { "epoch": 6.911035488575596, "grad_norm": 7.702792167663574, "learning_rate": 1.547493917274939e-05, "loss": 1.598, "step": 355400 }, { "epoch": 6.912980068060282, "grad_norm": 6.570266246795654, "learning_rate": 1.546520681265207e-05, "loss": 1.3829, "step": 355500 }, { "epoch": 6.914924647544968, "grad_norm": 3.1220309734344482, "learning_rate": 1.5455474452554745e-05, "loss": 1.6089, "step": 355600 }, { 
"epoch": 6.916869227029655, "grad_norm": 1.6664758920669556, "learning_rate": 1.5445742092457422e-05, "loss": 1.471, "step": 355700 }, { "epoch": 6.918813806514342, "grad_norm": 4.967290878295898, "learning_rate": 1.54360097323601e-05, "loss": 1.4691, "step": 355800 }, { "epoch": 6.920758385999028, "grad_norm": 3.120300769805908, "learning_rate": 1.5426277372262773e-05, "loss": 1.5146, "step": 355900 }, { "epoch": 6.922702965483714, "grad_norm": 3.945497512817383, "learning_rate": 1.541654501216545e-05, "loss": 1.4236, "step": 356000 }, { "epoch": 6.9246475449684, "grad_norm": 5.228361129760742, "learning_rate": 1.5406812652068127e-05, "loss": 1.5055, "step": 356100 }, { "epoch": 6.926592124453087, "grad_norm": 3.607844352722168, "learning_rate": 1.5397080291970804e-05, "loss": 1.5343, "step": 356200 }, { "epoch": 6.928536703937773, "grad_norm": 6.652727127075195, "learning_rate": 1.538734793187348e-05, "loss": 1.4866, "step": 356300 }, { "epoch": 6.93048128342246, "grad_norm": 7.437748432159424, "learning_rate": 1.5377615571776158e-05, "loss": 1.7197, "step": 356400 }, { "epoch": 6.932425862907146, "grad_norm": 5.157785415649414, "learning_rate": 1.5367883211678832e-05, "loss": 1.5476, "step": 356500 }, { "epoch": 6.934370442391833, "grad_norm": 3.649047613143921, "learning_rate": 1.535815085158151e-05, "loss": 1.3261, "step": 356600 }, { "epoch": 6.936315021876519, "grad_norm": 6.254286289215088, "learning_rate": 1.5348418491484186e-05, "loss": 1.5411, "step": 356700 }, { "epoch": 6.938259601361206, "grad_norm": 4.894169330596924, "learning_rate": 1.5338686131386863e-05, "loss": 1.4609, "step": 356800 }, { "epoch": 6.9402041808458925, "grad_norm": 5.257272720336914, "learning_rate": 1.532895377128954e-05, "loss": 1.385, "step": 356900 }, { "epoch": 6.9421487603305785, "grad_norm": 1.8414762020111084, "learning_rate": 1.5319221411192213e-05, "loss": 1.4913, "step": 357000 }, { "epoch": 6.944093339815265, "grad_norm": 2.8521575927734375, "learning_rate": 
1.530948905109489e-05, "loss": 1.6553, "step": 357100 }, { "epoch": 6.946037919299951, "grad_norm": 6.965274333953857, "learning_rate": 1.5299756690997567e-05, "loss": 1.5769, "step": 357200 }, { "epoch": 6.947982498784638, "grad_norm": 3.182405471801758, "learning_rate": 1.5290024330900245e-05, "loss": 1.386, "step": 357300 }, { "epoch": 6.949927078269324, "grad_norm": 3.2514429092407227, "learning_rate": 1.528029197080292e-05, "loss": 1.4507, "step": 357400 }, { "epoch": 6.951871657754011, "grad_norm": 2.807191848754883, "learning_rate": 1.52705596107056e-05, "loss": 1.8131, "step": 357500 }, { "epoch": 6.953816237238697, "grad_norm": 3.6275417804718018, "learning_rate": 1.5260827250608272e-05, "loss": 1.4467, "step": 357600 }, { "epoch": 6.955760816723384, "grad_norm": 4.113154888153076, "learning_rate": 1.5251094890510951e-05, "loss": 1.6078, "step": 357700 }, { "epoch": 6.95770539620807, "grad_norm": 6.1789350509643555, "learning_rate": 1.5241362530413625e-05, "loss": 1.3258, "step": 357800 }, { "epoch": 6.959649975692757, "grad_norm": 5.752359867095947, "learning_rate": 1.5231630170316302e-05, "loss": 1.4563, "step": 357900 }, { "epoch": 6.9615945551774425, "grad_norm": 4.413275718688965, "learning_rate": 1.522189781021898e-05, "loss": 1.5326, "step": 358000 }, { "epoch": 6.963539134662129, "grad_norm": 4.142030715942383, "learning_rate": 1.5212165450121654e-05, "loss": 1.4859, "step": 358100 }, { "epoch": 6.965483714146815, "grad_norm": 5.000210285186768, "learning_rate": 1.5202433090024331e-05, "loss": 1.4162, "step": 358200 }, { "epoch": 6.967428293631502, "grad_norm": 4.467865467071533, "learning_rate": 1.519270072992701e-05, "loss": 1.5003, "step": 358300 }, { "epoch": 6.969372873116189, "grad_norm": 3.13112473487854, "learning_rate": 1.5182968369829683e-05, "loss": 1.372, "step": 358400 }, { "epoch": 6.971317452600875, "grad_norm": 2.831646680831909, "learning_rate": 1.5173236009732362e-05, "loss": 1.5251, "step": 358500 }, { "epoch": 6.973262032085562, 
"grad_norm": 6.817995548248291, "learning_rate": 1.5163503649635039e-05, "loss": 1.4744, "step": 358600 }, { "epoch": 6.975206611570248, "grad_norm": 4.419078350067139, "learning_rate": 1.5153771289537713e-05, "loss": 1.4128, "step": 358700 }, { "epoch": 6.977151191054935, "grad_norm": 2.528421401977539, "learning_rate": 1.5144038929440391e-05, "loss": 1.442, "step": 358800 }, { "epoch": 6.979095770539621, "grad_norm": 5.647482395172119, "learning_rate": 1.5134306569343065e-05, "loss": 1.3474, "step": 358900 }, { "epoch": 6.9810403500243075, "grad_norm": 1.9833718538284302, "learning_rate": 1.5124671532846716e-05, "loss": 1.4922, "step": 359000 }, { "epoch": 6.982984929508993, "grad_norm": 3.410750389099121, "learning_rate": 1.5114939172749393e-05, "loss": 1.4837, "step": 359100 }, { "epoch": 6.98492950899368, "grad_norm": 4.250805854797363, "learning_rate": 1.510520681265207e-05, "loss": 1.4523, "step": 359200 }, { "epoch": 6.986874088478366, "grad_norm": 4.7928547859191895, "learning_rate": 1.5095474452554745e-05, "loss": 1.6183, "step": 359300 }, { "epoch": 6.988818667963053, "grad_norm": 7.6458587646484375, "learning_rate": 1.5085742092457422e-05, "loss": 1.4251, "step": 359400 }, { "epoch": 6.99076324744774, "grad_norm": 7.640934944152832, "learning_rate": 1.5076009732360096e-05, "loss": 1.5807, "step": 359500 }, { "epoch": 6.992707826932426, "grad_norm": 5.268707275390625, "learning_rate": 1.5066277372262775e-05, "loss": 1.4778, "step": 359600 }, { "epoch": 6.994652406417112, "grad_norm": 7.776730060577393, "learning_rate": 1.5056545012165452e-05, "loss": 1.4797, "step": 359700 }, { "epoch": 6.996596985901799, "grad_norm": 3.0145046710968018, "learning_rate": 1.5046812652068127e-05, "loss": 1.4029, "step": 359800 }, { "epoch": 6.9985415653864855, "grad_norm": 5.3838934898376465, "learning_rate": 1.5037080291970804e-05, "loss": 1.4151, "step": 359900 }, { "epoch": 7.0004861448711715, "grad_norm": 2.9597880840301514, "learning_rate": 1.5027347931873481e-05, 
"loss": 1.33, "step": 360000 }, { "epoch": 7.0004861448711715, "eval_accuracy": 0.5677111111111112, "eval_f1": 0.5574394759360473, "eval_loss": 1.1605494022369385, "eval_precision": 0.5721470644766599, "eval_recall": 0.5677111111111112, "eval_runtime": 11728.6864, "eval_samples_per_second": 15.347, "eval_steps_per_second": 0.48, "step": 360000 }, { "epoch": 7.002430724355858, "grad_norm": 6.0150909423828125, "learning_rate": 1.5017615571776156e-05, "loss": 1.4855, "step": 360100 }, { "epoch": 7.004375303840544, "grad_norm": 2.179396152496338, "learning_rate": 1.5007883211678833e-05, "loss": 1.3495, "step": 360200 }, { "epoch": 7.006319883325231, "grad_norm": 11.258903503417969, "learning_rate": 1.499815085158151e-05, "loss": 1.4803, "step": 360300 }, { "epoch": 7.008264462809917, "grad_norm": 5.342238903045654, "learning_rate": 1.4988418491484186e-05, "loss": 1.4875, "step": 360400 }, { "epoch": 7.010209042294604, "grad_norm": 4.187309265136719, "learning_rate": 1.4978686131386863e-05, "loss": 1.6618, "step": 360500 }, { "epoch": 7.01215362177929, "grad_norm": 11.256775856018066, "learning_rate": 1.4968953771289536e-05, "loss": 1.5135, "step": 360600 }, { "epoch": 7.014098201263977, "grad_norm": 7.277352333068848, "learning_rate": 1.4959221411192215e-05, "loss": 1.5116, "step": 360700 }, { "epoch": 7.016042780748663, "grad_norm": 3.5735840797424316, "learning_rate": 1.4949489051094892e-05, "loss": 1.3504, "step": 360800 }, { "epoch": 7.01798736023335, "grad_norm": 4.784153461456299, "learning_rate": 1.4939756690997568e-05, "loss": 1.4953, "step": 360900 }, { "epoch": 7.019931939718036, "grad_norm": 3.2255051136016846, "learning_rate": 1.4930024330900245e-05, "loss": 1.4413, "step": 361000 }, { "epoch": 7.021876519202722, "grad_norm": 13.076014518737793, "learning_rate": 1.4920291970802922e-05, "loss": 1.5699, "step": 361100 }, { "epoch": 7.023821098687409, "grad_norm": 4.969531059265137, "learning_rate": 1.4910559610705597e-05, "loss": 1.4186, "step": 361200 }, { 
"epoch": 7.025765678172095, "grad_norm": 3.0026803016662598, "learning_rate": 1.4900827250608274e-05, "loss": 1.4725, "step": 361300 }, { "epoch": 7.027710257656782, "grad_norm": 9.625602722167969, "learning_rate": 1.4891094890510951e-05, "loss": 1.3951, "step": 361400 }, { "epoch": 7.029654837141468, "grad_norm": 4.03495979309082, "learning_rate": 1.4881362530413626e-05, "loss": 1.5534, "step": 361500 }, { "epoch": 7.031599416626155, "grad_norm": 5.057310104370117, "learning_rate": 1.4871630170316303e-05, "loss": 1.4764, "step": 361600 }, { "epoch": 7.033543996110841, "grad_norm": 7.873776435852051, "learning_rate": 1.4861897810218977e-05, "loss": 1.3473, "step": 361700 }, { "epoch": 7.035488575595528, "grad_norm": 3.5972483158111572, "learning_rate": 1.4852165450121656e-05, "loss": 1.665, "step": 361800 }, { "epoch": 7.037433155080214, "grad_norm": 3.2932708263397217, "learning_rate": 1.4842433090024333e-05, "loss": 1.3186, "step": 361900 }, { "epoch": 7.0393777345649005, "grad_norm": 5.177793502807617, "learning_rate": 1.4832700729927006e-05, "loss": 1.4202, "step": 362000 }, { "epoch": 7.041322314049586, "grad_norm": 11.329756736755371, "learning_rate": 1.4822968369829685e-05, "loss": 1.3661, "step": 362100 }, { "epoch": 7.043266893534273, "grad_norm": 6.620131015777588, "learning_rate": 1.4813236009732362e-05, "loss": 1.509, "step": 362200 }, { "epoch": 7.045211473018959, "grad_norm": 6.771982192993164, "learning_rate": 1.4803503649635037e-05, "loss": 1.5095, "step": 362300 }, { "epoch": 7.047156052503646, "grad_norm": 5.087162494659424, "learning_rate": 1.4793771289537714e-05, "loss": 1.53, "step": 362400 }, { "epoch": 7.049100631988333, "grad_norm": 4.442887783050537, "learning_rate": 1.4784038929440391e-05, "loss": 1.3857, "step": 362500 }, { "epoch": 7.051045211473019, "grad_norm": 3.1056809425354004, "learning_rate": 1.4774306569343067e-05, "loss": 1.3163, "step": 362600 }, { "epoch": 7.052989790957706, "grad_norm": 6.6623077392578125, "learning_rate": 
1.4764574209245744e-05, "loss": 1.4818, "step": 362700 }, { "epoch": 7.054934370442392, "grad_norm": 3.085977792739868, "learning_rate": 1.4754841849148417e-05, "loss": 1.4801, "step": 362800 }, { "epoch": 7.0568789499270785, "grad_norm": 3.836225748062134, "learning_rate": 1.4745109489051096e-05, "loss": 1.461, "step": 362900 }, { "epoch": 7.0588235294117645, "grad_norm": 3.977445602416992, "learning_rate": 1.4735474452554745e-05, "loss": 1.5725, "step": 363000 }, { "epoch": 7.060768108896451, "grad_norm": 1.9992314577102661, "learning_rate": 1.4725742092457422e-05, "loss": 1.3836, "step": 363100 }, { "epoch": 7.062712688381137, "grad_norm": 5.206114768981934, "learning_rate": 1.4716009732360098e-05, "loss": 1.4856, "step": 363200 }, { "epoch": 7.064657267865824, "grad_norm": 6.948749542236328, "learning_rate": 1.4706277372262775e-05, "loss": 1.4678, "step": 363300 }, { "epoch": 7.06660184735051, "grad_norm": 8.916266441345215, "learning_rate": 1.469654501216545e-05, "loss": 1.5914, "step": 363400 }, { "epoch": 7.068546426835197, "grad_norm": 5.216919898986816, "learning_rate": 1.4686812652068127e-05, "loss": 1.5504, "step": 363500 }, { "epoch": 7.070491006319883, "grad_norm": 6.092511177062988, "learning_rate": 1.4677080291970804e-05, "loss": 1.4256, "step": 363600 }, { "epoch": 7.07243558580457, "grad_norm": 3.1119332313537598, "learning_rate": 1.466734793187348e-05, "loss": 1.3745, "step": 363700 }, { "epoch": 7.074380165289257, "grad_norm": 7.141623497009277, "learning_rate": 1.4657615571776157e-05, "loss": 1.7229, "step": 363800 }, { "epoch": 7.076324744773943, "grad_norm": 1.8946709632873535, "learning_rate": 1.4647883211678834e-05, "loss": 1.3924, "step": 363900 }, { "epoch": 7.078269324258629, "grad_norm": 3.9444971084594727, "learning_rate": 1.4638150851581509e-05, "loss": 1.4752, "step": 364000 }, { "epoch": 7.080213903743315, "grad_norm": 4.854221820831299, "learning_rate": 1.4628418491484186e-05, "loss": 1.4671, "step": 364100 }, { "epoch": 
7.082158483228002, "grad_norm": 12.137602806091309, "learning_rate": 1.4618686131386863e-05, "loss": 1.3925, "step": 364200 }, { "epoch": 7.084103062712688, "grad_norm": 7.07072639465332, "learning_rate": 1.4608953771289538e-05, "loss": 1.3781, "step": 364300 }, { "epoch": 7.086047642197375, "grad_norm": 4.82818603515625, "learning_rate": 1.4599221411192215e-05, "loss": 1.5932, "step": 364400 }, { "epoch": 7.087992221682061, "grad_norm": 12.414026260375977, "learning_rate": 1.4589586374695866e-05, "loss": 1.6992, "step": 364500 }, { "epoch": 7.089936801166748, "grad_norm": 4.968905925750732, "learning_rate": 1.457985401459854e-05, "loss": 1.3378, "step": 364600 }, { "epoch": 7.091881380651434, "grad_norm": 11.474954605102539, "learning_rate": 1.4570121654501217e-05, "loss": 1.5388, "step": 364700 }, { "epoch": 7.093825960136121, "grad_norm": 3.1664469242095947, "learning_rate": 1.4560389294403896e-05, "loss": 1.7374, "step": 364800 }, { "epoch": 7.095770539620807, "grad_norm": 3.643529176712036, "learning_rate": 1.455065693430657e-05, "loss": 1.5629, "step": 364900 }, { "epoch": 7.0977151191054935, "grad_norm": 5.325129985809326, "learning_rate": 1.4540924574209246e-05, "loss": 1.5886, "step": 365000 }, { "epoch": 7.0977151191054935, "eval_accuracy": 0.5656555555555556, "eval_f1": 0.5522733216818478, "eval_loss": 1.1648540496826172, "eval_precision": 0.5710729239182696, "eval_recall": 0.5656555555555556, "eval_runtime": 11735.2313, "eval_samples_per_second": 15.338, "eval_steps_per_second": 0.479, "step": 365000 }, { "epoch": 7.09965969859018, "grad_norm": 9.768268585205078, "learning_rate": 1.4531192214111922e-05, "loss": 1.4102, "step": 365100 }, { "epoch": 7.101604278074866, "grad_norm": 6.8505377769470215, "learning_rate": 1.4521459854014599e-05, "loss": 1.4808, "step": 365200 }, { "epoch": 7.103548857559553, "grad_norm": 2.7207529544830322, "learning_rate": 1.4511727493917276e-05, "loss": 1.5371, "step": 365300 }, { "epoch": 7.105493437044239, "grad_norm": 
3.0455095767974854, "learning_rate": 1.4501995133819951e-05, "loss": 1.462, "step": 365400 }, { "epoch": 7.107438016528926, "grad_norm": 4.975159168243408, "learning_rate": 1.4492262773722628e-05, "loss": 1.4428, "step": 365500 }, { "epoch": 7.109382596013612, "grad_norm": 7.014932155609131, "learning_rate": 1.4482530413625307e-05, "loss": 1.4192, "step": 365600 }, { "epoch": 7.111327175498299, "grad_norm": 6.792420864105225, "learning_rate": 1.447279805352798e-05, "loss": 1.4867, "step": 365700 }, { "epoch": 7.113271754982985, "grad_norm": 4.142391681671143, "learning_rate": 1.4463065693430657e-05, "loss": 1.4518, "step": 365800 }, { "epoch": 7.1152163344676715, "grad_norm": 7.1769256591796875, "learning_rate": 1.4453333333333336e-05, "loss": 1.5003, "step": 365900 }, { "epoch": 7.1171609139523575, "grad_norm": 4.3597412109375, "learning_rate": 1.444360097323601e-05, "loss": 1.5593, "step": 366000 }, { "epoch": 7.119105493437044, "grad_norm": 4.053878307342529, "learning_rate": 1.4433868613138687e-05, "loss": 1.4692, "step": 366100 }, { "epoch": 7.12105007292173, "grad_norm": 5.724125385284424, "learning_rate": 1.4424136253041362e-05, "loss": 1.4397, "step": 366200 }, { "epoch": 7.122994652406417, "grad_norm": 6.512597560882568, "learning_rate": 1.4414403892944039e-05, "loss": 1.367, "step": 366300 }, { "epoch": 7.124939231891103, "grad_norm": 5.76318359375, "learning_rate": 1.4404671532846716e-05, "loss": 1.4702, "step": 366400 }, { "epoch": 7.12688381137579, "grad_norm": 5.399477958679199, "learning_rate": 1.4394939172749391e-05, "loss": 1.4109, "step": 366500 }, { "epoch": 7.128828390860477, "grad_norm": 6.772884845733643, "learning_rate": 1.4385206812652068e-05, "loss": 1.3882, "step": 366600 }, { "epoch": 7.130772970345163, "grad_norm": 2.0409352779388428, "learning_rate": 1.4375474452554746e-05, "loss": 1.6462, "step": 366700 }, { "epoch": 7.13271754982985, "grad_norm": 8.782021522521973, "learning_rate": 1.436574209245742e-05, "loss": 1.5115, "step": 366800 
}, { "epoch": 7.134662129314536, "grad_norm": 6.470507621765137, "learning_rate": 1.4356009732360098e-05, "loss": 1.4069, "step": 366900 }, { "epoch": 7.136606708799222, "grad_norm": 7.07904577255249, "learning_rate": 1.4346277372262777e-05, "loss": 1.447, "step": 367000 }, { "epoch": 7.138551288283908, "grad_norm": 8.573386192321777, "learning_rate": 1.433654501216545e-05, "loss": 1.6015, "step": 367100 }, { "epoch": 7.140495867768595, "grad_norm": 5.569827079772949, "learning_rate": 1.4326812652068127e-05, "loss": 1.2652, "step": 367200 }, { "epoch": 7.142440447253281, "grad_norm": 6.280757904052734, "learning_rate": 1.4317080291970803e-05, "loss": 1.4156, "step": 367300 }, { "epoch": 7.144385026737968, "grad_norm": 3.6169281005859375, "learning_rate": 1.430734793187348e-05, "loss": 1.3484, "step": 367400 }, { "epoch": 7.146329606222654, "grad_norm": 4.552729606628418, "learning_rate": 1.4297615571776157e-05, "loss": 1.3913, "step": 367500 }, { "epoch": 7.148274185707341, "grad_norm": 3.536318063735962, "learning_rate": 1.4287883211678832e-05, "loss": 1.4272, "step": 367600 }, { "epoch": 7.150218765192027, "grad_norm": 3.6698389053344727, "learning_rate": 1.4278150851581509e-05, "loss": 1.3824, "step": 367700 }, { "epoch": 7.152163344676714, "grad_norm": 3.739919424057007, "learning_rate": 1.4268418491484186e-05, "loss": 1.3182, "step": 367800 }, { "epoch": 7.1541079241614005, "grad_norm": 10.627506256103516, "learning_rate": 1.4258686131386861e-05, "loss": 1.4477, "step": 367900 }, { "epoch": 7.1560525036460865, "grad_norm": 6.256158828735352, "learning_rate": 1.4248953771289538e-05, "loss": 1.4245, "step": 368000 }, { "epoch": 7.157997083130773, "grad_norm": 3.1087613105773926, "learning_rate": 1.4239221411192214e-05, "loss": 1.4827, "step": 368100 }, { "epoch": 7.159941662615459, "grad_norm": 5.214667797088623, "learning_rate": 1.422948905109489e-05, "loss": 1.5604, "step": 368200 }, { "epoch": 7.161886242100146, "grad_norm": 4.501950740814209, 
"learning_rate": 1.4219756690997568e-05, "loss": 1.5015, "step": 368300 }, { "epoch": 7.163830821584832, "grad_norm": 10.978050231933594, "learning_rate": 1.4210024330900243e-05, "loss": 1.4417, "step": 368400 }, { "epoch": 7.165775401069519, "grad_norm": 4.747421741485596, "learning_rate": 1.4200389294403892e-05, "loss": 1.5249, "step": 368500 }, { "epoch": 7.167719980554205, "grad_norm": 16.96912956237793, "learning_rate": 1.4190656934306571e-05, "loss": 1.4161, "step": 368600 }, { "epoch": 7.169664560038892, "grad_norm": 3.7778005599975586, "learning_rate": 1.4180924574209248e-05, "loss": 1.5445, "step": 368700 }, { "epoch": 7.171609139523578, "grad_norm": 5.817175388336182, "learning_rate": 1.4171192214111922e-05, "loss": 1.5116, "step": 368800 }, { "epoch": 7.1735537190082646, "grad_norm": 5.211709976196289, "learning_rate": 1.41614598540146e-05, "loss": 1.4287, "step": 368900 }, { "epoch": 7.1754982984929505, "grad_norm": 2.911386251449585, "learning_rate": 1.4151727493917274e-05, "loss": 1.5572, "step": 369000 }, { "epoch": 7.177442877977637, "grad_norm": 3.7709834575653076, "learning_rate": 1.4141995133819951e-05, "loss": 1.3185, "step": 369100 }, { "epoch": 7.179387457462324, "grad_norm": 2.2407033443450928, "learning_rate": 1.413226277372263e-05, "loss": 1.5666, "step": 369200 }, { "epoch": 7.18133203694701, "grad_norm": 4.2583208084106445, "learning_rate": 1.4122530413625303e-05, "loss": 1.459, "step": 369300 }, { "epoch": 7.183276616431697, "grad_norm": 3.4192683696746826, "learning_rate": 1.4112798053527982e-05, "loss": 1.6979, "step": 369400 }, { "epoch": 7.185221195916383, "grad_norm": 4.952481269836426, "learning_rate": 1.410306569343066e-05, "loss": 1.4515, "step": 369500 }, { "epoch": 7.18716577540107, "grad_norm": 5.862797260284424, "learning_rate": 1.4093333333333333e-05, "loss": 1.4503, "step": 369600 }, { "epoch": 7.189110354885756, "grad_norm": 4.456788539886475, "learning_rate": 1.4083600973236012e-05, "loss": 1.4152, "step": 369700 }, { 
"epoch": 7.191054934370443, "grad_norm": 2.448882579803467, "learning_rate": 1.4073868613138685e-05, "loss": 1.7683, "step": 369800 }, { "epoch": 7.192999513855129, "grad_norm": 9.631555557250977, "learning_rate": 1.4064136253041362e-05, "loss": 1.4993, "step": 369900 }, { "epoch": 7.194944093339815, "grad_norm": 3.566361427307129, "learning_rate": 1.4054403892944041e-05, "loss": 1.5005, "step": 370000 }, { "epoch": 7.194944093339815, "eval_accuracy": 0.5523388888888889, "eval_f1": 0.5384410401259389, "eval_loss": 1.1872459650039673, "eval_precision": 0.5644183003297011, "eval_recall": 0.5523388888888889, "eval_runtime": 11713.8233, "eval_samples_per_second": 15.366, "eval_steps_per_second": 0.48, "step": 370000 }, { "epoch": 7.196888672824501, "grad_norm": 6.948295593261719, "learning_rate": 1.4044671532846715e-05, "loss": 1.4867, "step": 370100 }, { "epoch": 7.198833252309188, "grad_norm": 3.9727628231048584, "learning_rate": 1.4034939172749392e-05, "loss": 1.3881, "step": 370200 }, { "epoch": 7.200777831793874, "grad_norm": 4.015213489532471, "learning_rate": 1.402520681265207e-05, "loss": 1.4132, "step": 370300 }, { "epoch": 7.202722411278561, "grad_norm": 6.6295647621154785, "learning_rate": 1.4015474452554744e-05, "loss": 1.399, "step": 370400 }, { "epoch": 7.204666990763247, "grad_norm": 14.827139854431152, "learning_rate": 1.4005742092457423e-05, "loss": 1.4469, "step": 370500 }, { "epoch": 7.206611570247934, "grad_norm": 4.597781181335449, "learning_rate": 1.39960097323601e-05, "loss": 1.6769, "step": 370600 }, { "epoch": 7.208556149732621, "grad_norm": 3.9348855018615723, "learning_rate": 1.3986277372262773e-05, "loss": 1.4993, "step": 370700 }, { "epoch": 7.210500729217307, "grad_norm": 5.664880752563477, "learning_rate": 1.3976545012165452e-05, "loss": 1.5471, "step": 370800 }, { "epoch": 7.2124453087019935, "grad_norm": 4.544150352478027, "learning_rate": 1.3966812652068126e-05, "loss": 1.3459, "step": 370900 }, { "epoch": 7.2143898881866795, 
"grad_norm": 5.878597259521484, "learning_rate": 1.3957080291970803e-05, "loss": 1.4689, "step": 371000 }, { "epoch": 7.216334467671366, "grad_norm": 3.688917875289917, "learning_rate": 1.3947347931873481e-05, "loss": 1.3186, "step": 371100 }, { "epoch": 7.218279047156052, "grad_norm": 3.146144390106201, "learning_rate": 1.3937615571776155e-05, "loss": 1.5185, "step": 371200 }, { "epoch": 7.220223626640739, "grad_norm": 3.1285433769226074, "learning_rate": 1.3927883211678832e-05, "loss": 1.5946, "step": 371300 }, { "epoch": 7.222168206125425, "grad_norm": 5.018423080444336, "learning_rate": 1.391815085158151e-05, "loss": 1.472, "step": 371400 }, { "epoch": 7.224112785610112, "grad_norm": 5.560024738311768, "learning_rate": 1.3908418491484184e-05, "loss": 1.4246, "step": 371500 }, { "epoch": 7.226057365094798, "grad_norm": 3.7563185691833496, "learning_rate": 1.3898686131386861e-05, "loss": 1.4646, "step": 371600 }, { "epoch": 7.228001944579485, "grad_norm": 8.299745559692383, "learning_rate": 1.388895377128954e-05, "loss": 1.3973, "step": 371700 }, { "epoch": 7.229946524064171, "grad_norm": 4.649594783782959, "learning_rate": 1.3879221411192214e-05, "loss": 1.4447, "step": 371800 }, { "epoch": 7.231891103548858, "grad_norm": 2.035207986831665, "learning_rate": 1.3869489051094892e-05, "loss": 1.4109, "step": 371900 }, { "epoch": 7.233835683033544, "grad_norm": 11.148365020751953, "learning_rate": 1.3859756690997566e-05, "loss": 1.7464, "step": 372000 }, { "epoch": 7.23578026251823, "grad_norm": 5.527385711669922, "learning_rate": 1.3850024330900243e-05, "loss": 1.5505, "step": 372100 }, { "epoch": 7.237724842002917, "grad_norm": 4.612794399261475, "learning_rate": 1.3840291970802922e-05, "loss": 1.5497, "step": 372200 }, { "epoch": 7.239669421487603, "grad_norm": 2.8502044677734375, "learning_rate": 1.3830559610705595e-05, "loss": 1.4829, "step": 372300 }, { "epoch": 7.24161400097229, "grad_norm": 4.048866271972656, "learning_rate": 1.3820827250608272e-05, "loss": 
1.4187, "step": 372400 }, { "epoch": 7.243558580456976, "grad_norm": 4.0924882888793945, "learning_rate": 1.3811192214111923e-05, "loss": 1.4916, "step": 372500 }, { "epoch": 7.245503159941663, "grad_norm": 5.971703052520752, "learning_rate": 1.3801459854014597e-05, "loss": 1.3007, "step": 372600 }, { "epoch": 7.247447739426349, "grad_norm": 5.726802825927734, "learning_rate": 1.3791727493917276e-05, "loss": 1.348, "step": 372700 }, { "epoch": 7.249392318911036, "grad_norm": 4.831277847290039, "learning_rate": 1.3781995133819953e-05, "loss": 1.5619, "step": 372800 }, { "epoch": 7.251336898395722, "grad_norm": 2.2588117122650146, "learning_rate": 1.3772262773722626e-05, "loss": 1.5239, "step": 372900 }, { "epoch": 7.2532814778804084, "grad_norm": 3.9092018604278564, "learning_rate": 1.3762530413625305e-05, "loss": 1.4671, "step": 373000 }, { "epoch": 7.255226057365094, "grad_norm": 5.645890712738037, "learning_rate": 1.3752798053527982e-05, "loss": 1.402, "step": 373100 }, { "epoch": 7.257170636849781, "grad_norm": 4.089469909667969, "learning_rate": 1.374316301703163e-05, "loss": 1.6089, "step": 373200 }, { "epoch": 7.259115216334468, "grad_norm": 5.211938381195068, "learning_rate": 1.3733430656934307e-05, "loss": 1.4027, "step": 373300 }, { "epoch": 7.261059795819154, "grad_norm": 2.958750009536743, "learning_rate": 1.3723698296836984e-05, "loss": 1.4823, "step": 373400 }, { "epoch": 7.263004375303841, "grad_norm": 4.774558067321777, "learning_rate": 1.371396593673966e-05, "loss": 1.4343, "step": 373500 }, { "epoch": 7.264948954788527, "grad_norm": 4.151782989501953, "learning_rate": 1.3704233576642336e-05, "loss": 1.4653, "step": 373600 }, { "epoch": 7.266893534273214, "grad_norm": 2.1986477375030518, "learning_rate": 1.3694501216545013e-05, "loss": 1.6782, "step": 373700 }, { "epoch": 7.2688381137579, "grad_norm": 4.387908458709717, "learning_rate": 1.3684768856447689e-05, "loss": 1.5303, "step": 373800 }, { "epoch": 7.2707826932425865, "grad_norm": 
2.0328783988952637, "learning_rate": 1.3675036496350366e-05, "loss": 1.5946, "step": 373900 }, { "epoch": 7.2727272727272725, "grad_norm": 4.61506986618042, "learning_rate": 1.3665304136253043e-05, "loss": 1.4262, "step": 374000 }, { "epoch": 7.274671852211959, "grad_norm": 1.168467402458191, "learning_rate": 1.3655571776155718e-05, "loss": 1.5012, "step": 374100 }, { "epoch": 7.276616431696645, "grad_norm": 5.401371002197266, "learning_rate": 1.3645839416058395e-05, "loss": 1.6092, "step": 374200 }, { "epoch": 7.278561011181332, "grad_norm": 3.0987391471862793, "learning_rate": 1.363610705596107e-05, "loss": 1.5876, "step": 374300 }, { "epoch": 7.280505590666018, "grad_norm": 5.411991596221924, "learning_rate": 1.3626374695863747e-05, "loss": 1.3441, "step": 374400 }, { "epoch": 7.282450170150705, "grad_norm": 8.261824607849121, "learning_rate": 1.3616642335766424e-05, "loss": 1.5555, "step": 374500 }, { "epoch": 7.284394749635391, "grad_norm": 11.723038673400879, "learning_rate": 1.36069099756691e-05, "loss": 1.6259, "step": 374600 }, { "epoch": 7.286339329120078, "grad_norm": 6.093369483947754, "learning_rate": 1.3597177615571777e-05, "loss": 1.5404, "step": 374700 }, { "epoch": 7.288283908604765, "grad_norm": 4.110349655151367, "learning_rate": 1.3587445255474454e-05, "loss": 1.4829, "step": 374800 }, { "epoch": 7.290228488089451, "grad_norm": 4.032006740570068, "learning_rate": 1.3577712895377129e-05, "loss": 1.7526, "step": 374900 }, { "epoch": 7.292173067574137, "grad_norm": 3.3806936740875244, "learning_rate": 1.3567980535279806e-05, "loss": 1.4685, "step": 375000 }, { "epoch": 7.292173067574137, "eval_accuracy": 0.5607, "eval_f1": 0.5451027924238648, "eval_loss": 1.1734563112258911, "eval_precision": 0.5671396216634407, "eval_recall": 0.5607000000000001, "eval_runtime": 11713.3656, "eval_samples_per_second": 15.367, "eval_steps_per_second": 0.48, "step": 375000 }, { "epoch": 7.294117647058823, "grad_norm": 3.8498358726501465, "learning_rate": 
1.3558248175182483e-05, "loss": 1.7084, "step": 375100 }, { "epoch": 7.29606222654351, "grad_norm": 5.864315986633301, "learning_rate": 1.3548515815085158e-05, "loss": 1.434, "step": 375200 }, { "epoch": 7.298006806028196, "grad_norm": 5.6382341384887695, "learning_rate": 1.3538783454987835e-05, "loss": 1.5921, "step": 375300 }, { "epoch": 7.299951385512883, "grad_norm": 4.3411149978637695, "learning_rate": 1.352905109489051e-05, "loss": 1.5378, "step": 375400 }, { "epoch": 7.301895964997569, "grad_norm": 3.838376998901367, "learning_rate": 1.3519318734793188e-05, "loss": 1.5691, "step": 375500 }, { "epoch": 7.303840544482256, "grad_norm": 4.528079986572266, "learning_rate": 1.3509586374695865e-05, "loss": 1.4378, "step": 375600 }, { "epoch": 7.305785123966942, "grad_norm": 8.11723804473877, "learning_rate": 1.349985401459854e-05, "loss": 1.4835, "step": 375700 }, { "epoch": 7.307729703451629, "grad_norm": 2.5512802600860596, "learning_rate": 1.3490121654501217e-05, "loss": 1.4916, "step": 375800 }, { "epoch": 7.3096742829363155, "grad_norm": 5.932117938995361, "learning_rate": 1.3480389294403894e-05, "loss": 1.3553, "step": 375900 }, { "epoch": 7.3116188624210015, "grad_norm": 5.672201633453369, "learning_rate": 1.3470754257907542e-05, "loss": 1.4142, "step": 376000 }, { "epoch": 7.313563441905688, "grad_norm": 2.8071255683898926, "learning_rate": 1.3461021897810219e-05, "loss": 1.3927, "step": 376100 }, { "epoch": 7.315508021390374, "grad_norm": 4.406538963317871, "learning_rate": 1.3451289537712896e-05, "loss": 1.3837, "step": 376200 }, { "epoch": 7.317452600875061, "grad_norm": 7.233654975891113, "learning_rate": 1.3441557177615571e-05, "loss": 1.5219, "step": 376300 }, { "epoch": 7.319397180359747, "grad_norm": 3.9293465614318848, "learning_rate": 1.3431824817518248e-05, "loss": 1.4024, "step": 376400 }, { "epoch": 7.321341759844434, "grad_norm": 2.8937549591064453, "learning_rate": 1.3422092457420927e-05, "loss": 1.4096, "step": 376500 }, { "epoch": 
7.32328633932912, "grad_norm": 3.1384670734405518, "learning_rate": 1.34123600973236e-05, "loss": 1.4234, "step": 376600 }, { "epoch": 7.325230918813807, "grad_norm": 3.71097731590271, "learning_rate": 1.3402627737226278e-05, "loss": 1.3735, "step": 376700 }, { "epoch": 7.327175498298493, "grad_norm": 2.899808883666992, "learning_rate": 1.3392895377128956e-05, "loss": 1.4784, "step": 376800 }, { "epoch": 7.3291200777831795, "grad_norm": 11.509203910827637, "learning_rate": 1.338316301703163e-05, "loss": 1.6261, "step": 376900 }, { "epoch": 7.3310646572678655, "grad_norm": 4.681097984313965, "learning_rate": 1.3373430656934307e-05, "loss": 1.4137, "step": 377000 }, { "epoch": 7.333009236752552, "grad_norm": 2.781247854232788, "learning_rate": 1.3363698296836982e-05, "loss": 1.6255, "step": 377100 }, { "epoch": 7.334953816237238, "grad_norm": 14.652652740478516, "learning_rate": 1.335396593673966e-05, "loss": 1.5403, "step": 377200 }, { "epoch": 7.336898395721925, "grad_norm": 10.559048652648926, "learning_rate": 1.3344233576642336e-05, "loss": 1.4594, "step": 377300 }, { "epoch": 7.338842975206612, "grad_norm": 2.6873152256011963, "learning_rate": 1.3334501216545012e-05, "loss": 1.3459, "step": 377400 }, { "epoch": 7.340787554691298, "grad_norm": 2.0440752506256104, "learning_rate": 1.3324768856447689e-05, "loss": 1.5597, "step": 377500 }, { "epoch": 7.342732134175985, "grad_norm": 4.165746688842773, "learning_rate": 1.3315036496350367e-05, "loss": 1.3918, "step": 377600 }, { "epoch": 7.344676713660671, "grad_norm": 12.213830947875977, "learning_rate": 1.3305304136253041e-05, "loss": 1.3168, "step": 377700 }, { "epoch": 7.346621293145358, "grad_norm": 6.676957607269287, "learning_rate": 1.3295571776155718e-05, "loss": 1.5451, "step": 377800 }, { "epoch": 7.348565872630044, "grad_norm": 8.998848915100098, "learning_rate": 1.3285839416058397e-05, "loss": 1.3685, "step": 377900 }, { "epoch": 7.35051045211473, "grad_norm": 4.663652420043945, "learning_rate": 
1.327610705596107e-05, "loss": 1.5258, "step": 378000 }, { "epoch": 7.352455031599416, "grad_norm": 2.1335527896881104, "learning_rate": 1.3266374695863747e-05, "loss": 1.5779, "step": 378100 }, { "epoch": 7.354399611084103, "grad_norm": 5.396026134490967, "learning_rate": 1.3256642335766423e-05, "loss": 1.4651, "step": 378200 }, { "epoch": 7.356344190568789, "grad_norm": 5.164883613586426, "learning_rate": 1.32469099756691e-05, "loss": 1.5143, "step": 378300 }, { "epoch": 7.358288770053476, "grad_norm": 2.787472724914551, "learning_rate": 1.3237177615571777e-05, "loss": 1.4411, "step": 378400 }, { "epoch": 7.360233349538162, "grad_norm": 3.8885350227355957, "learning_rate": 1.3227445255474452e-05, "loss": 1.4322, "step": 378500 }, { "epoch": 7.362177929022849, "grad_norm": 3.964566230773926, "learning_rate": 1.3217712895377129e-05, "loss": 1.4721, "step": 378600 }, { "epoch": 7.364122508507535, "grad_norm": 5.370393753051758, "learning_rate": 1.3207980535279806e-05, "loss": 1.6026, "step": 378700 }, { "epoch": 7.366067087992222, "grad_norm": 4.5028300285339355, "learning_rate": 1.3198248175182481e-05, "loss": 1.6152, "step": 378800 }, { "epoch": 7.3680116674769085, "grad_norm": 5.21793794631958, "learning_rate": 1.3188515815085158e-05, "loss": 1.3568, "step": 378900 }, { "epoch": 7.3699562469615945, "grad_norm": 3.165051221847534, "learning_rate": 1.3178783454987837e-05, "loss": 1.5558, "step": 379000 }, { "epoch": 7.371900826446281, "grad_norm": 4.893402099609375, "learning_rate": 1.316905109489051e-05, "loss": 1.3958, "step": 379100 }, { "epoch": 7.373845405930967, "grad_norm": 5.0999369621276855, "learning_rate": 1.3159318734793188e-05, "loss": 1.4428, "step": 379200 }, { "epoch": 7.375789985415654, "grad_norm": 3.4964404106140137, "learning_rate": 1.3149586374695863e-05, "loss": 1.4771, "step": 379300 }, { "epoch": 7.37773456490034, "grad_norm": 6.2254509925842285, "learning_rate": 1.313985401459854e-05, "loss": 1.5376, "step": 379400 }, { "epoch": 
7.379679144385027, "grad_norm": 5.281836032867432, "learning_rate": 1.3130121654501217e-05, "loss": 1.3638, "step": 379500 }, { "epoch": 7.381623723869713, "grad_norm": 10.532135963439941, "learning_rate": 1.3120389294403893e-05, "loss": 1.4423, "step": 379600 }, { "epoch": 7.3835683033544, "grad_norm": 4.445597171783447, "learning_rate": 1.311065693430657e-05, "loss": 1.6312, "step": 379700 }, { "epoch": 7.385512882839086, "grad_norm": 4.417415142059326, "learning_rate": 1.3100924574209247e-05, "loss": 1.5333, "step": 379800 }, { "epoch": 7.3874574623237725, "grad_norm": 3.2296712398529053, "learning_rate": 1.3091192214111922e-05, "loss": 1.4044, "step": 379900 }, { "epoch": 7.389402041808459, "grad_norm": 10.164597511291504, "learning_rate": 1.3081557177615571e-05, "loss": 1.373, "step": 380000 }, { "epoch": 7.389402041808459, "eval_accuracy": 0.5652222222222222, "eval_f1": 0.5557461492580578, "eval_loss": 1.159722924232483, "eval_precision": 0.5725660517315757, "eval_recall": 0.5652222222222223, "eval_runtime": 11725.2827, "eval_samples_per_second": 15.351, "eval_steps_per_second": 0.48, "step": 380000 }, { "epoch": 7.391346621293145, "grad_norm": 11.193039894104004, "learning_rate": 1.307182481751825e-05, "loss": 1.5826, "step": 380100 }, { "epoch": 7.393291200777832, "grad_norm": 10.941433906555176, "learning_rate": 1.3062092457420924e-05, "loss": 1.629, "step": 380200 }, { "epoch": 7.395235780262518, "grad_norm": 3.7060422897338867, "learning_rate": 1.3052360097323602e-05, "loss": 1.5393, "step": 380300 }, { "epoch": 7.397180359747205, "grad_norm": 5.208335876464844, "learning_rate": 1.304262773722628e-05, "loss": 1.4547, "step": 380400 }, { "epoch": 7.399124939231891, "grad_norm": 6.496133804321289, "learning_rate": 1.3032895377128953e-05, "loss": 1.3349, "step": 380500 }, { "epoch": 7.401069518716578, "grad_norm": 3.397002696990967, "learning_rate": 1.3023163017031632e-05, "loss": 1.3312, "step": 380600 }, { "epoch": 7.403014098201264, "grad_norm": 
5.012977123260498, "learning_rate": 1.3013430656934309e-05, "loss": 1.325, "step": 380700 }, { "epoch": 7.404958677685951, "grad_norm": 3.761706590652466, "learning_rate": 1.3003698296836982e-05, "loss": 1.5103, "step": 380800 }, { "epoch": 7.406903257170637, "grad_norm": 4.514492988586426, "learning_rate": 1.2993965936739661e-05, "loss": 1.4742, "step": 380900 }, { "epoch": 7.408847836655323, "grad_norm": 6.8923516273498535, "learning_rate": 1.2984233576642335e-05, "loss": 1.5744, "step": 381000 }, { "epoch": 7.410792416140009, "grad_norm": 2.1312167644500732, "learning_rate": 1.2974501216545012e-05, "loss": 1.4658, "step": 381100 }, { "epoch": 7.412736995624696, "grad_norm": 5.136524677276611, "learning_rate": 1.296476885644769e-05, "loss": 1.416, "step": 381200 }, { "epoch": 7.414681575109382, "grad_norm": 21.042083740234375, "learning_rate": 1.2955036496350364e-05, "loss": 1.4813, "step": 381300 }, { "epoch": 7.416626154594069, "grad_norm": 5.820967674255371, "learning_rate": 1.2945304136253043e-05, "loss": 1.4442, "step": 381400 }, { "epoch": 7.418570734078756, "grad_norm": 9.195882797241211, "learning_rate": 1.293557177615572e-05, "loss": 1.4033, "step": 381500 }, { "epoch": 7.420515313563442, "grad_norm": 6.222693920135498, "learning_rate": 1.2925839416058393e-05, "loss": 1.5906, "step": 381600 }, { "epoch": 7.422459893048129, "grad_norm": 4.664599895477295, "learning_rate": 1.2916107055961072e-05, "loss": 1.4912, "step": 381700 }, { "epoch": 7.424404472532815, "grad_norm": 16.082107543945312, "learning_rate": 1.2906374695863749e-05, "loss": 1.5239, "step": 381800 }, { "epoch": 7.4263490520175015, "grad_norm": 2.9351227283477783, "learning_rate": 1.2896642335766423e-05, "loss": 1.5602, "step": 381900 }, { "epoch": 7.4282936315021875, "grad_norm": 3.840715169906616, "learning_rate": 1.2886909975669101e-05, "loss": 1.6555, "step": 382000 }, { "epoch": 7.430238210986874, "grad_norm": 4.8395280838012695, "learning_rate": 1.2877274939172751e-05, "loss": 1.5794, 
"step": 382100 }, { "epoch": 7.43218279047156, "grad_norm": 7.141819477081299, "learning_rate": 1.2867542579075426e-05, "loss": 1.4951, "step": 382200 }, { "epoch": 7.434127369956247, "grad_norm": 5.874687671661377, "learning_rate": 1.2857810218978103e-05, "loss": 1.5288, "step": 382300 }, { "epoch": 7.436071949440933, "grad_norm": 4.333919048309326, "learning_rate": 1.284807785888078e-05, "loss": 1.4859, "step": 382400 }, { "epoch": 7.43801652892562, "grad_norm": 1.1668999195098877, "learning_rate": 1.2838345498783456e-05, "loss": 1.6115, "step": 382500 }, { "epoch": 7.439961108410306, "grad_norm": 4.180711269378662, "learning_rate": 1.2828613138686133e-05, "loss": 1.562, "step": 382600 }, { "epoch": 7.441905687894993, "grad_norm": 2.6757564544677734, "learning_rate": 1.2818880778588808e-05, "loss": 1.4602, "step": 382700 }, { "epoch": 7.443850267379679, "grad_norm": 3.054635524749756, "learning_rate": 1.2809148418491485e-05, "loss": 1.5617, "step": 382800 }, { "epoch": 7.4457948468643655, "grad_norm": 5.861843109130859, "learning_rate": 1.2799416058394162e-05, "loss": 1.3797, "step": 382900 }, { "epoch": 7.447739426349052, "grad_norm": 3.6079301834106445, "learning_rate": 1.2789683698296837e-05, "loss": 1.4204, "step": 383000 }, { "epoch": 7.449684005833738, "grad_norm": 4.12105131149292, "learning_rate": 1.2779951338199514e-05, "loss": 1.4878, "step": 383100 }, { "epoch": 7.451628585318425, "grad_norm": 4.902799606323242, "learning_rate": 1.2770218978102191e-05, "loss": 1.4261, "step": 383200 }, { "epoch": 7.453573164803111, "grad_norm": 5.255555152893066, "learning_rate": 1.2760486618004867e-05, "loss": 1.5148, "step": 383300 }, { "epoch": 7.455517744287798, "grad_norm": 5.609690189361572, "learning_rate": 1.2750754257907544e-05, "loss": 1.3353, "step": 383400 }, { "epoch": 7.457462323772484, "grad_norm": 2.543059825897217, "learning_rate": 1.2741119221411193e-05, "loss": 1.3524, "step": 383500 }, { "epoch": 7.459406903257171, "grad_norm": 5.188281059265137, 
"learning_rate": 1.2731386861313868e-05, "loss": 1.4028, "step": 383600 }, { "epoch": 7.461351482741857, "grad_norm": 5.648746490478516, "learning_rate": 1.2721654501216545e-05, "loss": 1.4723, "step": 383700 }, { "epoch": 7.463296062226544, "grad_norm": 2.641937732696533, "learning_rate": 1.2711922141119222e-05, "loss": 1.6029, "step": 383800 }, { "epoch": 7.46524064171123, "grad_norm": 5.455081462860107, "learning_rate": 1.2702189781021898e-05, "loss": 1.5511, "step": 383900 }, { "epoch": 7.467185221195916, "grad_norm": 7.914240837097168, "learning_rate": 1.2692457420924575e-05, "loss": 1.5532, "step": 384000 }, { "epoch": 7.469129800680603, "grad_norm": 3.7569327354431152, "learning_rate": 1.2682725060827252e-05, "loss": 1.3489, "step": 384100 }, { "epoch": 7.471074380165289, "grad_norm": 7.259191989898682, "learning_rate": 1.2672992700729927e-05, "loss": 1.4354, "step": 384200 }, { "epoch": 7.473018959649976, "grad_norm": 3.578918218612671, "learning_rate": 1.2663260340632604e-05, "loss": 1.4237, "step": 384300 }, { "epoch": 7.474963539134662, "grad_norm": 4.631996154785156, "learning_rate": 1.265352798053528e-05, "loss": 1.42, "step": 384400 }, { "epoch": 7.476908118619349, "grad_norm": 4.303006172180176, "learning_rate": 1.2643795620437956e-05, "loss": 1.5193, "step": 384500 }, { "epoch": 7.478852698104035, "grad_norm": 4.006994724273682, "learning_rate": 1.2634063260340633e-05, "loss": 1.349, "step": 384600 }, { "epoch": 7.480797277588722, "grad_norm": 3.174062967300415, "learning_rate": 1.2624330900243309e-05, "loss": 1.4229, "step": 384700 }, { "epoch": 7.482741857073408, "grad_norm": 8.155875205993652, "learning_rate": 1.2614598540145986e-05, "loss": 1.3696, "step": 384800 }, { "epoch": 7.4846864365580945, "grad_norm": 3.0248894691467285, "learning_rate": 1.2604866180048663e-05, "loss": 1.3287, "step": 384900 }, { "epoch": 7.4866310160427805, "grad_norm": 3.095304489135742, "learning_rate": 1.2595133819951338e-05, "loss": 1.5504, "step": 385000 }, { 
"epoch": 7.4866310160427805, "eval_accuracy": 0.5517666666666666, "eval_f1": 0.5413268505969332, "eval_loss": 1.180282473564148, "eval_precision": 0.5732418392591322, "eval_recall": 0.5517666666666666, "eval_runtime": 11708.6908, "eval_samples_per_second": 15.373, "eval_steps_per_second": 0.48, "step": 385000 }, { "epoch": 7.488575595527467, "grad_norm": 17.03399085998535, "learning_rate": 1.2585401459854015e-05, "loss": 1.4031, "step": 385100 }, { "epoch": 7.490520175012153, "grad_norm": 2.626133441925049, "learning_rate": 1.2575669099756692e-05, "loss": 1.4396, "step": 385200 }, { "epoch": 7.49246475449684, "grad_norm": 4.160690784454346, "learning_rate": 1.2565936739659368e-05, "loss": 1.4391, "step": 385300 }, { "epoch": 7.494409333981526, "grad_norm": 5.843252658843994, "learning_rate": 1.2556204379562045e-05, "loss": 1.4673, "step": 385400 }, { "epoch": 7.496353913466213, "grad_norm": 7.45164155960083, "learning_rate": 1.254647201946472e-05, "loss": 1.4395, "step": 385500 }, { "epoch": 7.4982984929509, "grad_norm": 6.555979251861572, "learning_rate": 1.2536739659367397e-05, "loss": 1.5894, "step": 385600 }, { "epoch": 7.500243072435586, "grad_norm": 7.8105902671813965, "learning_rate": 1.2527007299270074e-05, "loss": 1.4137, "step": 385700 }, { "epoch": 7.502187651920273, "grad_norm": 5.232061862945557, "learning_rate": 1.251727493917275e-05, "loss": 1.3876, "step": 385800 }, { "epoch": 7.5041322314049586, "grad_norm": 7.805830955505371, "learning_rate": 1.2507542579075426e-05, "loss": 1.4257, "step": 385900 }, { "epoch": 7.506076810889645, "grad_norm": 3.970597505569458, "learning_rate": 1.2497810218978102e-05, "loss": 1.4786, "step": 386000 }, { "epoch": 7.508021390374331, "grad_norm": 3.608802556991577, "learning_rate": 1.248807785888078e-05, "loss": 1.6617, "step": 386100 }, { "epoch": 7.509965969859018, "grad_norm": 2.815657377243042, "learning_rate": 1.2478345498783456e-05, "loss": 1.5354, "step": 386200 }, { "epoch": 7.511910549343704, "grad_norm": 
2.2526419162750244, "learning_rate": 1.2468613138686133e-05, "loss": 1.4207, "step": 386300 }, { "epoch": 7.513855128828391, "grad_norm": 4.9899582862854, "learning_rate": 1.2458880778588808e-05, "loss": 1.4134, "step": 386400 }, { "epoch": 7.515799708313077, "grad_norm": 3.4460208415985107, "learning_rate": 1.2449245742092457e-05, "loss": 1.6213, "step": 386500 }, { "epoch": 7.517744287797764, "grad_norm": 6.16884183883667, "learning_rate": 1.2439513381995134e-05, "loss": 1.4053, "step": 386600 }, { "epoch": 7.51968886728245, "grad_norm": 4.995258331298828, "learning_rate": 1.2429781021897811e-05, "loss": 1.4924, "step": 386700 }, { "epoch": 7.521633446767137, "grad_norm": 6.214372158050537, "learning_rate": 1.2420048661800487e-05, "loss": 1.4607, "step": 386800 }, { "epoch": 7.523578026251823, "grad_norm": 4.023592472076416, "learning_rate": 1.2410316301703164e-05, "loss": 1.4919, "step": 386900 }, { "epoch": 7.525522605736509, "grad_norm": 4.3722100257873535, "learning_rate": 1.2400583941605839e-05, "loss": 1.7157, "step": 387000 }, { "epoch": 7.527467185221196, "grad_norm": 6.941134452819824, "learning_rate": 1.2390851581508516e-05, "loss": 1.5216, "step": 387100 }, { "epoch": 7.529411764705882, "grad_norm": 4.928178310394287, "learning_rate": 1.2381119221411193e-05, "loss": 1.5165, "step": 387200 }, { "epoch": 7.531356344190569, "grad_norm": 5.452522277832031, "learning_rate": 1.2371386861313868e-05, "loss": 1.4556, "step": 387300 }, { "epoch": 7.533300923675255, "grad_norm": 3.964320659637451, "learning_rate": 1.2361654501216545e-05, "loss": 1.533, "step": 387400 }, { "epoch": 7.535245503159942, "grad_norm": 3.5189340114593506, "learning_rate": 1.2351922141119222e-05, "loss": 1.5334, "step": 387500 }, { "epoch": 7.537190082644628, "grad_norm": 4.400242328643799, "learning_rate": 1.2342189781021898e-05, "loss": 1.454, "step": 387600 }, { "epoch": 7.539134662129315, "grad_norm": 6.360297679901123, "learning_rate": 1.2332457420924575e-05, "loss": 1.3706, "step": 
387700 }, { "epoch": 7.541079241614001, "grad_norm": 8.32119369506836, "learning_rate": 1.2322725060827252e-05, "loss": 1.4824, "step": 387800 }, { "epoch": 7.5430238210986875, "grad_norm": 2.567232847213745, "learning_rate": 1.2312992700729927e-05, "loss": 1.4347, "step": 387900 }, { "epoch": 7.5449684005833735, "grad_norm": 4.348623275756836, "learning_rate": 1.2303260340632604e-05, "loss": 1.6149, "step": 388000 }, { "epoch": 7.54691298006806, "grad_norm": 3.7355525493621826, "learning_rate": 1.229352798053528e-05, "loss": 1.5582, "step": 388100 }, { "epoch": 7.548857559552747, "grad_norm": 5.487592697143555, "learning_rate": 1.2283795620437956e-05, "loss": 1.5469, "step": 388200 }, { "epoch": 7.550802139037433, "grad_norm": 5.1818928718566895, "learning_rate": 1.2274063260340634e-05, "loss": 1.4225, "step": 388300 }, { "epoch": 7.552746718522119, "grad_norm": 3.696730136871338, "learning_rate": 1.2264330900243309e-05, "loss": 1.5091, "step": 388400 }, { "epoch": 7.554691298006806, "grad_norm": 4.793635368347168, "learning_rate": 1.2254598540145986e-05, "loss": 1.6819, "step": 388500 }, { "epoch": 7.556635877491493, "grad_norm": 5.568487167358398, "learning_rate": 1.2244866180048663e-05, "loss": 1.4847, "step": 388600 }, { "epoch": 7.558580456976179, "grad_norm": 2.9797725677490234, "learning_rate": 1.2235133819951338e-05, "loss": 1.4439, "step": 388700 }, { "epoch": 7.560525036460866, "grad_norm": 4.589217185974121, "learning_rate": 1.2225401459854015e-05, "loss": 1.4138, "step": 388800 }, { "epoch": 7.562469615945552, "grad_norm": 4.634342670440674, "learning_rate": 1.2215669099756692e-05, "loss": 1.4375, "step": 388900 }, { "epoch": 7.564414195430238, "grad_norm": 3.1399781703948975, "learning_rate": 1.2205936739659368e-05, "loss": 1.4941, "step": 389000 }, { "epoch": 7.566358774914924, "grad_norm": 5.171536922454834, "learning_rate": 1.2196204379562045e-05, "loss": 1.3986, "step": 389100 }, { "epoch": 7.568303354399611, "grad_norm": 10.188394546508789, 
"learning_rate": 1.218647201946472e-05, "loss": 1.6094, "step": 389200 }, { "epoch": 7.570247933884297, "grad_norm": 10.819273948669434, "learning_rate": 1.2176739659367397e-05, "loss": 1.669, "step": 389300 }, { "epoch": 7.572192513368984, "grad_norm": 3.3331918716430664, "learning_rate": 1.2167007299270074e-05, "loss": 1.4272, "step": 389400 }, { "epoch": 7.57413709285367, "grad_norm": 15.554542541503906, "learning_rate": 1.215727493917275e-05, "loss": 1.4945, "step": 389500 }, { "epoch": 7.576081672338357, "grad_norm": 7.285880088806152, "learning_rate": 1.2147542579075426e-05, "loss": 1.4567, "step": 389600 }, { "epoch": 7.578026251823044, "grad_norm": 5.208522796630859, "learning_rate": 1.2137810218978103e-05, "loss": 1.4565, "step": 389700 }, { "epoch": 7.57997083130773, "grad_norm": 4.139003276824951, "learning_rate": 1.2128077858880779e-05, "loss": 1.4198, "step": 389800 }, { "epoch": 7.5819154107924165, "grad_norm": 3.9209330081939697, "learning_rate": 1.2118345498783456e-05, "loss": 1.4293, "step": 389900 }, { "epoch": 7.5838599902771024, "grad_norm": 11.141642570495605, "learning_rate": 1.2108613138686133e-05, "loss": 1.4173, "step": 390000 }, { "epoch": 7.5838599902771024, "eval_accuracy": 0.5600777777777778, "eval_f1": 0.5454520348842914, "eval_loss": 1.170873999595642, "eval_precision": 0.566009400916926, "eval_recall": 0.5600777777777778, "eval_runtime": 11705.5222, "eval_samples_per_second": 15.377, "eval_steps_per_second": 0.481, "step": 390000 }, { "epoch": 7.585804569761789, "grad_norm": 8.432724952697754, "learning_rate": 1.2098880778588808e-05, "loss": 1.5891, "step": 390100 }, { "epoch": 7.587749149246475, "grad_norm": 4.877676010131836, "learning_rate": 1.2089148418491485e-05, "loss": 1.5406, "step": 390200 }, { "epoch": 7.589693728731162, "grad_norm": 7.485162734985352, "learning_rate": 1.207941605839416e-05, "loss": 1.3401, "step": 390300 }, { "epoch": 7.591638308215848, "grad_norm": 3.070404529571533, "learning_rate": 
1.2069683698296837e-05, "loss": 1.7437, "step": 390400 }, { "epoch": 7.593582887700535, "grad_norm": 4.1379265785217285, "learning_rate": 1.2060048661800487e-05, "loss": 1.5273, "step": 390500 }, { "epoch": 7.595527467185221, "grad_norm": 4.755558967590332, "learning_rate": 1.2050316301703164e-05, "loss": 1.4711, "step": 390600 }, { "epoch": 7.597472046669908, "grad_norm": 4.678364276885986, "learning_rate": 1.204058394160584e-05, "loss": 1.2786, "step": 390700 }, { "epoch": 7.599416626154595, "grad_norm": 5.76406717300415, "learning_rate": 1.2030948905109488e-05, "loss": 1.472, "step": 390800 }, { "epoch": 7.6013612056392805, "grad_norm": 4.036124229431152, "learning_rate": 1.2021216545012167e-05, "loss": 1.4307, "step": 390900 }, { "epoch": 7.6033057851239665, "grad_norm": 3.7961111068725586, "learning_rate": 1.2011484184914843e-05, "loss": 1.4602, "step": 391000 }, { "epoch": 7.605250364608653, "grad_norm": 3.5989832878112793, "learning_rate": 1.2001751824817518e-05, "loss": 1.3399, "step": 391100 }, { "epoch": 7.60719494409334, "grad_norm": 2.8095057010650635, "learning_rate": 1.1992019464720197e-05, "loss": 1.4499, "step": 391200 }, { "epoch": 7.609139523578026, "grad_norm": 13.137466430664062, "learning_rate": 1.1982287104622872e-05, "loss": 1.477, "step": 391300 }, { "epoch": 7.611084103062713, "grad_norm": 2.8563392162323, "learning_rate": 1.1972554744525547e-05, "loss": 1.3863, "step": 391400 }, { "epoch": 7.613028682547399, "grad_norm": 3.313957929611206, "learning_rate": 1.1962822384428224e-05, "loss": 1.4375, "step": 391500 }, { "epoch": 7.614973262032086, "grad_norm": 4.008072853088379, "learning_rate": 1.1953090024330901e-05, "loss": 1.4737, "step": 391600 }, { "epoch": 7.616917841516772, "grad_norm": 4.180424690246582, "learning_rate": 1.1943357664233577e-05, "loss": 1.6005, "step": 391700 }, { "epoch": 7.618862421001459, "grad_norm": 4.278572082519531, "learning_rate": 1.1933625304136254e-05, "loss": 1.4805, "step": 391800 }, { "epoch": 
7.620807000486145, "grad_norm": 5.402284145355225, "learning_rate": 1.1923892944038929e-05, "loss": 1.3568, "step": 391900 }, { "epoch": 7.622751579970831, "grad_norm": 4.856540679931641, "learning_rate": 1.1914160583941608e-05, "loss": 1.4231, "step": 392000 }, { "epoch": 7.624696159455517, "grad_norm": 3.5631964206695557, "learning_rate": 1.1904428223844283e-05, "loss": 1.4876, "step": 392100 }, { "epoch": 7.626640738940204, "grad_norm": 6.816437721252441, "learning_rate": 1.1894695863746958e-05, "loss": 1.3629, "step": 392200 }, { "epoch": 7.628585318424891, "grad_norm": 2.778501033782959, "learning_rate": 1.1884963503649637e-05, "loss": 1.5451, "step": 392300 }, { "epoch": 7.630529897909577, "grad_norm": 17.81403350830078, "learning_rate": 1.1875231143552312e-05, "loss": 1.4121, "step": 392400 }, { "epoch": 7.632474477394264, "grad_norm": 2.518784761428833, "learning_rate": 1.1865498783454988e-05, "loss": 1.5356, "step": 392500 }, { "epoch": 7.63441905687895, "grad_norm": 10.804983139038086, "learning_rate": 1.1855766423357665e-05, "loss": 1.5159, "step": 392600 }, { "epoch": 7.636363636363637, "grad_norm": 7.163889408111572, "learning_rate": 1.1846034063260342e-05, "loss": 1.4357, "step": 392700 }, { "epoch": 7.638308215848323, "grad_norm": 41.272159576416016, "learning_rate": 1.1836301703163017e-05, "loss": 1.4372, "step": 392800 }, { "epoch": 7.6402527953330095, "grad_norm": 4.598048686981201, "learning_rate": 1.1826569343065694e-05, "loss": 1.3938, "step": 392900 }, { "epoch": 7.6421973748176955, "grad_norm": 6.565249919891357, "learning_rate": 1.181683698296837e-05, "loss": 1.3889, "step": 393000 }, { "epoch": 7.644141954302382, "grad_norm": 3.015939235687256, "learning_rate": 1.1807104622871046e-05, "loss": 1.7747, "step": 393100 }, { "epoch": 7.646086533787068, "grad_norm": 3.533872604370117, "learning_rate": 1.1797372262773723e-05, "loss": 1.4268, "step": 393200 }, { "epoch": 7.648031113271755, "grad_norm": 4.453593730926514, "learning_rate": 
1.1787639902676399e-05, "loss": 1.591, "step": 393300 }, { "epoch": 7.649975692756441, "grad_norm": 4.9489850997924805, "learning_rate": 1.1777907542579077e-05, "loss": 1.5109, "step": 393400 }, { "epoch": 7.651920272241128, "grad_norm": 4.509812355041504, "learning_rate": 1.1768175182481753e-05, "loss": 1.4192, "step": 393500 }, { "epoch": 7.653864851725814, "grad_norm": 9.241694450378418, "learning_rate": 1.1758442822384428e-05, "loss": 1.572, "step": 393600 }, { "epoch": 7.655809431210501, "grad_norm": 5.514021873474121, "learning_rate": 1.1748710462287105e-05, "loss": 1.3743, "step": 393700 }, { "epoch": 7.657754010695188, "grad_norm": 1.6059168577194214, "learning_rate": 1.1738978102189782e-05, "loss": 1.3885, "step": 393800 }, { "epoch": 7.6596985901798735, "grad_norm": 7.18228006362915, "learning_rate": 1.1729245742092457e-05, "loss": 1.5932, "step": 393900 }, { "epoch": 7.66164316966456, "grad_norm": 7.294412136077881, "learning_rate": 1.1719513381995134e-05, "loss": 1.3721, "step": 394000 }, { "epoch": 7.663587749149246, "grad_norm": 1.5697866678237915, "learning_rate": 1.170978102189781e-05, "loss": 1.3798, "step": 394100 }, { "epoch": 7.665532328633933, "grad_norm": 3.370408296585083, "learning_rate": 1.1700048661800487e-05, "loss": 1.4404, "step": 394200 }, { "epoch": 7.667476908118619, "grad_norm": 3.547229051589966, "learning_rate": 1.1690316301703164e-05, "loss": 1.6015, "step": 394300 }, { "epoch": 7.669421487603306, "grad_norm": 9.914877891540527, "learning_rate": 1.168058394160584e-05, "loss": 1.6271, "step": 394400 }, { "epoch": 7.671366067087992, "grad_norm": 3.2540035247802734, "learning_rate": 1.1670851581508518e-05, "loss": 1.4368, "step": 394500 }, { "epoch": 7.673310646572679, "grad_norm": 5.208374500274658, "learning_rate": 1.1661119221411193e-05, "loss": 1.4303, "step": 394600 }, { "epoch": 7.675255226057365, "grad_norm": 8.958680152893066, "learning_rate": 1.1651386861313869e-05, "loss": 1.4857, "step": 394700 }, { "epoch": 
7.677199805542052, "grad_norm": 5.30008602142334, "learning_rate": 1.164175182481752e-05, "loss": 1.4207, "step": 394800 }, { "epoch": 7.6791443850267385, "grad_norm": 6.3815484046936035, "learning_rate": 1.1632019464720195e-05, "loss": 1.4727, "step": 394900 }, { "epoch": 7.681088964511424, "grad_norm": 6.39573335647583, "learning_rate": 1.1622287104622872e-05, "loss": 1.4251, "step": 395000 }, { "epoch": 7.681088964511424, "eval_accuracy": 0.5673666666666667, "eval_f1": 0.5574156016258378, "eval_loss": 1.16072678565979, "eval_precision": 0.5709947699292183, "eval_recall": 0.5673666666666667, "eval_runtime": 11702.4924, "eval_samples_per_second": 15.381, "eval_steps_per_second": 0.481, "step": 395000 }, { "epoch": 7.68303354399611, "grad_norm": 2.340610980987549, "learning_rate": 1.1612554744525549e-05, "loss": 1.4323, "step": 395100 }, { "epoch": 7.684978123480797, "grad_norm": 3.0859436988830566, "learning_rate": 1.1602822384428224e-05, "loss": 1.4594, "step": 395200 }, { "epoch": 7.686922702965484, "grad_norm": 3.048875570297241, "learning_rate": 1.1593090024330901e-05, "loss": 1.475, "step": 395300 }, { "epoch": 7.68886728245017, "grad_norm": 6.746722221374512, "learning_rate": 1.1583357664233577e-05, "loss": 1.4176, "step": 395400 }, { "epoch": 7.690811861934857, "grad_norm": 7.92321252822876, "learning_rate": 1.1573625304136254e-05, "loss": 1.4653, "step": 395500 }, { "epoch": 7.692756441419543, "grad_norm": 1.8439031839370728, "learning_rate": 1.156389294403893e-05, "loss": 1.5001, "step": 395600 }, { "epoch": 7.69470102090423, "grad_norm": 4.991579532623291, "learning_rate": 1.1554160583941606e-05, "loss": 1.3999, "step": 395700 }, { "epoch": 7.696645600388916, "grad_norm": 2.8870742321014404, "learning_rate": 1.1544428223844283e-05, "loss": 1.5404, "step": 395800 }, { "epoch": 7.6985901798736025, "grad_norm": 3.267101526260376, "learning_rate": 1.153469586374696e-05, "loss": 1.4641, "step": 395900 }, { "epoch": 7.7005347593582885, "grad_norm": 
5.172792434692383, "learning_rate": 1.1524963503649635e-05, "loss": 1.5034, "step": 396000 }, { "epoch": 7.702479338842975, "grad_norm": 4.482735633850098, "learning_rate": 1.1515231143552312e-05, "loss": 1.5018, "step": 396100 }, { "epoch": 7.704423918327661, "grad_norm": 17.155332565307617, "learning_rate": 1.150549878345499e-05, "loss": 1.4485, "step": 396200 }, { "epoch": 7.706368497812348, "grad_norm": 4.578517913818359, "learning_rate": 1.1495766423357665e-05, "loss": 1.5266, "step": 396300 }, { "epoch": 7.708313077297035, "grad_norm": 4.609488487243652, "learning_rate": 1.1486034063260342e-05, "loss": 1.4966, "step": 396400 }, { "epoch": 7.710257656781721, "grad_norm": 5.498297691345215, "learning_rate": 1.1476301703163017e-05, "loss": 1.3474, "step": 396500 }, { "epoch": 7.712202236266408, "grad_norm": 3.2082982063293457, "learning_rate": 1.1466569343065694e-05, "loss": 1.4006, "step": 396600 }, { "epoch": 7.714146815751094, "grad_norm": 3.3274641036987305, "learning_rate": 1.1456836982968371e-05, "loss": 1.3323, "step": 396700 }, { "epoch": 7.716091395235781, "grad_norm": 3.112683057785034, "learning_rate": 1.144720194647202e-05, "loss": 1.3553, "step": 396800 }, { "epoch": 7.7180359747204665, "grad_norm": 4.680258274078369, "learning_rate": 1.1437469586374697e-05, "loss": 1.4871, "step": 396900 }, { "epoch": 7.719980554205153, "grad_norm": 3.590513229370117, "learning_rate": 1.1427737226277373e-05, "loss": 1.5765, "step": 397000 }, { "epoch": 7.721925133689839, "grad_norm": 1.30685555934906, "learning_rate": 1.1418004866180048e-05, "loss": 1.4982, "step": 397100 }, { "epoch": 7.723869713174526, "grad_norm": 3.4067234992980957, "learning_rate": 1.1408272506082727e-05, "loss": 1.4816, "step": 397200 }, { "epoch": 7.725814292659212, "grad_norm": 8.87986946105957, "learning_rate": 1.1398540145985402e-05, "loss": 1.5708, "step": 397300 }, { "epoch": 7.727758872143899, "grad_norm": 7.142636299133301, "learning_rate": 1.1388807785888078e-05, "loss": 1.4901, 
"step": 397400 }, { "epoch": 7.729703451628585, "grad_norm": 5.542691230773926, "learning_rate": 1.1379075425790755e-05, "loss": 1.4941, "step": 397500 }, { "epoch": 7.731648031113272, "grad_norm": 2.9880502223968506, "learning_rate": 1.1369343065693432e-05, "loss": 1.3906, "step": 397600 }, { "epoch": 7.733592610597958, "grad_norm": 5.112235069274902, "learning_rate": 1.1359610705596107e-05, "loss": 1.6055, "step": 397700 }, { "epoch": 7.735537190082645, "grad_norm": 5.745707035064697, "learning_rate": 1.1349878345498784e-05, "loss": 1.555, "step": 397800 }, { "epoch": 7.7374817695673315, "grad_norm": 6.155984401702881, "learning_rate": 1.1340145985401461e-05, "loss": 1.421, "step": 397900 }, { "epoch": 7.739426349052017, "grad_norm": 4.013309955596924, "learning_rate": 1.1330413625304136e-05, "loss": 1.3417, "step": 398000 }, { "epoch": 7.741370928536704, "grad_norm": 1.7333327531814575, "learning_rate": 1.1320681265206813e-05, "loss": 1.4838, "step": 398100 }, { "epoch": 7.74331550802139, "grad_norm": 3.2396888732910156, "learning_rate": 1.1310948905109489e-05, "loss": 1.5221, "step": 398200 }, { "epoch": 7.745260087506077, "grad_norm": 5.171722412109375, "learning_rate": 1.1301216545012167e-05, "loss": 1.4032, "step": 398300 }, { "epoch": 7.747204666990763, "grad_norm": 6.28728723526001, "learning_rate": 1.1291484184914843e-05, "loss": 1.7379, "step": 398400 }, { "epoch": 7.74914924647545, "grad_norm": 6.632148265838623, "learning_rate": 1.1281751824817518e-05, "loss": 1.6218, "step": 398500 }, { "epoch": 7.751093825960136, "grad_norm": 3.3366496562957764, "learning_rate": 1.1272019464720195e-05, "loss": 1.5764, "step": 398600 }, { "epoch": 7.753038405444823, "grad_norm": 8.563538551330566, "learning_rate": 1.1262287104622872e-05, "loss": 1.4829, "step": 398700 }, { "epoch": 7.754982984929509, "grad_norm": null, "learning_rate": 1.1252652068126521e-05, "loss": 1.279, "step": 398800 }, { "epoch": 7.7569275644141955, "grad_norm": 3.185750961303711,
"learning_rate": 1.1242919708029198e-05, "loss": 1.4947, "step": 398900 }, { "epoch": 7.758872143898882, "grad_norm": 3.5791382789611816, "learning_rate": 1.1233187347931874e-05, "loss": 1.4525, "step": 399000 }, { "epoch": 7.760816723383568, "grad_norm": 6.691524028778076, "learning_rate": 1.122345498783455e-05, "loss": 1.3718, "step": 399100 }, { "epoch": 7.762761302868254, "grad_norm": 10.764842987060547, "learning_rate": 1.1213722627737226e-05, "loss": 1.4686, "step": 399200 }, { "epoch": 7.764705882352941, "grad_norm": 3.453605890274048, "learning_rate": 1.1203990267639903e-05, "loss": 1.382, "step": 399300 }, { "epoch": 7.766650461837628, "grad_norm": 5.76488733291626, "learning_rate": 1.119425790754258e-05, "loss": 1.5017, "step": 399400 }, { "epoch": 7.768595041322314, "grad_norm": 5.519949913024902, "learning_rate": 1.118462287104623e-05, "loss": 1.4207, "step": 399500 }, { "epoch": 7.770539620807001, "grad_norm": 5.795897960662842, "learning_rate": 1.1174890510948906e-05, "loss": 1.3752, "step": 399600 }, { "epoch": 7.772484200291687, "grad_norm": 4.637702465057373, "learning_rate": 1.1165158150851582e-05, "loss": 1.4829, "step": 399700 }, { "epoch": 7.774428779776374, "grad_norm": 6.814932346343994, "learning_rate": 1.1155425790754257e-05, "loss": 1.3333, "step": 399800 }, { "epoch": 7.7763733592610595, "grad_norm": 3.6366894245147705, "learning_rate": 1.1145693430656936e-05, "loss": 1.3955, "step": 399900 }, { "epoch": 7.778317938745746, "grad_norm": 5.810283184051514, "learning_rate": 1.1135961070559611e-05, "loss": 1.6129, "step": 400000 }, { "epoch": 7.778317938745746, "eval_accuracy": 0.5529777777777778, "eval_f1": 0.5418381248243563, "eval_loss": 1.1831293106079102, "eval_precision": 0.5610163907948137, "eval_recall": 0.5529777777777777, "eval_runtime": 11711.7108, "eval_samples_per_second": 15.369, "eval_steps_per_second": 0.48, "step": 400000 }, { "epoch": 7.780262518230432, "grad_norm": 5.929451942443848, "learning_rate": 1.1126228710462286e-05, 
"loss": 1.3677, "step": 400100 }, { "epoch": 7.782207097715119, "grad_norm": 6.705881118774414, "learning_rate": 1.1116496350364965e-05, "loss": 1.5248, "step": 400200 }, { "epoch": 7.784151677199805, "grad_norm": 6.739823341369629, "learning_rate": 1.110676399026764e-05, "loss": 1.6393, "step": 400300 }, { "epoch": 7.786096256684492, "grad_norm": 1.6656315326690674, "learning_rate": 1.1097031630170318e-05, "loss": 1.4829, "step": 400400 }, { "epoch": 7.788040836169179, "grad_norm": 8.506837844848633, "learning_rate": 1.1087299270072993e-05, "loss": 1.4712, "step": 400500 }, { "epoch": 7.789985415653865, "grad_norm": 8.978560447692871, "learning_rate": 1.107756690997567e-05, "loss": 1.3361, "step": 400600 }, { "epoch": 7.791929995138552, "grad_norm": 6.344644546508789, "learning_rate": 1.1067834549878347e-05, "loss": 1.4681, "step": 400700 }, { "epoch": 7.793874574623238, "grad_norm": 4.296962261199951, "learning_rate": 1.1058102189781022e-05, "loss": 1.5346, "step": 400800 }, { "epoch": 7.7958191541079245, "grad_norm": 3.440783739089966, "learning_rate": 1.1048369829683698e-05, "loss": 1.4817, "step": 400900 }, { "epoch": 7.79776373359261, "grad_norm": 3.201793909072876, "learning_rate": 1.1038637469586376e-05, "loss": 1.657, "step": 401000 }, { "epoch": 7.799708313077297, "grad_norm": 4.518594264984131, "learning_rate": 1.1028905109489052e-05, "loss": 1.6145, "step": 401100 }, { "epoch": 7.801652892561983, "grad_norm": 3.6199495792388916, "learning_rate": 1.1019172749391727e-05, "loss": 1.3856, "step": 401200 }, { "epoch": 7.80359747204667, "grad_norm": 3.6435048580169678, "learning_rate": 1.1009440389294406e-05, "loss": 1.4749, "step": 401300 }, { "epoch": 7.805542051531356, "grad_norm": 3.288015365600586, "learning_rate": 1.0999708029197081e-05, "loss": 1.6176, "step": 401400 }, { "epoch": 7.807486631016043, "grad_norm": 4.373770236968994, "learning_rate": 1.0989975669099756e-05, "loss": 1.4651, "step": 401500 }, { "epoch": 7.809431210500729, "grad_norm": 
6.883041858673096, "learning_rate": 1.0980243309002433e-05, "loss": 1.4202, "step": 401600 }, { "epoch": 7.811375789985416, "grad_norm": 5.34215784072876, "learning_rate": 1.097051094890511e-05, "loss": 1.4423, "step": 401700 }, { "epoch": 7.813320369470102, "grad_norm": 7.8216352462768555, "learning_rate": 1.0960778588807787e-05, "loss": 1.5679, "step": 401800 }, { "epoch": 7.8152649489547885, "grad_norm": 4.898365497589111, "learning_rate": 1.0951046228710463e-05, "loss": 1.4496, "step": 401900 }, { "epoch": 7.817209528439475, "grad_norm": 2.97902512550354, "learning_rate": 1.0941313868613138e-05, "loss": 1.4684, "step": 402000 }, { "epoch": 7.819154107924161, "grad_norm": 3.304945230484009, "learning_rate": 1.0931581508515817e-05, "loss": 1.6243, "step": 402100 }, { "epoch": 7.821098687408848, "grad_norm": 17.70952033996582, "learning_rate": 1.0921849148418492e-05, "loss": 1.5463, "step": 402200 }, { "epoch": 7.823043266893534, "grad_norm": 2.77830171585083, "learning_rate": 1.0912116788321167e-05, "loss": 1.5445, "step": 402300 }, { "epoch": 7.824987846378221, "grad_norm": 5.22880744934082, "learning_rate": 1.0902384428223846e-05, "loss": 1.5632, "step": 402400 }, { "epoch": 7.826932425862907, "grad_norm": 2.70027494430542, "learning_rate": 1.0892652068126521e-05, "loss": 1.6942, "step": 402500 }, { "epoch": 7.828877005347594, "grad_norm": 4.3437981605529785, "learning_rate": 1.0882919708029197e-05, "loss": 1.4041, "step": 402600 }, { "epoch": 7.83082158483228, "grad_norm": 5.51432466506958, "learning_rate": 1.0873187347931874e-05, "loss": 1.4101, "step": 402700 }, { "epoch": 7.832766164316967, "grad_norm": 13.698514938354492, "learning_rate": 1.086345498783455e-05, "loss": 1.5132, "step": 402800 }, { "epoch": 7.8347107438016526, "grad_norm": 3.5071969032287598, "learning_rate": 1.0853722627737228e-05, "loss": 1.473, "step": 402900 }, { "epoch": 7.836655323286339, "grad_norm": 4.353166103363037, "learning_rate": 1.0843990267639903e-05, "loss": 1.464, "step": 
403000 }, { "epoch": 7.838599902771026, "grad_norm": 20.92913818359375, "learning_rate": 1.0834257907542578e-05, "loss": 1.7181, "step": 403100 }, { "epoch": 7.840544482255712, "grad_norm": 8.275464057922363, "learning_rate": 1.0824525547445257e-05, "loss": 1.4781, "step": 403200 }, { "epoch": 7.842489061740398, "grad_norm": 8.232032775878906, "learning_rate": 1.0814793187347932e-05, "loss": 1.4558, "step": 403300 }, { "epoch": 7.844433641225085, "grad_norm": 9.088436126708984, "learning_rate": 1.0805060827250608e-05, "loss": 1.4799, "step": 403400 }, { "epoch": 7.846378220709772, "grad_norm": 3.4052720069885254, "learning_rate": 1.0795328467153285e-05, "loss": 1.3156, "step": 403500 }, { "epoch": 7.848322800194458, "grad_norm": 7.553286075592041, "learning_rate": 1.0785693430656934e-05, "loss": 1.4625, "step": 403600 }, { "epoch": 7.850267379679145, "grad_norm": 2.4946393966674805, "learning_rate": 1.0775961070559611e-05, "loss": 1.383, "step": 403700 }, { "epoch": 7.852211959163831, "grad_norm": 7.1149396896362305, "learning_rate": 1.0766228710462288e-05, "loss": 1.4788, "step": 403800 }, { "epoch": 7.8541565386485175, "grad_norm": 4.711982250213623, "learning_rate": 1.0756496350364964e-05, "loss": 1.4902, "step": 403900 }, { "epoch": 7.856101118133203, "grad_norm": 3.796036720275879, "learning_rate": 1.074676399026764e-05, "loss": 1.275, "step": 404000 }, { "epoch": 7.85804569761789, "grad_norm": 3.6764206886291504, "learning_rate": 1.0737031630170318e-05, "loss": 1.4688, "step": 404100 }, { "epoch": 7.859990277102576, "grad_norm": 4.800363063812256, "learning_rate": 1.0727299270072993e-05, "loss": 1.5481, "step": 404200 }, { "epoch": 7.861934856587263, "grad_norm": 3.316906213760376, "learning_rate": 1.071756690997567e-05, "loss": 1.5756, "step": 404300 }, { "epoch": 7.863879436071949, "grad_norm": 2.466282367706299, "learning_rate": 1.0707834549878345e-05, "loss": 1.4807, "step": 404400 }, { "epoch": 7.865824015556636, "grad_norm": 4.915641784667969, 
"learning_rate": 1.0698102189781022e-05, "loss": 1.3995, "step": 404500 }, { "epoch": 7.867768595041323, "grad_norm": 7.46768045425415, "learning_rate": 1.06883698296837e-05, "loss": 1.4032, "step": 404600 }, { "epoch": 7.869713174526009, "grad_norm": 6.393059730529785, "learning_rate": 1.0678637469586375e-05, "loss": 1.5047, "step": 404700 }, { "epoch": 7.871657754010696, "grad_norm": 3.3265342712402344, "learning_rate": 1.0668905109489052e-05, "loss": 1.4691, "step": 404800 }, { "epoch": 7.8736023334953815, "grad_norm": 3.5089058876037598, "learning_rate": 1.0659172749391729e-05, "loss": 1.3997, "step": 404900 }, { "epoch": 7.875546912980068, "grad_norm": 5.677258491516113, "learning_rate": 1.0649440389294404e-05, "loss": 1.4331, "step": 405000 }, { "epoch": 7.875546912980068, "eval_accuracy": 0.5626333333333333, "eval_f1": 0.5487998210127204, "eval_loss": 1.1715468168258667, "eval_precision": 0.5644956625981702, "eval_recall": 0.5626333333333333, "eval_runtime": 11700.7836, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 405000 }, { "epoch": 7.877491492464754, "grad_norm": 6.411734580993652, "learning_rate": 1.0639708029197081e-05, "loss": 1.3993, "step": 405100 }, { "epoch": 7.879436071949441, "grad_norm": 4.674322128295898, "learning_rate": 1.0629975669099756e-05, "loss": 1.301, "step": 405200 }, { "epoch": 7.881380651434127, "grad_norm": 5.7349724769592285, "learning_rate": 1.0620243309002433e-05, "loss": 1.3802, "step": 405300 }, { "epoch": 7.883325230918814, "grad_norm": 9.82263469696045, "learning_rate": 1.061051094890511e-05, "loss": 1.51, "step": 405400 }, { "epoch": 7.8852698104035, "grad_norm": 3.42452073097229, "learning_rate": 1.0600778588807786e-05, "loss": 1.5196, "step": 405500 }, { "epoch": 7.887214389888187, "grad_norm": 2.8295469284057617, "learning_rate": 1.0591143552311437e-05, "loss": 1.3403, "step": 405600 }, { "epoch": 7.889158969372873, "grad_norm": 4.274758815765381, "learning_rate": 1.0581411192214112e-05, 
"loss": 1.4751, "step": 405700 }, { "epoch": 7.89110354885756, "grad_norm": 5.457281112670898, "learning_rate": 1.0571678832116789e-05, "loss": 1.4338, "step": 405800 }, { "epoch": 7.893048128342246, "grad_norm": 6.5806498527526855, "learning_rate": 1.0561946472019466e-05, "loss": 1.4783, "step": 405900 }, { "epoch": 7.894992707826932, "grad_norm": 5.463772296905518, "learning_rate": 1.0552214111922141e-05, "loss": 1.501, "step": 406000 }, { "epoch": 7.896937287311619, "grad_norm": 28.45591163635254, "learning_rate": 1.0542481751824817e-05, "loss": 1.4148, "step": 406100 }, { "epoch": 7.898881866796305, "grad_norm": 7.726846694946289, "learning_rate": 1.0532749391727496e-05, "loss": 1.4235, "step": 406200 }, { "epoch": 7.900826446280992, "grad_norm": 6.0453996658325195, "learning_rate": 1.052301703163017e-05, "loss": 1.5215, "step": 406300 }, { "epoch": 7.902771025765678, "grad_norm": 5.514199256896973, "learning_rate": 1.0513284671532848e-05, "loss": 1.5994, "step": 406400 }, { "epoch": 7.904715605250365, "grad_norm": 3.484220266342163, "learning_rate": 1.0503552311435523e-05, "loss": 1.3722, "step": 406500 }, { "epoch": 7.906660184735051, "grad_norm": 3.6981379985809326, "learning_rate": 1.04938199513382e-05, "loss": 1.6306, "step": 406600 }, { "epoch": 7.908604764219738, "grad_norm": 9.035853385925293, "learning_rate": 1.0484087591240877e-05, "loss": 1.4691, "step": 406700 }, { "epoch": 7.910549343704424, "grad_norm": 4.670960426330566, "learning_rate": 1.0474355231143553e-05, "loss": 1.445, "step": 406800 }, { "epoch": 7.9124939231891105, "grad_norm": 5.308229446411133, "learning_rate": 1.046462287104623e-05, "loss": 1.3699, "step": 406900 }, { "epoch": 7.9144385026737964, "grad_norm": 4.243105411529541, "learning_rate": 1.0454890510948907e-05, "loss": 1.3547, "step": 407000 }, { "epoch": 7.916383082158483, "grad_norm": 2.1843340396881104, "learning_rate": 1.0445158150851582e-05, "loss": 1.3685, "step": 407100 }, { "epoch": 7.91832766164317, "grad_norm": 
2.6157386302948, "learning_rate": 1.0435425790754257e-05, "loss": 1.4643, "step": 407200 }, { "epoch": 7.920272241127856, "grad_norm": 4.057283401489258, "learning_rate": 1.0425693430656936e-05, "loss": 1.2838, "step": 407300 }, { "epoch": 7.922216820612542, "grad_norm": 3.7951064109802246, "learning_rate": 1.0415961070559611e-05, "loss": 1.3946, "step": 407400 }, { "epoch": 7.924161400097229, "grad_norm": 7.300729274749756, "learning_rate": 1.0406228710462287e-05, "loss": 1.5808, "step": 407500 }, { "epoch": 7.926105979581916, "grad_norm": 4.55383825302124, "learning_rate": 1.0396496350364964e-05, "loss": 1.4774, "step": 407600 }, { "epoch": 7.928050559066602, "grad_norm": 3.5816397666931152, "learning_rate": 1.0386861313868613e-05, "loss": 1.5473, "step": 407700 }, { "epoch": 7.929995138551289, "grad_norm": 2.5474565029144287, "learning_rate": 1.037712895377129e-05, "loss": 1.5517, "step": 407800 }, { "epoch": 7.9319397180359745, "grad_norm": 3.3825879096984863, "learning_rate": 1.0367396593673967e-05, "loss": 1.3614, "step": 407900 }, { "epoch": 7.933884297520661, "grad_norm": 12.051200866699219, "learning_rate": 1.0357664233576642e-05, "loss": 1.5088, "step": 408000 }, { "epoch": 7.935828877005347, "grad_norm": 6.482747554779053, "learning_rate": 1.034793187347932e-05, "loss": 1.674, "step": 408100 }, { "epoch": 7.937773456490034, "grad_norm": 4.043416500091553, "learning_rate": 1.0338199513381995e-05, "loss": 1.3514, "step": 408200 }, { "epoch": 7.93971803597472, "grad_norm": 4.841653823852539, "learning_rate": 1.0328467153284672e-05, "loss": 1.4318, "step": 408300 }, { "epoch": 7.941662615459407, "grad_norm": 6.822185516357422, "learning_rate": 1.0318734793187349e-05, "loss": 1.3595, "step": 408400 }, { "epoch": 7.943607194944093, "grad_norm": 7.578801155090332, "learning_rate": 1.0309002433090024e-05, "loss": 1.4745, "step": 408500 }, { "epoch": 7.94555177442878, "grad_norm": 4.892796039581299, "learning_rate": 1.0299270072992701e-05, "loss": 1.4989, "step": 
408600 }, { "epoch": 7.947496353913467, "grad_norm": 4.0610504150390625, "learning_rate": 1.0289537712895378e-05, "loss": 1.3557, "step": 408700 }, { "epoch": 7.949440933398153, "grad_norm": 3.592154026031494, "learning_rate": 1.0279805352798053e-05, "loss": 1.4634, "step": 408800 }, { "epoch": 7.9513855128828395, "grad_norm": 3.7747302055358887, "learning_rate": 1.027007299270073e-05, "loss": 1.5135, "step": 408900 }, { "epoch": 7.953330092367525, "grad_norm": 7.084679126739502, "learning_rate": 1.026043795620438e-05, "loss": 1.5194, "step": 409000 }, { "epoch": 7.955274671852212, "grad_norm": 5.799705505371094, "learning_rate": 1.0250705596107057e-05, "loss": 1.4086, "step": 409100 }, { "epoch": 7.957219251336898, "grad_norm": 3.084853172302246, "learning_rate": 1.0240973236009734e-05, "loss": 1.4387, "step": 409200 }, { "epoch": 7.959163830821585, "grad_norm": 2.726733446121216, "learning_rate": 1.023124087591241e-05, "loss": 1.4907, "step": 409300 }, { "epoch": 7.961108410306271, "grad_norm": 10.177786827087402, "learning_rate": 1.0221508515815086e-05, "loss": 1.3685, "step": 409400 }, { "epoch": 7.963052989790958, "grad_norm": 2.9775404930114746, "learning_rate": 1.0211776155717762e-05, "loss": 1.4253, "step": 409500 }, { "epoch": 7.964997569275644, "grad_norm": 6.120230674743652, "learning_rate": 1.0202043795620439e-05, "loss": 1.8803, "step": 409600 }, { "epoch": 7.966942148760331, "grad_norm": 7.440308570861816, "learning_rate": 1.0192311435523116e-05, "loss": 1.5012, "step": 409700 }, { "epoch": 7.968886728245017, "grad_norm": 4.990752696990967, "learning_rate": 1.0182579075425791e-05, "loss": 1.5307, "step": 409800 }, { "epoch": 7.9708313077297035, "grad_norm": 5.382612705230713, "learning_rate": 1.0172846715328466e-05, "loss": 1.3335, "step": 409900 }, { "epoch": 7.9727758872143895, "grad_norm": 23.478364944458008, "learning_rate": 1.0163114355231145e-05, "loss": 1.5966, "step": 410000 }, { "epoch": 7.9727758872143895, "eval_accuracy": 
0.5591944444444444, "eval_f1": 0.5411049609106189, "eval_loss": 1.1824848651885986, "eval_precision": 0.5623070493790744, "eval_recall": 0.5591944444444444, "eval_runtime": 11713.8736, "eval_samples_per_second": 15.366, "eval_steps_per_second": 0.48, "step": 410000 }, { "epoch": 7.974720466699076, "grad_norm": 5.971736907958984, "learning_rate": 1.015338199513382e-05, "loss": 1.5619, "step": 410100 }, { "epoch": 7.976665046183763, "grad_norm": 3.8860833644866943, "learning_rate": 1.0143649635036497e-05, "loss": 1.4245, "step": 410200 }, { "epoch": 7.978609625668449, "grad_norm": 1.6133008003234863, "learning_rate": 1.0133917274939174e-05, "loss": 1.5657, "step": 410300 }, { "epoch": 7.980554205153136, "grad_norm": 2.1746158599853516, "learning_rate": 1.012418491484185e-05, "loss": 1.3804, "step": 410400 }, { "epoch": 7.982498784637822, "grad_norm": 5.716184139251709, "learning_rate": 1.0114452554744527e-05, "loss": 1.5733, "step": 410500 }, { "epoch": 7.984443364122509, "grad_norm": 6.082252502441406, "learning_rate": 1.0104720194647202e-05, "loss": 1.4771, "step": 410600 }, { "epoch": 7.986387943607195, "grad_norm": 1.2574942111968994, "learning_rate": 1.0094987834549879e-05, "loss": 1.4542, "step": 410700 }, { "epoch": 7.988332523091882, "grad_norm": 18.15195655822754, "learning_rate": 1.0085255474452556e-05, "loss": 1.3807, "step": 410800 }, { "epoch": 7.9902771025765675, "grad_norm": 2.651533842086792, "learning_rate": 1.0075523114355231e-05, "loss": 1.3703, "step": 410900 }, { "epoch": 7.992221682061254, "grad_norm": 4.42024564743042, "learning_rate": 1.0065790754257907e-05, "loss": 1.4706, "step": 411000 }, { "epoch": 7.99416626154594, "grad_norm": 5.819663047790527, "learning_rate": 1.0056058394160585e-05, "loss": 1.6057, "step": 411100 }, { "epoch": 7.996110841030627, "grad_norm": 1.6238185167312622, "learning_rate": 1.004632603406326e-05, "loss": 1.3818, "step": 411200 }, { "epoch": 7.998055420515314, "grad_norm": 4.333930492401123, "learning_rate": 
1.0036593673965938e-05, "loss": 1.391, "step": 411300 }, { "epoch": 8.0, "grad_norm": 6.71078634262085, "learning_rate": 1.0026861313868613e-05, "loss": 1.4453, "step": 411400 }, { "epoch": 8.001944579484686, "grad_norm": 7.226273536682129, "learning_rate": 1.001712895377129e-05, "loss": 1.4259, "step": 411500 }, { "epoch": 8.003889158969374, "grad_norm": 4.970400333404541, "learning_rate": 1.0007396593673967e-05, "loss": 1.7052, "step": 411600 }, { "epoch": 8.00583373845406, "grad_norm": 5.170510292053223, "learning_rate": 9.997664233576642e-06, "loss": 1.4338, "step": 411700 }, { "epoch": 8.007778317938746, "grad_norm": 5.133332252502441, "learning_rate": 9.98793187347932e-06, "loss": 1.3847, "step": 411800 }, { "epoch": 8.009722897423432, "grad_norm": 2.6906967163085938, "learning_rate": 9.978199513381996e-06, "loss": 1.5063, "step": 411900 }, { "epoch": 8.01166747690812, "grad_norm": 3.6313862800598145, "learning_rate": 9.968467153284672e-06, "loss": 1.4537, "step": 412000 }, { "epoch": 8.013612056392805, "grad_norm": 7.155602931976318, "learning_rate": 9.958734793187347e-06, "loss": 1.5901, "step": 412100 }, { "epoch": 8.015556635877491, "grad_norm": 6.127588272094727, "learning_rate": 9.949002433090026e-06, "loss": 1.5289, "step": 412200 }, { "epoch": 8.017501215362177, "grad_norm": 3.368046283721924, "learning_rate": 9.939270072992701e-06, "loss": 1.5084, "step": 412300 }, { "epoch": 8.019445794846865, "grad_norm": 5.306887149810791, "learning_rate": 9.929537712895376e-06, "loss": 1.3665, "step": 412400 }, { "epoch": 8.02139037433155, "grad_norm": 4.841428756713867, "learning_rate": 9.919805352798053e-06, "loss": 1.3928, "step": 412500 }, { "epoch": 8.023334953816237, "grad_norm": 4.356631755828857, "learning_rate": 9.91007299270073e-06, "loss": 1.489, "step": 412600 }, { "epoch": 8.025279533300923, "grad_norm": 2.836371421813965, "learning_rate": 9.900340632603408e-06, "loss": 1.4053, "step": 412700 }, { "epoch": 8.02722411278561, "grad_norm": 
4.911273956298828, "learning_rate": 9.890608272506083e-06, "loss": 1.4679, "step": 412800 }, { "epoch": 8.029168692270297, "grad_norm": 4.292403221130371, "learning_rate": 9.88087591240876e-06, "loss": 1.4668, "step": 412900 }, { "epoch": 8.031113271754982, "grad_norm": 6.408909797668457, "learning_rate": 9.87124087591241e-06, "loss": 1.5031, "step": 413000 }, { "epoch": 8.03305785123967, "grad_norm": 4.037698745727539, "learning_rate": 9.861508515815085e-06, "loss": 1.5127, "step": 413100 }, { "epoch": 8.035002430724356, "grad_norm": 5.132932662963867, "learning_rate": 9.851776155717762e-06, "loss": 1.4026, "step": 413200 }, { "epoch": 8.036947010209042, "grad_norm": 3.5072522163391113, "learning_rate": 9.842141119221411e-06, "loss": 1.5126, "step": 413300 }, { "epoch": 8.038891589693728, "grad_norm": 8.595487594604492, "learning_rate": 9.832408759124088e-06, "loss": 1.6614, "step": 413400 }, { "epoch": 8.040836169178416, "grad_norm": 23.173616409301758, "learning_rate": 9.822676399026765e-06, "loss": 1.649, "step": 413500 }, { "epoch": 8.042780748663102, "grad_norm": 3.6549785137176514, "learning_rate": 9.81294403892944e-06, "loss": 1.4757, "step": 413600 }, { "epoch": 8.044725328147788, "grad_norm": 3.342169761657715, "learning_rate": 9.803211678832117e-06, "loss": 1.4904, "step": 413700 }, { "epoch": 8.046669907632474, "grad_norm": 5.9691033363342285, "learning_rate": 9.793479318734794e-06, "loss": 1.384, "step": 413800 }, { "epoch": 8.048614487117161, "grad_norm": 3.3850934505462646, "learning_rate": 9.78374695863747e-06, "loss": 1.5065, "step": 413900 }, { "epoch": 8.050559066601847, "grad_norm": 5.485231876373291, "learning_rate": 9.774014598540147e-06, "loss": 1.4653, "step": 414000 }, { "epoch": 8.052503646086533, "grad_norm": 13.017699241638184, "learning_rate": 9.764282238442824e-06, "loss": 1.4622, "step": 414100 }, { "epoch": 8.054448225571221, "grad_norm": 3.170003652572632, "learning_rate": 9.754549878345499e-06, "loss": 1.5958, "step": 414200 }, { 
"epoch": 8.056392805055907, "grad_norm": 4.46819543838501, "learning_rate": 9.744817518248176e-06, "loss": 1.4948, "step": 414300 }, { "epoch": 8.058337384540593, "grad_norm": 3.7590723037719727, "learning_rate": 9.735085158150851e-06, "loss": 1.419, "step": 414400 }, { "epoch": 8.060281964025279, "grad_norm": 8.267558097839355, "learning_rate": 9.725352798053528e-06, "loss": 1.6356, "step": 414500 }, { "epoch": 8.062226543509967, "grad_norm": 5.999807834625244, "learning_rate": 9.715620437956205e-06, "loss": 1.4991, "step": 414600 }, { "epoch": 8.064171122994653, "grad_norm": 5.934500694274902, "learning_rate": 9.70588807785888e-06, "loss": 1.5191, "step": 414700 }, { "epoch": 8.066115702479339, "grad_norm": 10.229401588439941, "learning_rate": 9.696155717761558e-06, "loss": 1.3894, "step": 414800 }, { "epoch": 8.068060281964025, "grad_norm": 2.615251064300537, "learning_rate": 9.686423357664235e-06, "loss": 1.4456, "step": 414900 }, { "epoch": 8.070004861448712, "grad_norm": 10.827305793762207, "learning_rate": 9.67669099756691e-06, "loss": 1.3413, "step": 415000 }, { "epoch": 8.070004861448712, "eval_accuracy": 0.55855, "eval_f1": 0.5486227080675073, "eval_loss": 1.1704530715942383, "eval_precision": 0.5687114028370817, "eval_recall": 0.55855, "eval_runtime": 11708.8388, "eval_samples_per_second": 15.373, "eval_steps_per_second": 0.48, "step": 415000 }, { "epoch": 8.071949440933398, "grad_norm": 6.06229305267334, "learning_rate": 9.666958637469587e-06, "loss": 1.3664, "step": 415100 }, { "epoch": 8.073894020418084, "grad_norm": 2.0550146102905273, "learning_rate": 9.657226277372264e-06, "loss": 1.4067, "step": 415200 }, { "epoch": 8.07583859990277, "grad_norm": 4.701744079589844, "learning_rate": 9.64749391727494e-06, "loss": 1.3713, "step": 415300 }, { "epoch": 8.077783179387458, "grad_norm": 7.075859069824219, "learning_rate": 9.637761557177617e-06, "loss": 1.4358, "step": 415400 }, { "epoch": 8.079727758872144, "grad_norm": 2.6451194286346436, 
"learning_rate": 9.628029197080292e-06, "loss": 1.4069, "step": 415500 }, { "epoch": 8.08167233835683, "grad_norm": 5.255343914031982, "learning_rate": 9.618296836982969e-06, "loss": 1.6226, "step": 415600 }, { "epoch": 8.083616917841518, "grad_norm": 5.258208751678467, "learning_rate": 9.608564476885646e-06, "loss": 1.5174, "step": 415700 }, { "epoch": 8.085561497326204, "grad_norm": 4.472553730010986, "learning_rate": 9.598832116788321e-06, "loss": 1.5251, "step": 415800 }, { "epoch": 8.08750607681089, "grad_norm": 3.210767984390259, "learning_rate": 9.589099756690997e-06, "loss": 1.5212, "step": 415900 }, { "epoch": 8.089450656295575, "grad_norm": 9.973603248596191, "learning_rate": 9.579367396593675e-06, "loss": 1.4734, "step": 416000 }, { "epoch": 8.091395235780263, "grad_norm": 3.0247139930725098, "learning_rate": 9.56963503649635e-06, "loss": 1.4309, "step": 416100 }, { "epoch": 8.09333981526495, "grad_norm": 4.900567531585693, "learning_rate": 9.559902676399028e-06, "loss": 1.4386, "step": 416200 }, { "epoch": 8.095284394749635, "grad_norm": 8.18513298034668, "learning_rate": 9.550170316301705e-06, "loss": 1.4521, "step": 416300 }, { "epoch": 8.097228974234321, "grad_norm": 6.38640832901001, "learning_rate": 9.54043795620438e-06, "loss": 1.4367, "step": 416400 }, { "epoch": 8.099173553719009, "grad_norm": 8.821578979492188, "learning_rate": 9.530705596107057e-06, "loss": 1.4649, "step": 416500 }, { "epoch": 8.101118133203695, "grad_norm": 6.641738414764404, "learning_rate": 9.520973236009732e-06, "loss": 1.549, "step": 416600 }, { "epoch": 8.10306271268838, "grad_norm": 5.900403022766113, "learning_rate": 9.51124087591241e-06, "loss": 1.4702, "step": 416700 }, { "epoch": 8.105007292173067, "grad_norm": 3.7385175228118896, "learning_rate": 9.501508515815086e-06, "loss": 1.3188, "step": 416800 }, { "epoch": 8.106951871657754, "grad_norm": 6.763862133026123, "learning_rate": 9.491776155717762e-06, "loss": 1.3736, "step": 416900 }, { "epoch": 8.10889645114244, 
"grad_norm": 3.205429792404175, "learning_rate": 9.482043795620437e-06, "loss": 1.4121, "step": 417000 }, { "epoch": 8.110841030627126, "grad_norm": 5.61266565322876, "learning_rate": 9.472311435523116e-06, "loss": 1.7632, "step": 417100 }, { "epoch": 8.112785610111814, "grad_norm": 4.549778938293457, "learning_rate": 9.462579075425791e-06, "loss": 1.4769, "step": 417200 }, { "epoch": 8.1147301895965, "grad_norm": 2.6924262046813965, "learning_rate": 9.452846715328468e-06, "loss": 1.4899, "step": 417300 }, { "epoch": 8.116674769081186, "grad_norm": 2.354316473007202, "learning_rate": 9.443211678832117e-06, "loss": 1.5281, "step": 417400 }, { "epoch": 8.118619348565872, "grad_norm": 4.044381141662598, "learning_rate": 9.433576642335767e-06, "loss": 1.3887, "step": 417500 }, { "epoch": 8.12056392805056, "grad_norm": 6.1016316413879395, "learning_rate": 9.423844282238444e-06, "loss": 1.4049, "step": 417600 }, { "epoch": 8.122508507535246, "grad_norm": 2.249946355819702, "learning_rate": 9.414111922141119e-06, "loss": 1.4527, "step": 417700 }, { "epoch": 8.124453087019932, "grad_norm": 4.067269325256348, "learning_rate": 9.404379562043796e-06, "loss": 1.3192, "step": 417800 }, { "epoch": 8.126397666504618, "grad_norm": 5.115975856781006, "learning_rate": 9.394647201946473e-06, "loss": 1.4528, "step": 417900 }, { "epoch": 8.128342245989305, "grad_norm": 4.240895748138428, "learning_rate": 9.384914841849148e-06, "loss": 1.8095, "step": 418000 }, { "epoch": 8.130286825473991, "grad_norm": 4.226661205291748, "learning_rate": 9.375182481751826e-06, "loss": 1.5291, "step": 418100 }, { "epoch": 8.132231404958677, "grad_norm": 2.7619547843933105, "learning_rate": 9.3654501216545e-06, "loss": 1.3748, "step": 418200 }, { "epoch": 8.134175984443365, "grad_norm": 4.334850311279297, "learning_rate": 9.355717761557178e-06, "loss": 1.6575, "step": 418300 }, { "epoch": 8.136120563928051, "grad_norm": 3.76060152053833, "learning_rate": 9.345985401459855e-06, "loss": 1.6519, "step": 
418400 }, { "epoch": 8.138065143412737, "grad_norm": 2.987659454345703, "learning_rate": 9.33625304136253e-06, "loss": 1.4959, "step": 418500 }, { "epoch": 8.140009722897423, "grad_norm": 4.5599870681762695, "learning_rate": 9.326520681265207e-06, "loss": 1.3578, "step": 418600 }, { "epoch": 8.14195430238211, "grad_norm": 4.580192565917969, "learning_rate": 9.316788321167884e-06, "loss": 1.529, "step": 418700 }, { "epoch": 8.143898881866797, "grad_norm": 3.512608051300049, "learning_rate": 9.30705596107056e-06, "loss": 1.5125, "step": 418800 }, { "epoch": 8.145843461351483, "grad_norm": 10.645488739013672, "learning_rate": 9.297323600973237e-06, "loss": 1.4494, "step": 418900 }, { "epoch": 8.147788040836168, "grad_norm": 5.105146884918213, "learning_rate": 9.287591240875914e-06, "loss": 1.6029, "step": 419000 }, { "epoch": 8.149732620320856, "grad_norm": 6.903298377990723, "learning_rate": 9.277858880778589e-06, "loss": 1.4745, "step": 419100 }, { "epoch": 8.151677199805542, "grad_norm": 7.537288188934326, "learning_rate": 9.268126520681266e-06, "loss": 1.4815, "step": 419200 }, { "epoch": 8.153621779290228, "grad_norm": 16.006486892700195, "learning_rate": 9.258394160583941e-06, "loss": 1.495, "step": 419300 }, { "epoch": 8.155566358774914, "grad_norm": 3.4109480381011963, "learning_rate": 9.248661800486618e-06, "loss": 1.6261, "step": 419400 }, { "epoch": 8.157510938259602, "grad_norm": 2.2273545265197754, "learning_rate": 9.238929440389295e-06, "loss": 1.5284, "step": 419500 }, { "epoch": 8.159455517744288, "grad_norm": 3.1637468338012695, "learning_rate": 9.22919708029197e-06, "loss": 1.4936, "step": 419600 }, { "epoch": 8.161400097228974, "grad_norm": 8.263578414916992, "learning_rate": 9.219464720194648e-06, "loss": 1.4567, "step": 419700 }, { "epoch": 8.163344676713661, "grad_norm": 5.475799560546875, "learning_rate": 9.209732360097325e-06, "loss": 1.5349, "step": 419800 }, { "epoch": 8.165289256198347, "grad_norm": 2.4643006324768066, "learning_rate": 
9.2e-06, "loss": 1.5353, "step": 419900 }, { "epoch": 8.167233835683033, "grad_norm": 2.0449953079223633, "learning_rate": 9.190267639902677e-06, "loss": 1.3785, "step": 420000 }, { "epoch": 8.167233835683033, "eval_accuracy": 0.5691722222222222, "eval_f1": 0.5568496830845869, "eval_loss": 1.1575814485549927, "eval_precision": 0.5656059509558639, "eval_recall": 0.5691722222222223, "eval_runtime": 11720.4257, "eval_samples_per_second": 15.358, "eval_steps_per_second": 0.48, "step": 420000 }, { "epoch": 8.16917841516772, "grad_norm": 7.660744667053223, "learning_rate": 9.180535279805354e-06, "loss": 1.4293, "step": 420100 }, { "epoch": 8.171122994652407, "grad_norm": 12.509638786315918, "learning_rate": 9.17080291970803e-06, "loss": 1.5404, "step": 420200 }, { "epoch": 8.173067574137093, "grad_norm": 4.7143096923828125, "learning_rate": 9.161070559610706e-06, "loss": 1.5983, "step": 420300 }, { "epoch": 8.175012153621779, "grad_norm": 3.623365879058838, "learning_rate": 9.151338199513382e-06, "loss": 1.6139, "step": 420400 }, { "epoch": 8.176956733106465, "grad_norm": 5.852679252624512, "learning_rate": 9.141605839416059e-06, "loss": 1.5503, "step": 420500 }, { "epoch": 8.178901312591153, "grad_norm": 7.784555912017822, "learning_rate": 9.131873479318736e-06, "loss": 1.3941, "step": 420600 }, { "epoch": 8.180845892075839, "grad_norm": 5.618833065032959, "learning_rate": 9.122141119221411e-06, "loss": 1.3821, "step": 420700 }, { "epoch": 8.182790471560525, "grad_norm": 2.6127943992614746, "learning_rate": 9.112408759124088e-06, "loss": 1.3711, "step": 420800 }, { "epoch": 8.184735051045212, "grad_norm": 6.428350448608398, "learning_rate": 9.102676399026765e-06, "loss": 1.4051, "step": 420900 }, { "epoch": 8.186679630529898, "grad_norm": 4.1413984298706055, "learning_rate": 9.09294403892944e-06, "loss": 1.5499, "step": 421000 }, { "epoch": 8.188624210014584, "grad_norm": 9.17380428314209, "learning_rate": 9.083211678832117e-06, "loss": 1.5111, "step": 421100 }, { 
"epoch": 8.19056878949927, "grad_norm": 4.696709632873535, "learning_rate": 9.073479318734794e-06, "loss": 1.6454, "step": 421200 }, { "epoch": 8.192513368983958, "grad_norm": 13.54525375366211, "learning_rate": 9.06374695863747e-06, "loss": 1.5976, "step": 421300 }, { "epoch": 8.194457948468644, "grad_norm": 6.499248027801514, "learning_rate": 9.054014598540147e-06, "loss": 1.3683, "step": 421400 }, { "epoch": 8.19640252795333, "grad_norm": 5.059573173522949, "learning_rate": 9.044379562043796e-06, "loss": 1.5793, "step": 421500 }, { "epoch": 8.198347107438016, "grad_norm": 3.860800266265869, "learning_rate": 9.034647201946472e-06, "loss": 1.4409, "step": 421600 }, { "epoch": 8.200291686922704, "grad_norm": 3.833756923675537, "learning_rate": 9.024914841849149e-06, "loss": 1.3832, "step": 421700 }, { "epoch": 8.20223626640739, "grad_norm": 4.347011566162109, "learning_rate": 9.015182481751826e-06, "loss": 1.6108, "step": 421800 }, { "epoch": 8.204180845892076, "grad_norm": 4.179378986358643, "learning_rate": 9.005450121654503e-06, "loss": 1.6694, "step": 421900 }, { "epoch": 8.206125425376761, "grad_norm": 5.578348159790039, "learning_rate": 8.995717761557178e-06, "loss": 1.5672, "step": 422000 }, { "epoch": 8.20807000486145, "grad_norm": 9.577430725097656, "learning_rate": 8.985985401459853e-06, "loss": 1.5501, "step": 422100 }, { "epoch": 8.210014584346135, "grad_norm": 5.324457168579102, "learning_rate": 8.976253041362532e-06, "loss": 1.5072, "step": 422200 }, { "epoch": 8.211959163830821, "grad_norm": 5.150614261627197, "learning_rate": 8.966520681265207e-06, "loss": 1.4388, "step": 422300 }, { "epoch": 8.213903743315509, "grad_norm": 3.8367409706115723, "learning_rate": 8.956788321167883e-06, "loss": 1.3875, "step": 422400 }, { "epoch": 8.215848322800195, "grad_norm": 4.910453796386719, "learning_rate": 8.947055961070561e-06, "loss": 1.5724, "step": 422500 }, { "epoch": 8.21779290228488, "grad_norm": 4.0322265625, "learning_rate": 8.937323600973237e-06, 
"loss": 1.5294, "step": 422600 }, { "epoch": 8.219737481769567, "grad_norm": 11.548255920410156, "learning_rate": 8.927591240875912e-06, "loss": 1.4986, "step": 422700 }, { "epoch": 8.221682061254254, "grad_norm": 3.4125823974609375, "learning_rate": 8.917858880778589e-06, "loss": 1.4886, "step": 422800 }, { "epoch": 8.22362664073894, "grad_norm": 2.023475170135498, "learning_rate": 8.908126520681266e-06, "loss": 1.3808, "step": 422900 }, { "epoch": 8.225571220223626, "grad_norm": 14.525912284851074, "learning_rate": 8.898394160583941e-06, "loss": 1.5276, "step": 423000 }, { "epoch": 8.227515799708312, "grad_norm": 5.511640548706055, "learning_rate": 8.888661800486618e-06, "loss": 1.3723, "step": 423100 }, { "epoch": 8.229460379193, "grad_norm": 6.588279724121094, "learning_rate": 8.878929440389294e-06, "loss": 1.4585, "step": 423200 }, { "epoch": 8.231404958677686, "grad_norm": 5.4348626136779785, "learning_rate": 8.869197080291972e-06, "loss": 1.5422, "step": 423300 }, { "epoch": 8.233349538162372, "grad_norm": 3.2786788940429688, "learning_rate": 8.859464720194648e-06, "loss": 1.447, "step": 423400 }, { "epoch": 8.235294117647058, "grad_norm": 2.251085042953491, "learning_rate": 8.849732360097323e-06, "loss": 1.3564, "step": 423500 }, { "epoch": 8.237238697131746, "grad_norm": 4.937480449676514, "learning_rate": 8.840097323600974e-06, "loss": 1.4106, "step": 423600 }, { "epoch": 8.239183276616432, "grad_norm": 3.109715223312378, "learning_rate": 8.83036496350365e-06, "loss": 1.3631, "step": 423700 }, { "epoch": 8.241127856101118, "grad_norm": 3.98667573928833, "learning_rate": 8.820632603406326e-06, "loss": 1.4275, "step": 423800 }, { "epoch": 8.243072435585805, "grad_norm": 6.741735935211182, "learning_rate": 8.810900243309003e-06, "loss": 1.484, "step": 423900 }, { "epoch": 8.245017015070491, "grad_norm": 13.706993103027344, "learning_rate": 8.801167883211679e-06, "loss": 1.3743, "step": 424000 }, { "epoch": 8.246961594555177, "grad_norm": 5.421291351318359, 
"learning_rate": 8.791435523114356e-06, "loss": 1.4502, "step": 424100 }, { "epoch": 8.248906174039863, "grad_norm": 4.465517520904541, "learning_rate": 8.781703163017033e-06, "loss": 1.5678, "step": 424200 }, { "epoch": 8.250850753524551, "grad_norm": 5.214616298675537, "learning_rate": 8.771970802919708e-06, "loss": 1.477, "step": 424300 }, { "epoch": 8.252795333009237, "grad_norm": 2.000014066696167, "learning_rate": 8.762238442822385e-06, "loss": 1.8997, "step": 424400 }, { "epoch": 8.254739912493923, "grad_norm": 7.900456428527832, "learning_rate": 8.75250608272506e-06, "loss": 1.3621, "step": 424500 }, { "epoch": 8.256684491978609, "grad_norm": 2.9020698070526123, "learning_rate": 8.742773722627738e-06, "loss": 1.3235, "step": 424600 }, { "epoch": 8.258629071463297, "grad_norm": 1.9443341493606567, "learning_rate": 8.733041362530415e-06, "loss": 1.4317, "step": 424700 }, { "epoch": 8.260573650947983, "grad_norm": 4.23136568069458, "learning_rate": 8.72330900243309e-06, "loss": 1.3876, "step": 424800 }, { "epoch": 8.262518230432669, "grad_norm": 2.9302334785461426, "learning_rate": 8.713576642335767e-06, "loss": 1.4962, "step": 424900 }, { "epoch": 8.264462809917354, "grad_norm": 4.400271892547607, "learning_rate": 8.703844282238444e-06, "loss": 1.5491, "step": 425000 }, { "epoch": 8.264462809917354, "eval_accuracy": 0.5665277777777777, "eval_f1": 0.5514801488039874, "eval_loss": 1.1627037525177002, "eval_precision": 0.5671300215630944, "eval_recall": 0.5665277777777777, "eval_runtime": 11715.054, "eval_samples_per_second": 15.365, "eval_steps_per_second": 0.48, "step": 425000 }, { "epoch": 8.266407389402042, "grad_norm": 9.713541984558105, "learning_rate": 8.69411192214112e-06, "loss": 1.4839, "step": 425100 }, { "epoch": 8.268351968886728, "grad_norm": 4.721035003662109, "learning_rate": 8.684379562043796e-06, "loss": 1.4919, "step": 425200 }, { "epoch": 8.270296548371414, "grad_norm": 4.551899433135986, "learning_rate": 8.674647201946473e-06, "loss": 
1.5107, "step": 425300 }, { "epoch": 8.272241127856102, "grad_norm": 3.3783130645751953, "learning_rate": 8.664914841849149e-06, "loss": 1.5838, "step": 425400 }, { "epoch": 8.274185707340788, "grad_norm": 1.8538587093353271, "learning_rate": 8.655182481751826e-06, "loss": 1.4268, "step": 425500 }, { "epoch": 8.276130286825474, "grad_norm": 7.332644462585449, "learning_rate": 8.645450121654501e-06, "loss": 1.4927, "step": 425600 }, { "epoch": 8.27807486631016, "grad_norm": 8.281588554382324, "learning_rate": 8.635815085158152e-06, "loss": 1.3447, "step": 425700 }, { "epoch": 8.280019445794847, "grad_norm": 10.077601432800293, "learning_rate": 8.626082725060827e-06, "loss": 1.3733, "step": 425800 }, { "epoch": 8.281964025279533, "grad_norm": 2.8268215656280518, "learning_rate": 8.616350364963504e-06, "loss": 1.4554, "step": 425900 }, { "epoch": 8.28390860476422, "grad_norm": 20.503530502319336, "learning_rate": 8.606618004866181e-06, "loss": 1.5934, "step": 426000 }, { "epoch": 8.285853184248905, "grad_norm": 2.75309681892395, "learning_rate": 8.596885644768857e-06, "loss": 1.5435, "step": 426100 }, { "epoch": 8.287797763733593, "grad_norm": 3.3340306282043457, "learning_rate": 8.587153284671532e-06, "loss": 1.4266, "step": 426200 }, { "epoch": 8.289742343218279, "grad_norm": 8.534436225891113, "learning_rate": 8.57742092457421e-06, "loss": 1.3492, "step": 426300 }, { "epoch": 8.291686922702965, "grad_norm": 3.754237174987793, "learning_rate": 8.567688564476886e-06, "loss": 1.5252, "step": 426400 }, { "epoch": 8.293631502187653, "grad_norm": 19.086172103881836, "learning_rate": 8.557956204379561e-06, "loss": 1.4919, "step": 426500 }, { "epoch": 8.295576081672339, "grad_norm": 4.135617733001709, "learning_rate": 8.548223844282238e-06, "loss": 1.4901, "step": 426600 }, { "epoch": 8.297520661157025, "grad_norm": 4.768415451049805, "learning_rate": 8.538491484184915e-06, "loss": 1.2744, "step": 426700 }, { "epoch": 8.29946524064171, "grad_norm": 1.9658101797103882, 
"learning_rate": 8.528759124087592e-06, "loss": 1.5636, "step": 426800 }, { "epoch": 8.301409820126398, "grad_norm": 2.35528302192688, "learning_rate": 8.519026763990268e-06, "loss": 1.5965, "step": 426900 }, { "epoch": 8.303354399611084, "grad_norm": 2.655848503112793, "learning_rate": 8.509294403892945e-06, "loss": 1.3056, "step": 427000 }, { "epoch": 8.30529897909577, "grad_norm": 5.045546054840088, "learning_rate": 8.499562043795622e-06, "loss": 1.4668, "step": 427100 }, { "epoch": 8.307243558580456, "grad_norm": 5.5762553215026855, "learning_rate": 8.489829683698297e-06, "loss": 1.3594, "step": 427200 }, { "epoch": 8.309188138065144, "grad_norm": 3.5759100914001465, "learning_rate": 8.480097323600972e-06, "loss": 1.5638, "step": 427300 }, { "epoch": 8.31113271754983, "grad_norm": 18.036643981933594, "learning_rate": 8.470364963503651e-06, "loss": 1.3221, "step": 427400 }, { "epoch": 8.313077297034516, "grad_norm": 5.510782241821289, "learning_rate": 8.460632603406327e-06, "loss": 1.635, "step": 427500 }, { "epoch": 8.315021876519202, "grad_norm": 2.701611042022705, "learning_rate": 8.450900243309002e-06, "loss": 1.4973, "step": 427600 }, { "epoch": 8.31696645600389, "grad_norm": 4.808348655700684, "learning_rate": 8.441265206812653e-06, "loss": 1.4143, "step": 427700 }, { "epoch": 8.318911035488576, "grad_norm": 7.48656702041626, "learning_rate": 8.431532846715328e-06, "loss": 1.6784, "step": 427800 }, { "epoch": 8.320855614973262, "grad_norm": 1.9534175395965576, "learning_rate": 8.421800486618005e-06, "loss": 1.3973, "step": 427900 }, { "epoch": 8.32280019445795, "grad_norm": 3.313847303390503, "learning_rate": 8.412068126520682e-06, "loss": 1.5459, "step": 428000 }, { "epoch": 8.324744773942635, "grad_norm": 14.251270294189453, "learning_rate": 8.402335766423358e-06, "loss": 1.419, "step": 428100 }, { "epoch": 8.326689353427321, "grad_norm": 4.535183429718018, "learning_rate": 8.392603406326035e-06, "loss": 1.6636, "step": 428200 }, { "epoch": 
8.328633932912007, "grad_norm": 4.313098430633545, "learning_rate": 8.38287104622871e-06, "loss": 1.4054, "step": 428300 }, { "epoch": 8.330578512396695, "grad_norm": 9.339640617370605, "learning_rate": 8.373138686131387e-06, "loss": 1.3807, "step": 428400 }, { "epoch": 8.33252309188138, "grad_norm": 5.018746376037598, "learning_rate": 8.363406326034064e-06, "loss": 1.4506, "step": 428500 }, { "epoch": 8.334467671366067, "grad_norm": 3.5091066360473633, "learning_rate": 8.35367396593674e-06, "loss": 1.3427, "step": 428600 }, { "epoch": 8.336412250850753, "grad_norm": 4.4335103034973145, "learning_rate": 8.343941605839416e-06, "loss": 1.4215, "step": 428700 }, { "epoch": 8.33835683033544, "grad_norm": 12.219528198242188, "learning_rate": 8.334209245742093e-06, "loss": 1.6282, "step": 428800 }, { "epoch": 8.340301409820126, "grad_norm": 4.522922992706299, "learning_rate": 8.324476885644769e-06, "loss": 1.6715, "step": 428900 }, { "epoch": 8.342245989304812, "grad_norm": 8.35914134979248, "learning_rate": 8.314744525547446e-06, "loss": 1.728, "step": 429000 }, { "epoch": 8.3441905687895, "grad_norm": 5.364083290100098, "learning_rate": 8.305012165450123e-06, "loss": 1.3987, "step": 429100 }, { "epoch": 8.346135148274186, "grad_norm": 3.366781711578369, "learning_rate": 8.295279805352798e-06, "loss": 1.4933, "step": 429200 }, { "epoch": 8.348079727758872, "grad_norm": 2.076951503753662, "learning_rate": 8.285547445255475e-06, "loss": 1.6367, "step": 429300 }, { "epoch": 8.350024307243558, "grad_norm": 2.9219892024993896, "learning_rate": 8.27581508515815e-06, "loss": 1.4672, "step": 429400 }, { "epoch": 8.351968886728246, "grad_norm": 5.126616954803467, "learning_rate": 8.266082725060827e-06, "loss": 1.6374, "step": 429500 }, { "epoch": 8.353913466212932, "grad_norm": 5.61237096786499, "learning_rate": 8.256350364963504e-06, "loss": 1.3796, "step": 429600 }, { "epoch": 8.355858045697618, "grad_norm": 4.143022537231445, "learning_rate": 8.246715328467154e-06, "loss": 
1.4682, "step": 429700 }, { "epoch": 8.357802625182304, "grad_norm": 5.017573356628418, "learning_rate": 8.23698296836983e-06, "loss": 1.3841, "step": 429800 }, { "epoch": 8.359747204666991, "grad_norm": 28.120344161987305, "learning_rate": 8.227250608272506e-06, "loss": 1.5439, "step": 429900 }, { "epoch": 8.361691784151677, "grad_norm": 6.478337287902832, "learning_rate": 8.217518248175181e-06, "loss": 1.3878, "step": 430000 }, { "epoch": 8.361691784151677, "eval_accuracy": 0.5606888888888889, "eval_f1": 0.54965227911939, "eval_loss": 1.168774127960205, "eval_precision": 0.571202453740513, "eval_recall": 0.5606888888888889, "eval_runtime": 11726.096, "eval_samples_per_second": 15.35, "eval_steps_per_second": 0.48, "step": 430000 }, { "epoch": 8.363636363636363, "grad_norm": 3.3310227394104004, "learning_rate": 8.20778588807786e-06, "loss": 1.4966, "step": 430100 }, { "epoch": 8.36558094312105, "grad_norm": 1.928804636001587, "learning_rate": 8.198053527980535e-06, "loss": 1.4028, "step": 430200 }, { "epoch": 8.367525522605737, "grad_norm": 8.526968955993652, "learning_rate": 8.188321167883213e-06, "loss": 1.4163, "step": 430300 }, { "epoch": 8.369470102090423, "grad_norm": 2.4266679286956787, "learning_rate": 8.17858880778589e-06, "loss": 1.5402, "step": 430400 }, { "epoch": 8.371414681575109, "grad_norm": 7.080984115600586, "learning_rate": 8.168856447688565e-06, "loss": 1.4473, "step": 430500 }, { "epoch": 8.373359261059797, "grad_norm": 6.382455825805664, "learning_rate": 8.159124087591242e-06, "loss": 1.6095, "step": 430600 }, { "epoch": 8.375303840544483, "grad_norm": 4.2067341804504395, "learning_rate": 8.149391727493917e-06, "loss": 1.4909, "step": 430700 }, { "epoch": 8.377248420029169, "grad_norm": 6.323143482208252, "learning_rate": 8.139659367396594e-06, "loss": 1.6951, "step": 430800 }, { "epoch": 8.379192999513855, "grad_norm": 6.091248512268066, "learning_rate": 8.129927007299271e-06, "loss": 1.5716, "step": 430900 }, { "epoch": 8.381137578998542, 
"grad_norm": 4.461487293243408, "learning_rate": 8.120194647201947e-06, "loss": 1.5127, "step": 431000 }, { "epoch": 8.383082158483228, "grad_norm": 6.071202278137207, "learning_rate": 8.110462287104622e-06, "loss": 1.4407, "step": 431100 }, { "epoch": 8.385026737967914, "grad_norm": 5.246413707733154, "learning_rate": 8.1007299270073e-06, "loss": 1.4387, "step": 431200 }, { "epoch": 8.3869713174526, "grad_norm": 7.903097152709961, "learning_rate": 8.090997566909976e-06, "loss": 1.481, "step": 431300 }, { "epoch": 8.388915896937288, "grad_norm": 3.4197874069213867, "learning_rate": 8.081265206812651e-06, "loss": 1.3892, "step": 431400 }, { "epoch": 8.390860476421974, "grad_norm": 9.71349048614502, "learning_rate": 8.07153284671533e-06, "loss": 1.466, "step": 431500 }, { "epoch": 8.39280505590666, "grad_norm": 5.614143371582031, "learning_rate": 8.061800486618005e-06, "loss": 1.3988, "step": 431600 }, { "epoch": 8.394749635391346, "grad_norm": 3.4376726150512695, "learning_rate": 8.052165450121655e-06, "loss": 1.4552, "step": 431700 }, { "epoch": 8.396694214876034, "grad_norm": 6.052128314971924, "learning_rate": 8.042433090024332e-06, "loss": 1.4702, "step": 431800 }, { "epoch": 8.39863879436072, "grad_norm": 10.53966236114502, "learning_rate": 8.032700729927007e-06, "loss": 1.5459, "step": 431900 }, { "epoch": 8.400583373845405, "grad_norm": 2.5654609203338623, "learning_rate": 8.022968369829684e-06, "loss": 1.4526, "step": 432000 }, { "epoch": 8.402527953330093, "grad_norm": 4.749578475952148, "learning_rate": 8.013236009732361e-06, "loss": 1.5527, "step": 432100 }, { "epoch": 8.40447253281478, "grad_norm": 8.12285041809082, "learning_rate": 8.003503649635036e-06, "loss": 1.4066, "step": 432200 }, { "epoch": 8.406417112299465, "grad_norm": 2.4346892833709717, "learning_rate": 7.993771289537713e-06, "loss": 1.4085, "step": 432300 }, { "epoch": 8.408361691784151, "grad_norm": 2.8591384887695312, "learning_rate": 7.984038929440389e-06, "loss": 1.3895, "step": 432400 
}, { "epoch": 8.410306271268839, "grad_norm": 4.165060043334961, "learning_rate": 7.974306569343066e-06, "loss": 1.3729, "step": 432500 }, { "epoch": 8.412250850753525, "grad_norm": 3.8963093757629395, "learning_rate": 7.964574209245743e-06, "loss": 1.5423, "step": 432600 }, { "epoch": 8.41419543023821, "grad_norm": 3.474496603012085, "learning_rate": 7.954841849148418e-06, "loss": 1.3628, "step": 432700 }, { "epoch": 8.416140009722897, "grad_norm": 4.42515754699707, "learning_rate": 7.945109489051095e-06, "loss": 1.3192, "step": 432800 }, { "epoch": 8.418084589207584, "grad_norm": 12.156871795654297, "learning_rate": 7.935377128953772e-06, "loss": 1.5509, "step": 432900 }, { "epoch": 8.42002916869227, "grad_norm": 7.263619422912598, "learning_rate": 7.925644768856447e-06, "loss": 1.5285, "step": 433000 }, { "epoch": 8.421973748176956, "grad_norm": 4.091464042663574, "learning_rate": 7.915912408759124e-06, "loss": 1.3912, "step": 433100 }, { "epoch": 8.423918327661642, "grad_norm": 12.329352378845215, "learning_rate": 7.906180048661802e-06, "loss": 1.4733, "step": 433200 }, { "epoch": 8.42586290714633, "grad_norm": 6.274325370788574, "learning_rate": 7.896447688564477e-06, "loss": 1.4691, "step": 433300 }, { "epoch": 8.427807486631016, "grad_norm": 6.9759650230407715, "learning_rate": 7.886715328467154e-06, "loss": 1.4361, "step": 433400 }, { "epoch": 8.429752066115702, "grad_norm": 6.068700313568115, "learning_rate": 7.87698296836983e-06, "loss": 1.4358, "step": 433500 }, { "epoch": 8.43169664560039, "grad_norm": 9.831517219543457, "learning_rate": 7.867250608272506e-06, "loss": 1.5008, "step": 433600 }, { "epoch": 8.433641225085076, "grad_norm": 8.565240859985352, "learning_rate": 7.857615571776156e-06, "loss": 1.4286, "step": 433700 }, { "epoch": 8.435585804569762, "grad_norm": 3.469148635864258, "learning_rate": 7.847883211678833e-06, "loss": 1.5276, "step": 433800 }, { "epoch": 8.437530384054448, "grad_norm": 5.910046100616455, "learning_rate": 
7.83815085158151e-06, "loss": 1.4575, "step": 433900 }, { "epoch": 8.439474963539135, "grad_norm": 6.157474994659424, "learning_rate": 7.828418491484185e-06, "loss": 1.4958, "step": 434000 }, { "epoch": 8.441419543023821, "grad_norm": 5.711702823638916, "learning_rate": 7.818686131386862e-06, "loss": 1.6529, "step": 434100 }, { "epoch": 8.443364122508507, "grad_norm": 6.680814743041992, "learning_rate": 7.808953771289539e-06, "loss": 1.3611, "step": 434200 }, { "epoch": 8.445308701993193, "grad_norm": 7.937012195587158, "learning_rate": 7.799221411192214e-06, "loss": 1.5881, "step": 434300 }, { "epoch": 8.447253281477881, "grad_norm": 3.176438331604004, "learning_rate": 7.789489051094891e-06, "loss": 1.408, "step": 434400 }, { "epoch": 8.449197860962567, "grad_norm": 4.1358489990234375, "learning_rate": 7.779756690997567e-06, "loss": 1.519, "step": 434500 }, { "epoch": 8.451142440447253, "grad_norm": 9.799615859985352, "learning_rate": 7.770024330900244e-06, "loss": 1.5476, "step": 434600 }, { "epoch": 8.45308701993194, "grad_norm": 5.143398284912109, "learning_rate": 7.76029197080292e-06, "loss": 1.6286, "step": 434700 }, { "epoch": 8.455031599416627, "grad_norm": 4.476614475250244, "learning_rate": 7.750559610705596e-06, "loss": 1.3572, "step": 434800 }, { "epoch": 8.456976178901312, "grad_norm": 6.5966796875, "learning_rate": 7.740827250608273e-06, "loss": 1.514, "step": 434900 }, { "epoch": 8.458920758385998, "grad_norm": 4.835020542144775, "learning_rate": 7.73109489051095e-06, "loss": 1.415, "step": 435000 }, { "epoch": 8.458920758385998, "eval_accuracy": 0.5545833333333333, "eval_f1": 0.5422848740012974, "eval_loss": 1.18012535572052, "eval_precision": 0.565026205796963, "eval_recall": 0.5545833333333333, "eval_runtime": 11716.0331, "eval_samples_per_second": 15.364, "eval_steps_per_second": 0.48, "step": 435000 }, { "epoch": 8.460865337870686, "grad_norm": 1.721895456314087, "learning_rate": 7.721362530413625e-06, "loss": 1.5963, "step": 435100 }, { 
"epoch": 8.462809917355372, "grad_norm": 4.07999849319458, "learning_rate": 7.711630170316302e-06, "loss": 1.4323, "step": 435200 }, { "epoch": 8.464754496840058, "grad_norm": 8.44343376159668, "learning_rate": 7.70189781021898e-06, "loss": 1.5164, "step": 435300 }, { "epoch": 8.466699076324744, "grad_norm": 6.265290260314941, "learning_rate": 7.692165450121655e-06, "loss": 1.4605, "step": 435400 }, { "epoch": 8.468643655809432, "grad_norm": 13.083685874938965, "learning_rate": 7.682433090024332e-06, "loss": 1.4926, "step": 435500 }, { "epoch": 8.470588235294118, "grad_norm": 3.011011838912964, "learning_rate": 7.672700729927007e-06, "loss": 1.6927, "step": 435600 }, { "epoch": 8.472532814778804, "grad_norm": 3.431312322616577, "learning_rate": 7.663065693430656e-06, "loss": 1.4407, "step": 435700 }, { "epoch": 8.47447739426349, "grad_norm": 4.5923638343811035, "learning_rate": 7.653333333333333e-06, "loss": 1.594, "step": 435800 }, { "epoch": 8.476421973748177, "grad_norm": 11.12926197052002, "learning_rate": 7.64360097323601e-06, "loss": 1.3725, "step": 435900 }, { "epoch": 8.478366553232863, "grad_norm": 2.7827606201171875, "learning_rate": 7.633868613138686e-06, "loss": 1.492, "step": 436000 }, { "epoch": 8.48031113271755, "grad_norm": 8.883254051208496, "learning_rate": 7.624136253041363e-06, "loss": 1.6747, "step": 436100 }, { "epoch": 8.482255712202237, "grad_norm": 3.952280044555664, "learning_rate": 7.614403892944039e-06, "loss": 1.3436, "step": 436200 }, { "epoch": 8.484200291686923, "grad_norm": 5.4533772468566895, "learning_rate": 7.604671532846716e-06, "loss": 1.3617, "step": 436300 }, { "epoch": 8.486144871171609, "grad_norm": 5.42534065246582, "learning_rate": 7.594939172749392e-06, "loss": 1.5148, "step": 436400 }, { "epoch": 8.488089450656295, "grad_norm": 4.449549674987793, "learning_rate": 7.585206812652068e-06, "loss": 1.3216, "step": 436500 }, { "epoch": 8.490034030140983, "grad_norm": 2.4446890354156494, "learning_rate": 7.575474452554745e-06, 
"loss": 1.4215, "step": 436600 }, { "epoch": 8.491978609625669, "grad_norm": 2.415187120437622, "learning_rate": 7.5657420924574216e-06, "loss": 1.3986, "step": 436700 }, { "epoch": 8.493923189110355, "grad_norm": 6.785268306732178, "learning_rate": 7.556009732360098e-06, "loss": 1.4252, "step": 436800 }, { "epoch": 8.49586776859504, "grad_norm": 3.8268325328826904, "learning_rate": 7.546277372262774e-06, "loss": 1.5016, "step": 436900 }, { "epoch": 8.497812348079728, "grad_norm": 3.640150308609009, "learning_rate": 7.536545012165451e-06, "loss": 1.4854, "step": 437000 }, { "epoch": 8.499756927564414, "grad_norm": 11.369097709655762, "learning_rate": 7.5269099756690994e-06, "loss": 1.4924, "step": 437100 }, { "epoch": 8.5017015070491, "grad_norm": 6.29790735244751, "learning_rate": 7.517177615571777e-06, "loss": 1.3118, "step": 437200 }, { "epoch": 8.503646086533788, "grad_norm": 11.344145774841309, "learning_rate": 7.5074452554744535e-06, "loss": 1.618, "step": 437300 }, { "epoch": 8.505590666018474, "grad_norm": 2.7724392414093018, "learning_rate": 7.497712895377129e-06, "loss": 1.4045, "step": 437400 }, { "epoch": 8.50753524550316, "grad_norm": 3.8797760009765625, "learning_rate": 7.487980535279805e-06, "loss": 1.6115, "step": 437500 }, { "epoch": 8.509479824987846, "grad_norm": 3.898456573486328, "learning_rate": 7.478248175182483e-06, "loss": 1.4951, "step": 437600 }, { "epoch": 8.511424404472534, "grad_norm": 8.355120658874512, "learning_rate": 7.468515815085159e-06, "loss": 1.6671, "step": 437700 }, { "epoch": 8.51336898395722, "grad_norm": 3.6955513954162598, "learning_rate": 7.458783454987834e-06, "loss": 1.4058, "step": 437800 }, { "epoch": 8.515313563441905, "grad_norm": 25.953420639038086, "learning_rate": 7.4490510948905105e-06, "loss": 1.4621, "step": 437900 }, { "epoch": 8.517258142926591, "grad_norm": 3.523697853088379, "learning_rate": 7.439318734793188e-06, "loss": 1.3856, "step": 438000 }, { "epoch": 8.51920272241128, "grad_norm": 
7.89404821395874, "learning_rate": 7.429586374695864e-06, "loss": 1.4188, "step": 438100 }, { "epoch": 8.521147301895965, "grad_norm": 3.334312915802002, "learning_rate": 7.41985401459854e-06, "loss": 1.4389, "step": 438200 }, { "epoch": 8.523091881380651, "grad_norm": 3.9564592838287354, "learning_rate": 7.410121654501218e-06, "loss": 1.5057, "step": 438300 }, { "epoch": 8.525036460865337, "grad_norm": 6.5963616371154785, "learning_rate": 7.400389294403894e-06, "loss": 1.4197, "step": 438400 }, { "epoch": 8.526981040350025, "grad_norm": 5.2238640785217285, "learning_rate": 7.390656934306569e-06, "loss": 1.3509, "step": 438500 }, { "epoch": 8.52892561983471, "grad_norm": 4.840470314025879, "learning_rate": 7.3809245742092454e-06, "loss": 1.5284, "step": 438600 }, { "epoch": 8.530870199319397, "grad_norm": 2.626958131790161, "learning_rate": 7.371192214111923e-06, "loss": 1.539, "step": 438700 }, { "epoch": 8.532814778804084, "grad_norm": 6.823154926300049, "learning_rate": 7.361459854014599e-06, "loss": 1.41, "step": 438800 }, { "epoch": 8.53475935828877, "grad_norm": 3.6253504753112793, "learning_rate": 7.351727493917275e-06, "loss": 1.4045, "step": 438900 }, { "epoch": 8.536703937773456, "grad_norm": 5.157498836517334, "learning_rate": 7.341995133819951e-06, "loss": 1.5109, "step": 439000 }, { "epoch": 8.538648517258142, "grad_norm": 7.15588903427124, "learning_rate": 7.332262773722629e-06, "loss": 1.4146, "step": 439100 }, { "epoch": 8.54059309674283, "grad_norm": 5.316734313964844, "learning_rate": 7.322530413625304e-06, "loss": 1.5451, "step": 439200 }, { "epoch": 8.542537676227516, "grad_norm": 13.682252883911133, "learning_rate": 7.31279805352798e-06, "loss": 1.7168, "step": 439300 }, { "epoch": 8.544482255712202, "grad_norm": 4.903896808624268, "learning_rate": 7.303065693430658e-06, "loss": 1.7189, "step": 439400 }, { "epoch": 8.546426835196888, "grad_norm": 3.6988492012023926, "learning_rate": 7.293333333333334e-06, "loss": 1.4559, "step": 439500 }, { 
"epoch": 8.548371414681576, "grad_norm": 10.38011646270752, "learning_rate": 7.28360097323601e-06, "loss": 1.6814, "step": 439600 }, { "epoch": 8.550315994166262, "grad_norm": 4.675049781799316, "learning_rate": 7.273868613138686e-06, "loss": 1.515, "step": 439700 }, { "epoch": 8.552260573650948, "grad_norm": 4.000533103942871, "learning_rate": 7.264136253041364e-06, "loss": 1.5614, "step": 439800 }, { "epoch": 8.554205153135634, "grad_norm": 9.287104606628418, "learning_rate": 7.254403892944039e-06, "loss": 1.482, "step": 439900 }, { "epoch": 8.556149732620321, "grad_norm": 6.141740798950195, "learning_rate": 7.244671532846715e-06, "loss": 1.3973, "step": 440000 }, { "epoch": 8.556149732620321, "eval_accuracy": 0.5611666666666667, "eval_f1": 0.5537635830822466, "eval_loss": 1.1649545431137085, "eval_precision": 0.5712030083894947, "eval_recall": 0.5611666666666667, "eval_runtime": 11715.5437, "eval_samples_per_second": 15.364, "eval_steps_per_second": 0.48, "step": 440000 }, { "epoch": 8.558094312105007, "grad_norm": 5.826941967010498, "learning_rate": 7.234939172749391e-06, "loss": 1.4955, "step": 440100 }, { "epoch": 8.560038891589693, "grad_norm": 15.611956596374512, "learning_rate": 7.225206812652069e-06, "loss": 1.5641, "step": 440200 }, { "epoch": 8.561983471074381, "grad_norm": 1.6265792846679688, "learning_rate": 7.215474452554745e-06, "loss": 1.5799, "step": 440300 }, { "epoch": 8.563928050559067, "grad_norm": 3.5613155364990234, "learning_rate": 7.205742092457421e-06, "loss": 1.404, "step": 440400 }, { "epoch": 8.565872630043753, "grad_norm": 7.703042984008789, "learning_rate": 7.196009732360099e-06, "loss": 1.563, "step": 440500 }, { "epoch": 8.567817209528439, "grad_norm": 5.278083801269531, "learning_rate": 7.186277372262774e-06, "loss": 1.3828, "step": 440600 }, { "epoch": 8.569761789013127, "grad_norm": 4.194681644439697, "learning_rate": 7.17654501216545e-06, "loss": 1.3631, "step": 440700 }, { "epoch": 8.571706368497813, "grad_norm": 
5.660822868347168, "learning_rate": 7.166812652068126e-06, "loss": 1.3516, "step": 440800 }, { "epoch": 8.573650947982498, "grad_norm": 4.156105041503906, "learning_rate": 7.157080291970804e-06, "loss": 1.3966, "step": 440900 }, { "epoch": 8.575595527467184, "grad_norm": 7.705134391784668, "learning_rate": 7.1473479318734795e-06, "loss": 1.6281, "step": 441000 }, { "epoch": 8.577540106951872, "grad_norm": 1.69814932346344, "learning_rate": 7.13771289537713e-06, "loss": 1.4249, "step": 441100 }, { "epoch": 8.579484686436558, "grad_norm": 3.438013792037964, "learning_rate": 7.127980535279806e-06, "loss": 1.5037, "step": 441200 }, { "epoch": 8.581429265921244, "grad_norm": 2.531233072280884, "learning_rate": 7.118248175182482e-06, "loss": 1.3249, "step": 441300 }, { "epoch": 8.58337384540593, "grad_norm": 4.490253448486328, "learning_rate": 7.108515815085158e-06, "loss": 1.4102, "step": 441400 }, { "epoch": 8.585318424890618, "grad_norm": 4.5017409324646, "learning_rate": 7.098783454987835e-06, "loss": 1.4427, "step": 441500 }, { "epoch": 8.587263004375304, "grad_norm": 4.32865571975708, "learning_rate": 7.0890510948905114e-06, "loss": 1.6804, "step": 441600 }, { "epoch": 8.58920758385999, "grad_norm": 4.977347373962402, "learning_rate": 7.079318734793188e-06, "loss": 1.4479, "step": 441700 }, { "epoch": 8.591152163344677, "grad_norm": 6.772371768951416, "learning_rate": 7.069586374695864e-06, "loss": 1.5955, "step": 441800 }, { "epoch": 8.593096742829363, "grad_norm": 4.511282920837402, "learning_rate": 7.059854014598541e-06, "loss": 1.454, "step": 441900 }, { "epoch": 8.59504132231405, "grad_norm": 5.576338768005371, "learning_rate": 7.050121654501217e-06, "loss": 1.4534, "step": 442000 }, { "epoch": 8.596985901798735, "grad_norm": 3.1415228843688965, "learning_rate": 7.040389294403893e-06, "loss": 1.5131, "step": 442100 }, { "epoch": 8.598930481283423, "grad_norm": 14.623051643371582, "learning_rate": 7.03065693430657e-06, "loss": 1.5527, "step": 442200 }, { 
"epoch": 8.600875060768109, "grad_norm": 4.2645158767700195, "learning_rate": 7.020924574209246e-06, "loss": 1.3374, "step": 442300 }, { "epoch": 8.602819640252795, "grad_norm": 4.924588203430176, "learning_rate": 7.0111922141119225e-06, "loss": 1.6666, "step": 442400 }, { "epoch": 8.604764219737481, "grad_norm": 4.20438289642334, "learning_rate": 7.001459854014599e-06, "loss": 1.4627, "step": 442500 }, { "epoch": 8.606708799222169, "grad_norm": 3.2105488777160645, "learning_rate": 6.991727493917276e-06, "loss": 1.5435, "step": 442600 }, { "epoch": 8.608653378706855, "grad_norm": 7.247870445251465, "learning_rate": 6.981995133819952e-06, "loss": 1.507, "step": 442700 }, { "epoch": 8.61059795819154, "grad_norm": 5.689023494720459, "learning_rate": 6.972262773722628e-06, "loss": 1.4238, "step": 442800 }, { "epoch": 8.612542537676227, "grad_norm": 5.233413219451904, "learning_rate": 6.962530413625304e-06, "loss": 1.3914, "step": 442900 }, { "epoch": 8.614487117160914, "grad_norm": 5.103438854217529, "learning_rate": 6.952798053527981e-06, "loss": 1.3582, "step": 443000 }, { "epoch": 8.6164316966456, "grad_norm": 2.4528119564056396, "learning_rate": 6.94316301703163e-06, "loss": 1.5685, "step": 443100 }, { "epoch": 8.618376276130286, "grad_norm": 4.407835960388184, "learning_rate": 6.933430656934308e-06, "loss": 1.5404, "step": 443200 }, { "epoch": 8.620320855614974, "grad_norm": 4.701404094696045, "learning_rate": 6.923698296836984e-06, "loss": 1.4623, "step": 443300 }, { "epoch": 8.62226543509966, "grad_norm": 4.573940753936768, "learning_rate": 6.913965936739659e-06, "loss": 1.4592, "step": 443400 }, { "epoch": 8.624210014584346, "grad_norm": 7.012394905090332, "learning_rate": 6.904233576642335e-06, "loss": 1.451, "step": 443500 }, { "epoch": 8.626154594069032, "grad_norm": 2.5907199382781982, "learning_rate": 6.894501216545013e-06, "loss": 1.3437, "step": 443600 }, { "epoch": 8.62809917355372, "grad_norm": 2.648209571838379, "learning_rate": 6.884768856447689e-06, 
"loss": 1.4147, "step": 443700 }, { "epoch": 8.630043753038406, "grad_norm": 4.125791072845459, "learning_rate": 6.875036496350365e-06, "loss": 1.3406, "step": 443800 }, { "epoch": 8.631988332523092, "grad_norm": 5.881978511810303, "learning_rate": 6.865401459854015e-06, "loss": 1.4126, "step": 443900 }, { "epoch": 8.63393291200778, "grad_norm": 5.999048709869385, "learning_rate": 6.855669099756691e-06, "loss": 1.7491, "step": 444000 }, { "epoch": 8.635877491492465, "grad_norm": 4.378171443939209, "learning_rate": 6.845936739659367e-06, "loss": 1.4032, "step": 444100 }, { "epoch": 8.637822070977151, "grad_norm": 4.888139247894287, "learning_rate": 6.836204379562044e-06, "loss": 1.3634, "step": 444200 }, { "epoch": 8.639766650461837, "grad_norm": 4.288183689117432, "learning_rate": 6.82647201946472e-06, "loss": 1.47, "step": 444300 }, { "epoch": 8.641711229946525, "grad_norm": 6.055643558502197, "learning_rate": 6.816739659367397e-06, "loss": 1.5562, "step": 444400 }, { "epoch": 8.64365580943121, "grad_norm": 5.346600532531738, "learning_rate": 6.807007299270074e-06, "loss": 1.4217, "step": 444500 }, { "epoch": 8.645600388915897, "grad_norm": 8.413164138793945, "learning_rate": 6.79727493917275e-06, "loss": 1.3546, "step": 444600 }, { "epoch": 8.647544968400583, "grad_norm": 8.084819793701172, "learning_rate": 6.787542579075426e-06, "loss": 1.5917, "step": 444700 }, { "epoch": 8.64948954788527, "grad_norm": 3.5736780166625977, "learning_rate": 6.777810218978102e-06, "loss": 1.4871, "step": 444800 }, { "epoch": 8.651434127369956, "grad_norm": 4.886806964874268, "learning_rate": 6.768077858880779e-06, "loss": 1.5563, "step": 444900 }, { "epoch": 8.653378706854642, "grad_norm": 5.854095458984375, "learning_rate": 6.758345498783455e-06, "loss": 1.3801, "step": 445000 }, { "epoch": 8.653378706854642, "eval_accuracy": 0.5655, "eval_f1": 0.5524808025791951, "eval_loss": 1.1670869588851929, "eval_precision": 0.5664858205761968, "eval_recall": 0.5655, "eval_runtime": 
11720.0089, "eval_samples_per_second": 15.358, "eval_steps_per_second": 0.48, "step": 445000 }, { "epoch": 8.655323286339328, "grad_norm": 4.382760524749756, "learning_rate": 6.7486131386861315e-06, "loss": 1.4319, "step": 445100 }, { "epoch": 8.657267865824016, "grad_norm": 4.5022430419921875, "learning_rate": 6.738880778588808e-06, "loss": 1.3441, "step": 445200 }, { "epoch": 8.659212445308702, "grad_norm": 5.519043445587158, "learning_rate": 6.729148418491485e-06, "loss": 1.4134, "step": 445300 }, { "epoch": 8.661157024793388, "grad_norm": 7.03265380859375, "learning_rate": 6.719416058394161e-06, "loss": 1.5638, "step": 445400 }, { "epoch": 8.663101604278076, "grad_norm": 3.231497287750244, "learning_rate": 6.709683698296837e-06, "loss": 1.4637, "step": 445500 }, { "epoch": 8.665046183762762, "grad_norm": 2.864579200744629, "learning_rate": 6.699951338199514e-06, "loss": 1.5365, "step": 445600 }, { "epoch": 8.666990763247448, "grad_norm": 3.7208714485168457, "learning_rate": 6.69021897810219e-06, "loss": 1.6067, "step": 445700 }, { "epoch": 8.668935342732134, "grad_norm": 5.854006767272949, "learning_rate": 6.680486618004866e-06, "loss": 1.541, "step": 445800 }, { "epoch": 8.670879922216821, "grad_norm": 3.1685099601745605, "learning_rate": 6.6707542579075426e-06, "loss": 1.6868, "step": 445900 }, { "epoch": 8.672824501701507, "grad_norm": 6.86236572265625, "learning_rate": 6.66102189781022e-06, "loss": 1.3465, "step": 446000 }, { "epoch": 8.674769081186193, "grad_norm": 11.851090431213379, "learning_rate": 6.651289537712896e-06, "loss": 1.483, "step": 446100 }, { "epoch": 8.67671366067088, "grad_norm": 5.247538089752197, "learning_rate": 6.641557177615572e-06, "loss": 1.5157, "step": 446200 }, { "epoch": 8.678658240155567, "grad_norm": 7.149824142456055, "learning_rate": 6.631824817518248e-06, "loss": 1.6982, "step": 446300 }, { "epoch": 8.680602819640253, "grad_norm": 5.94351863861084, "learning_rate": 6.622092457420925e-06, "loss": 1.4668, "step": 446400 }, { 
"epoch": 8.682547399124939, "grad_norm": 7.681812763214111, "learning_rate": 6.612360097323601e-06, "loss": 1.417, "step": 446500 }, { "epoch": 8.684491978609625, "grad_norm": 17.98885154724121, "learning_rate": 6.6026277372262775e-06, "loss": 1.5867, "step": 446600 }, { "epoch": 8.686436558094313, "grad_norm": 5.670619964599609, "learning_rate": 6.5928953771289545e-06, "loss": 1.3768, "step": 446700 }, { "epoch": 8.688381137578999, "grad_norm": 4.528499126434326, "learning_rate": 6.583163017031631e-06, "loss": 1.4728, "step": 446800 }, { "epoch": 8.690325717063685, "grad_norm": 5.921354293823242, "learning_rate": 6.573430656934307e-06, "loss": 1.5013, "step": 446900 }, { "epoch": 8.692270296548372, "grad_norm": 6.856375217437744, "learning_rate": 6.563698296836983e-06, "loss": 1.381, "step": 447000 }, { "epoch": 8.694214876033058, "grad_norm": 6.70449161529541, "learning_rate": 6.55396593673966e-06, "loss": 1.4312, "step": 447100 }, { "epoch": 8.696159455517744, "grad_norm": 3.6972157955169678, "learning_rate": 6.544233576642336e-06, "loss": 1.3991, "step": 447200 }, { "epoch": 8.69810403500243, "grad_norm": 9.586971282958984, "learning_rate": 6.534501216545012e-06, "loss": 1.3343, "step": 447300 }, { "epoch": 8.700048614487118, "grad_norm": 6.957052230834961, "learning_rate": 6.5247688564476885e-06, "loss": 1.4149, "step": 447400 }, { "epoch": 8.701993193971804, "grad_norm": 2.0847935676574707, "learning_rate": 6.5150364963503656e-06, "loss": 1.5073, "step": 447500 }, { "epoch": 8.70393777345649, "grad_norm": 5.066136360168457, "learning_rate": 6.505304136253042e-06, "loss": 1.4314, "step": 447600 }, { "epoch": 8.705882352941176, "grad_norm": 4.151937484741211, "learning_rate": 6.495571776155718e-06, "loss": 1.3999, "step": 447700 }, { "epoch": 8.707826932425863, "grad_norm": 3.7421395778656006, "learning_rate": 6.485839416058394e-06, "loss": 1.3737, "step": 447800 }, { "epoch": 8.70977151191055, "grad_norm": 8.208084106445312, "learning_rate": 
6.476204379562044e-06, "loss": 1.5742, "step": 447900 }, { "epoch": 8.711716091395235, "grad_norm": 4.458754539489746, "learning_rate": 6.46647201946472e-06, "loss": 1.4443, "step": 448000 }, { "epoch": 8.713660670879921, "grad_norm": 1.7587813138961792, "learning_rate": 6.45683698296837e-06, "loss": 1.5646, "step": 448100 }, { "epoch": 8.715605250364609, "grad_norm": 2.5160069465637207, "learning_rate": 6.447104622871046e-06, "loss": 1.6983, "step": 448200 }, { "epoch": 8.717549829849295, "grad_norm": 10.849141120910645, "learning_rate": 6.437372262773724e-06, "loss": 1.3474, "step": 448300 }, { "epoch": 8.719494409333981, "grad_norm": 5.68195104598999, "learning_rate": 6.427639902676399e-06, "loss": 1.49, "step": 448400 }, { "epoch": 8.721438988818669, "grad_norm": 4.217276573181152, "learning_rate": 6.417907542579075e-06, "loss": 1.5708, "step": 448500 }, { "epoch": 8.723383568303355, "grad_norm": 3.36798095703125, "learning_rate": 6.4081751824817516e-06, "loss": 1.5534, "step": 448600 }, { "epoch": 8.72532814778804, "grad_norm": 5.941211223602295, "learning_rate": 6.3984428223844286e-06, "loss": 1.4511, "step": 448700 }, { "epoch": 8.727272727272727, "grad_norm": 7.6788763999938965, "learning_rate": 6.388710462287105e-06, "loss": 1.3368, "step": 448800 }, { "epoch": 8.729217306757414, "grad_norm": 6.07912015914917, "learning_rate": 6.378978102189781e-06, "loss": 1.4964, "step": 448900 }, { "epoch": 8.7311618862421, "grad_norm": 2.8865816593170166, "learning_rate": 6.369245742092459e-06, "loss": 1.3773, "step": 449000 }, { "epoch": 8.733106465726786, "grad_norm": 7.568879127502441, "learning_rate": 6.359513381995134e-06, "loss": 1.3862, "step": 449100 }, { "epoch": 8.735051045211472, "grad_norm": 8.604303359985352, "learning_rate": 6.34978102189781e-06, "loss": 1.3765, "step": 449200 }, { "epoch": 8.73699562469616, "grad_norm": 5.288290500640869, "learning_rate": 6.3400486618004865e-06, "loss": 1.3984, "step": 449300 }, { "epoch": 8.738940204180846, "grad_norm": 
3.37740159034729, "learning_rate": 6.3303163017031635e-06, "loss": 1.4229, "step": 449400 }, { "epoch": 8.740884783665532, "grad_norm": 4.467552661895752, "learning_rate": 6.32058394160584e-06, "loss": 1.4375, "step": 449500 }, { "epoch": 8.742829363150218, "grad_norm": 3.010103464126587, "learning_rate": 6.310851581508516e-06, "loss": 1.498, "step": 449600 }, { "epoch": 8.744773942634906, "grad_norm": 4.748867988586426, "learning_rate": 6.301119221411192e-06, "loss": 1.3732, "step": 449700 }, { "epoch": 8.746718522119592, "grad_norm": 5.183506011962891, "learning_rate": 6.291386861313869e-06, "loss": 1.3574, "step": 449800 }, { "epoch": 8.748663101604278, "grad_norm": 3.1337711811065674, "learning_rate": 6.281654501216545e-06, "loss": 1.4658, "step": 449900 }, { "epoch": 8.750607681088965, "grad_norm": 5.486715793609619, "learning_rate": 6.271922141119221e-06, "loss": 1.4631, "step": 450000 }, { "epoch": 8.750607681088965, "eval_accuracy": 0.5552277777777778, "eval_f1": 0.5413647235134557, "eval_loss": 1.183850884437561, "eval_precision": 0.5630728408065909, "eval_recall": 0.5552277777777779, "eval_runtime": 11717.7766, "eval_samples_per_second": 15.361, "eval_steps_per_second": 0.48, "step": 450000 }, { "epoch": 8.752552260573651, "grad_norm": 3.3492178916931152, "learning_rate": 6.262189781021898e-06, "loss": 1.4649, "step": 450100 }, { "epoch": 8.754496840058337, "grad_norm": 4.537657260894775, "learning_rate": 6.2524574209245746e-06, "loss": 1.3911, "step": 450200 }, { "epoch": 8.756441419543023, "grad_norm": 8.45727825164795, "learning_rate": 6.242725060827251e-06, "loss": 1.3693, "step": 450300 }, { "epoch": 8.75838599902771, "grad_norm": 7.932658672332764, "learning_rate": 6.232992700729928e-06, "loss": 1.526, "step": 450400 }, { "epoch": 8.760330578512397, "grad_norm": 7.755575180053711, "learning_rate": 6.223260340632604e-06, "loss": 1.5931, "step": 450500 }, { "epoch": 8.762275157997083, "grad_norm": 5.906830787658691, "learning_rate": 
6.21352798053528e-06, "loss": 1.569, "step": 450600 }, { "epoch": 8.764219737481769, "grad_norm": 7.7674479484558105, "learning_rate": 6.203795620437956e-06, "loss": 1.6082, "step": 450700 }, { "epoch": 8.766164316966456, "grad_norm": 3.660429000854492, "learning_rate": 6.194063260340633e-06, "loss": 1.6236, "step": 450800 }, { "epoch": 8.768108896451142, "grad_norm": 6.790037631988525, "learning_rate": 6.1843309002433095e-06, "loss": 1.355, "step": 450900 }, { "epoch": 8.770053475935828, "grad_norm": 4.46774435043335, "learning_rate": 6.174598540145986e-06, "loss": 1.4145, "step": 451000 }, { "epoch": 8.771998055420516, "grad_norm": 5.208642482757568, "learning_rate": 6.164866180048662e-06, "loss": 1.4938, "step": 451100 }, { "epoch": 8.773942634905202, "grad_norm": 4.837177276611328, "learning_rate": 6.155133819951339e-06, "loss": 1.3709, "step": 451200 }, { "epoch": 8.775887214389888, "grad_norm": 6.265285491943359, "learning_rate": 6.145498783454988e-06, "loss": 1.3821, "step": 451300 }, { "epoch": 8.777831793874574, "grad_norm": 4.3414306640625, "learning_rate": 6.135766423357664e-06, "loss": 1.4987, "step": 451400 }, { "epoch": 8.779776373359262, "grad_norm": 4.596713542938232, "learning_rate": 6.1260340632603405e-06, "loss": 1.4299, "step": 451500 }, { "epoch": 8.781720952843948, "grad_norm": 3.5231802463531494, "learning_rate": 6.1163017031630176e-06, "loss": 1.4812, "step": 451600 }, { "epoch": 8.783665532328634, "grad_norm": 4.402547359466553, "learning_rate": 6.106569343065694e-06, "loss": 1.6383, "step": 451700 }, { "epoch": 8.78561011181332, "grad_norm": 4.433861255645752, "learning_rate": 6.09683698296837e-06, "loss": 1.3643, "step": 451800 }, { "epoch": 8.787554691298007, "grad_norm": 5.398416042327881, "learning_rate": 6.087104622871047e-06, "loss": 1.299, "step": 451900 }, { "epoch": 8.789499270782693, "grad_norm": 4.775507926940918, "learning_rate": 6.077372262773723e-06, "loss": 1.4578, "step": 452000 }, { "epoch": 8.79144385026738, "grad_norm": 
6.597721576690674, "learning_rate": 6.067639902676399e-06, "loss": 1.3325, "step": 452100 }, { "epoch": 8.793388429752067, "grad_norm": 5.152734756469727, "learning_rate": 6.0579075425790754e-06, "loss": 1.5049, "step": 452200 }, { "epoch": 8.795333009236753, "grad_norm": 37.23589324951172, "learning_rate": 6.0481751824817525e-06, "loss": 1.4626, "step": 452300 }, { "epoch": 8.797277588721439, "grad_norm": 7.050821781158447, "learning_rate": 6.038442822384429e-06, "loss": 1.5102, "step": 452400 }, { "epoch": 8.799222168206125, "grad_norm": 4.786181926727295, "learning_rate": 6.028710462287105e-06, "loss": 1.297, "step": 452500 }, { "epoch": 8.801166747690813, "grad_norm": 2.630399227142334, "learning_rate": 6.018978102189781e-06, "loss": 1.562, "step": 452600 }, { "epoch": 8.803111327175499, "grad_norm": 3.8852710723876953, "learning_rate": 6.009245742092458e-06, "loss": 1.453, "step": 452700 }, { "epoch": 8.805055906660185, "grad_norm": 6.826753616333008, "learning_rate": 5.999513381995134e-06, "loss": 1.3698, "step": 452800 }, { "epoch": 8.80700048614487, "grad_norm": 6.4105353355407715, "learning_rate": 5.98978102189781e-06, "loss": 1.4571, "step": 452900 }, { "epoch": 8.808945065629558, "grad_norm": 5.433711051940918, "learning_rate": 5.980048661800487e-06, "loss": 1.4346, "step": 453000 }, { "epoch": 8.810889645114244, "grad_norm": 6.225008010864258, "learning_rate": 5.9703163017031635e-06, "loss": 1.3631, "step": 453100 }, { "epoch": 8.81283422459893, "grad_norm": 4.170408725738525, "learning_rate": 5.96058394160584e-06, "loss": 1.4948, "step": 453200 }, { "epoch": 8.814778804083616, "grad_norm": 4.447789669036865, "learning_rate": 5.950851581508516e-06, "loss": 1.6834, "step": 453300 }, { "epoch": 8.816723383568304, "grad_norm": 3.5379369258880615, "learning_rate": 5.941119221411193e-06, "loss": 1.3998, "step": 453400 }, { "epoch": 8.81866796305299, "grad_norm": 12.474016189575195, "learning_rate": 5.931386861313869e-06, "loss": 1.4946, "step": 453500 }, { 
"epoch": 8.820612542537676, "grad_norm": 4.151916980743408, "learning_rate": 5.921654501216545e-06, "loss": 1.4213, "step": 453600 }, { "epoch": 8.822557122022364, "grad_norm": 8.365198135375977, "learning_rate": 5.911922141119221e-06, "loss": 1.4237, "step": 453700 }, { "epoch": 8.82450170150705, "grad_norm": 2.7883288860321045, "learning_rate": 5.9021897810218984e-06, "loss": 1.6266, "step": 453800 }, { "epoch": 8.826446280991735, "grad_norm": 10.011359214782715, "learning_rate": 5.892457420924575e-06, "loss": 1.4169, "step": 453900 }, { "epoch": 8.828390860476421, "grad_norm": 3.361529588699341, "learning_rate": 5.882725060827251e-06, "loss": 1.4322, "step": 454000 }, { "epoch": 8.83033543996111, "grad_norm": 4.80180549621582, "learning_rate": 5.872992700729928e-06, "loss": 1.4082, "step": 454100 }, { "epoch": 8.832280019445795, "grad_norm": 5.0534892082214355, "learning_rate": 5.863260340632604e-06, "loss": 1.4788, "step": 454200 }, { "epoch": 8.834224598930481, "grad_norm": 5.605487823486328, "learning_rate": 5.85352798053528e-06, "loss": 1.4697, "step": 454300 }, { "epoch": 8.836169178415167, "grad_norm": 2.2940149307250977, "learning_rate": 5.843795620437956e-06, "loss": 1.5243, "step": 454400 }, { "epoch": 8.838113757899855, "grad_norm": 4.457152843475342, "learning_rate": 5.834063260340633e-06, "loss": 1.4588, "step": 454500 }, { "epoch": 8.84005833738454, "grad_norm": 6.765555381774902, "learning_rate": 5.8243309002433095e-06, "loss": 1.3991, "step": 454600 }, { "epoch": 8.842002916869227, "grad_norm": 4.858703136444092, "learning_rate": 5.814598540145986e-06, "loss": 1.3021, "step": 454700 }, { "epoch": 8.843947496353913, "grad_norm": 3.4576685428619385, "learning_rate": 5.804866180048662e-06, "loss": 1.6369, "step": 454800 }, { "epoch": 8.8458920758386, "grad_norm": 2.01601505279541, "learning_rate": 5.795133819951339e-06, "loss": 1.4201, "step": 454900 }, { "epoch": 8.847836655323286, "grad_norm": 5.052001953125, "learning_rate": 5.785401459854014e-06, 
"loss": 1.4076, "step": 455000 }, { "epoch": 8.847836655323286, "eval_accuracy": 0.5603666666666667, "eval_f1": 0.545214793423963, "eval_loss": 1.1724889278411865, "eval_precision": 0.5668383334928007, "eval_recall": 0.5603666666666668, "eval_runtime": 11717.5675, "eval_samples_per_second": 15.362, "eval_steps_per_second": 0.48, "step": 455000 }, { "epoch": 8.849781234807972, "grad_norm": 5.476498126983643, "learning_rate": 5.775669099756691e-06, "loss": 1.5272, "step": 455100 }, { "epoch": 8.85172581429266, "grad_norm": 6.491879463195801, "learning_rate": 5.765936739659367e-06, "loss": 1.4694, "step": 455200 }, { "epoch": 8.853670393777346, "grad_norm": 12.690709114074707, "learning_rate": 5.756301703163018e-06, "loss": 1.4829, "step": 455300 }, { "epoch": 8.855614973262032, "grad_norm": 4.573943614959717, "learning_rate": 5.746569343065694e-06, "loss": 1.4716, "step": 455400 }, { "epoch": 8.857559552746718, "grad_norm": 8.289894104003906, "learning_rate": 5.73683698296837e-06, "loss": 1.5134, "step": 455500 }, { "epoch": 8.859504132231406, "grad_norm": 3.1698625087738037, "learning_rate": 5.727104622871046e-06, "loss": 1.415, "step": 455600 }, { "epoch": 8.861448711716092, "grad_norm": 6.152886390686035, "learning_rate": 5.717372262773723e-06, "loss": 1.4315, "step": 455700 }, { "epoch": 8.863393291200778, "grad_norm": 1.8255255222320557, "learning_rate": 5.707639902676399e-06, "loss": 1.3948, "step": 455800 }, { "epoch": 8.865337870685464, "grad_norm": 2.764484167098999, "learning_rate": 5.6979075425790755e-06, "loss": 1.4108, "step": 455900 }, { "epoch": 8.867282450170151, "grad_norm": 5.550587177276611, "learning_rate": 5.6881751824817525e-06, "loss": 1.3923, "step": 456000 }, { "epoch": 8.869227029654837, "grad_norm": 4.336770534515381, "learning_rate": 5.678442822384429e-06, "loss": 1.3757, "step": 456100 }, { "epoch": 8.871171609139523, "grad_norm": 3.180382251739502, "learning_rate": 5.668710462287105e-06, "loss": 1.4508, "step": 456200 }, { "epoch": 
8.87311618862421, "grad_norm": 5.118285179138184, "learning_rate": 5.658978102189781e-06, "loss": 1.5307, "step": 456300 }, { "epoch": 8.875060768108897, "grad_norm": 4.895171165466309, "learning_rate": 5.649245742092458e-06, "loss": 1.5081, "step": 456400 }, { "epoch": 8.877005347593583, "grad_norm": 15.689165115356445, "learning_rate": 5.639513381995134e-06, "loss": 1.4865, "step": 456500 }, { "epoch": 8.878949927078269, "grad_norm": 3.4000372886657715, "learning_rate": 5.62978102189781e-06, "loss": 1.3814, "step": 456600 }, { "epoch": 8.880894506562957, "grad_norm": 19.52117347717285, "learning_rate": 5.620048661800487e-06, "loss": 1.4946, "step": 456700 }, { "epoch": 8.882839086047642, "grad_norm": 5.876932621002197, "learning_rate": 5.610316301703164e-06, "loss": 1.5349, "step": 456800 }, { "epoch": 8.884783665532328, "grad_norm": 5.996990203857422, "learning_rate": 5.60058394160584e-06, "loss": 1.396, "step": 456900 }, { "epoch": 8.886728245017014, "grad_norm": 6.9575910568237305, "learning_rate": 5.590851581508516e-06, "loss": 1.4811, "step": 457000 }, { "epoch": 8.888672824501702, "grad_norm": 7.844732284545898, "learning_rate": 5.581119221411193e-06, "loss": 1.4539, "step": 457100 }, { "epoch": 8.890617403986388, "grad_norm": 1.8744443655014038, "learning_rate": 5.571386861313869e-06, "loss": 1.4905, "step": 457200 }, { "epoch": 8.892561983471074, "grad_norm": 6.884345531463623, "learning_rate": 5.561654501216545e-06, "loss": 1.816, "step": 457300 }, { "epoch": 8.89450656295576, "grad_norm": 7.580263614654541, "learning_rate": 5.552019464720195e-06, "loss": 1.4814, "step": 457400 }, { "epoch": 8.896451142440448, "grad_norm": 6.560033798217773, "learning_rate": 5.542287104622872e-06, "loss": 1.5296, "step": 457500 }, { "epoch": 8.898395721925134, "grad_norm": 7.851956844329834, "learning_rate": 5.532554744525548e-06, "loss": 1.4799, "step": 457600 }, { "epoch": 8.90034030140982, "grad_norm": 2.142841339111328, "learning_rate": 5.522822384428224e-06, "loss": 
1.4714, "step": 457700 }, { "epoch": 8.902284880894506, "grad_norm": 5.6116943359375, "learning_rate": 5.5130900243309e-06, "loss": 1.3131, "step": 457800 }, { "epoch": 8.904229460379193, "grad_norm": 6.259384632110596, "learning_rate": 5.503357664233577e-06, "loss": 1.4294, "step": 457900 }, { "epoch": 8.90617403986388, "grad_norm": 5.430453777313232, "learning_rate": 5.493625304136253e-06, "loss": 1.5062, "step": 458000 }, { "epoch": 8.908118619348565, "grad_norm": 2.965707540512085, "learning_rate": 5.48389294403893e-06, "loss": 1.6236, "step": 458100 }, { "epoch": 8.910063198833253, "grad_norm": 4.699295997619629, "learning_rate": 5.474160583941606e-06, "loss": 1.5837, "step": 458200 }, { "epoch": 8.912007778317939, "grad_norm": 11.99661922454834, "learning_rate": 5.464428223844283e-06, "loss": 1.5154, "step": 458300 }, { "epoch": 8.913952357802625, "grad_norm": 2.2403228282928467, "learning_rate": 5.454695863746959e-06, "loss": 1.4278, "step": 458400 }, { "epoch": 8.915896937287311, "grad_norm": 3.719651937484741, "learning_rate": 5.444963503649635e-06, "loss": 1.5231, "step": 458500 }, { "epoch": 8.917841516771999, "grad_norm": 3.6288108825683594, "learning_rate": 5.435231143552311e-06, "loss": 1.4477, "step": 458600 }, { "epoch": 8.919786096256685, "grad_norm": 5.278757572174072, "learning_rate": 5.425498783454988e-06, "loss": 1.4387, "step": 458700 }, { "epoch": 8.92173067574137, "grad_norm": 1.7197930812835693, "learning_rate": 5.4157664233576645e-06, "loss": 1.3705, "step": 458800 }, { "epoch": 8.923675255226057, "grad_norm": 2.4787681102752686, "learning_rate": 5.406034063260341e-06, "loss": 1.4273, "step": 458900 }, { "epoch": 8.925619834710744, "grad_norm": 4.856957912445068, "learning_rate": 5.396301703163018e-06, "loss": 1.4356, "step": 459000 }, { "epoch": 8.92756441419543, "grad_norm": 4.851674556732178, "learning_rate": 5.386569343065694e-06, "loss": 1.5097, "step": 459100 }, { "epoch": 8.929508993680116, "grad_norm": 7.463958263397217, 
"learning_rate": 5.37683698296837e-06, "loss": 1.6374, "step": 459200 }, { "epoch": 8.931453573164804, "grad_norm": 6.389491558074951, "learning_rate": 5.367104622871046e-06, "loss": 1.4599, "step": 459300 }, { "epoch": 8.93339815264949, "grad_norm": 4.541326999664307, "learning_rate": 5.357469586374696e-06, "loss": 1.4368, "step": 459400 }, { "epoch": 8.935342732134176, "grad_norm": 3.6359877586364746, "learning_rate": 5.347737226277373e-06, "loss": 1.497, "step": 459500 }, { "epoch": 8.937287311618862, "grad_norm": 15.247823715209961, "learning_rate": 5.338004866180049e-06, "loss": 1.4044, "step": 459600 }, { "epoch": 8.93923189110355, "grad_norm": 1.4103835821151733, "learning_rate": 5.328272506082725e-06, "loss": 1.4598, "step": 459700 }, { "epoch": 8.941176470588236, "grad_norm": 4.258283615112305, "learning_rate": 5.318540145985402e-06, "loss": 1.549, "step": 459800 }, { "epoch": 8.943121050072921, "grad_norm": 8.196500778198242, "learning_rate": 5.308905109489051e-06, "loss": 1.3919, "step": 459900 }, { "epoch": 8.945065629557607, "grad_norm": 4.91665506362915, "learning_rate": 5.299172749391728e-06, "loss": 1.6888, "step": 460000 }, { "epoch": 8.945065629557607, "eval_accuracy": 0.5642166666666667, "eval_f1": 0.5532961117543383, "eval_loss": 1.1622065305709839, "eval_precision": 0.5732132528180172, "eval_recall": 0.5642166666666667, "eval_runtime": 11700.9349, "eval_samples_per_second": 15.383, "eval_steps_per_second": 0.481, "step": 460000 }, { "epoch": 8.947010209042295, "grad_norm": 16.816659927368164, "learning_rate": 5.289440389294404e-06, "loss": 1.4379, "step": 460100 }, { "epoch": 8.948954788526981, "grad_norm": 26.46392250061035, "learning_rate": 5.279708029197081e-06, "loss": 1.5572, "step": 460200 }, { "epoch": 8.950899368011667, "grad_norm": 8.855502128601074, "learning_rate": 5.269975669099757e-06, "loss": 1.5888, "step": 460300 }, { "epoch": 8.952843947496355, "grad_norm": 5.86868143081665, "learning_rate": 5.260243309002434e-06, "loss": 
1.3944, "step": 460400 }, { "epoch": 8.95478852698104, "grad_norm": 4.7319016456604, "learning_rate": 5.250510948905109e-06, "loss": 1.4499, "step": 460500 }, { "epoch": 8.956733106465727, "grad_norm": 7.153744220733643, "learning_rate": 5.240778588807786e-06, "loss": 1.3956, "step": 460600 }, { "epoch": 8.958677685950413, "grad_norm": 3.6998796463012695, "learning_rate": 5.231046228710462e-06, "loss": 1.4224, "step": 460700 }, { "epoch": 8.9606222654351, "grad_norm": 5.846383094787598, "learning_rate": 5.2213138686131386e-06, "loss": 1.4374, "step": 460800 }, { "epoch": 8.962566844919786, "grad_norm": 5.725977420806885, "learning_rate": 5.211581508515816e-06, "loss": 1.3654, "step": 460900 }, { "epoch": 8.964511424404472, "grad_norm": 1.588238000869751, "learning_rate": 5.201849148418492e-06, "loss": 1.5503, "step": 461000 }, { "epoch": 8.966456003889158, "grad_norm": 5.179589748382568, "learning_rate": 5.192116788321169e-06, "loss": 1.2824, "step": 461100 }, { "epoch": 8.968400583373846, "grad_norm": 4.531299591064453, "learning_rate": 5.182384428223844e-06, "loss": 1.3852, "step": 461200 }, { "epoch": 8.970345162858532, "grad_norm": 2.4984536170959473, "learning_rate": 5.172652068126521e-06, "loss": 1.4519, "step": 461300 }, { "epoch": 8.972289742343218, "grad_norm": 6.359990119934082, "learning_rate": 5.162919708029197e-06, "loss": 1.4227, "step": 461400 }, { "epoch": 8.974234321827904, "grad_norm": 8.115845680236816, "learning_rate": 5.1531873479318735e-06, "loss": 1.5917, "step": 461500 }, { "epoch": 8.976178901312592, "grad_norm": 6.912419319152832, "learning_rate": 5.14345498783455e-06, "loss": 1.2961, "step": 461600 }, { "epoch": 8.978123480797278, "grad_norm": 7.4345927238464355, "learning_rate": 5.133722627737227e-06, "loss": 1.5129, "step": 461700 }, { "epoch": 8.980068060281964, "grad_norm": 5.409885406494141, "learning_rate": 5.123990267639903e-06, "loss": 1.4654, "step": 461800 }, { "epoch": 8.982012639766651, "grad_norm": 7.990585803985596, 
"learning_rate": 5.114257907542579e-06, "loss": 1.4622, "step": 461900 }, { "epoch": 8.983957219251337, "grad_norm": 4.986490249633789, "learning_rate": 5.104525547445255e-06, "loss": 1.5972, "step": 462000 }, { "epoch": 8.985901798736023, "grad_norm": 5.597066402435303, "learning_rate": 5.094793187347932e-06, "loss": 1.4006, "step": 462100 }, { "epoch": 8.98784637822071, "grad_norm": 4.309126377105713, "learning_rate": 5.085060827250609e-06, "loss": 1.6519, "step": 462200 }, { "epoch": 8.989790957705397, "grad_norm": 6.883898735046387, "learning_rate": 5.0753284671532845e-06, "loss": 1.4377, "step": 462300 }, { "epoch": 8.991735537190083, "grad_norm": 9.195684432983398, "learning_rate": 5.0655961070559616e-06, "loss": 1.4171, "step": 462400 }, { "epoch": 8.993680116674769, "grad_norm": 4.775413513183594, "learning_rate": 5.055863746958638e-06, "loss": 1.3835, "step": 462500 }, { "epoch": 8.995624696159455, "grad_norm": 2.9205074310302734, "learning_rate": 5.046131386861314e-06, "loss": 1.3823, "step": 462600 }, { "epoch": 8.997569275644143, "grad_norm": 3.326233148574829, "learning_rate": 5.03639902676399e-06, "loss": 1.6278, "step": 462700 }, { "epoch": 8.999513855128829, "grad_norm": 19.321455001831055, "learning_rate": 5.026666666666667e-06, "loss": 1.4973, "step": 462800 }, { "epoch": 9.001458434613514, "grad_norm": 3.5324435234069824, "learning_rate": 5.016934306569343e-06, "loss": 1.3939, "step": 462900 }, { "epoch": 9.0034030140982, "grad_norm": 5.454329967498779, "learning_rate": 5.0072019464720194e-06, "loss": 1.4468, "step": 463000 }, { "epoch": 9.005347593582888, "grad_norm": 4.930783271789551, "learning_rate": 4.997469586374696e-06, "loss": 1.4378, "step": 463100 }, { "epoch": 9.007292173067574, "grad_norm": 4.861337661743164, "learning_rate": 4.987737226277373e-06, "loss": 1.5426, "step": 463200 }, { "epoch": 9.00923675255226, "grad_norm": 5.506051063537598, "learning_rate": 4.978004866180049e-06, "loss": 1.3668, "step": 463300 }, { "epoch": 
9.011181332036948, "grad_norm": 8.094209671020508, "learning_rate": 4.968272506082725e-06, "loss": 1.6692, "step": 463400 }, { "epoch": 9.013125911521634, "grad_norm": 4.889486789703369, "learning_rate": 4.958540145985402e-06, "loss": 1.4395, "step": 463500 }, { "epoch": 9.01507049100632, "grad_norm": 4.979310512542725, "learning_rate": 4.948807785888078e-06, "loss": 1.4238, "step": 463600 }, { "epoch": 9.017015070491006, "grad_norm": 1.298567295074463, "learning_rate": 4.939075425790754e-06, "loss": 1.4751, "step": 463700 }, { "epoch": 9.018959649975693, "grad_norm": 5.4813151359558105, "learning_rate": 4.9293430656934305e-06, "loss": 1.5009, "step": 463800 }, { "epoch": 9.02090422946038, "grad_norm": 10.554287910461426, "learning_rate": 4.919708029197081e-06, "loss": 1.4128, "step": 463900 }, { "epoch": 9.022848808945065, "grad_norm": 2.254279136657715, "learning_rate": 4.909975669099757e-06, "loss": 1.443, "step": 464000 }, { "epoch": 9.024793388429751, "grad_norm": 4.685361862182617, "learning_rate": 4.900243309002434e-06, "loss": 1.4605, "step": 464100 }, { "epoch": 9.026737967914439, "grad_norm": 4.211101531982422, "learning_rate": 4.890510948905109e-06, "loss": 1.4264, "step": 464200 }, { "epoch": 9.028682547399125, "grad_norm": 7.342095375061035, "learning_rate": 4.880778588807786e-06, "loss": 1.4002, "step": 464300 }, { "epoch": 9.030627126883811, "grad_norm": 4.0220723152160645, "learning_rate": 4.8710462287104625e-06, "loss": 1.6233, "step": 464400 }, { "epoch": 9.032571706368497, "grad_norm": 5.095726013183594, "learning_rate": 4.861313868613139e-06, "loss": 1.5962, "step": 464500 }, { "epoch": 9.034516285853185, "grad_norm": 2.7739007472991943, "learning_rate": 4.851581508515815e-06, "loss": 1.4288, "step": 464600 }, { "epoch": 9.03646086533787, "grad_norm": 6.117439270019531, "learning_rate": 4.841849148418492e-06, "loss": 1.3654, "step": 464700 }, { "epoch": 9.038405444822557, "grad_norm": 2.9665277004241943, "learning_rate": 4.832116788321168e-06, 
"loss": 1.4414, "step": 464800 }, { "epoch": 9.040350024307244, "grad_norm": 3.5646770000457764, "learning_rate": 4.822384428223844e-06, "loss": 1.4191, "step": 464900 }, { "epoch": 9.04229460379193, "grad_norm": 2.852037191390991, "learning_rate": 4.812652068126521e-06, "loss": 1.4282, "step": 465000 }, { "epoch": 9.04229460379193, "eval_accuracy": 0.5682333333333334, "eval_f1": 0.557868831530041, "eval_loss": 1.1565907001495361, "eval_precision": 0.5726040322041763, "eval_recall": 0.5682333333333333, "eval_runtime": 11702.4097, "eval_samples_per_second": 15.381, "eval_steps_per_second": 0.481, "step": 465000 }, { "epoch": 9.044239183276616, "grad_norm": 4.128431797027588, "learning_rate": 4.802919708029197e-06, "loss": 1.4888, "step": 465100 }, { "epoch": 9.046183762761302, "grad_norm": 1.6851270198822021, "learning_rate": 4.793284671532847e-06, "loss": 1.5825, "step": 465200 }, { "epoch": 9.04812834224599, "grad_norm": 4.715086460113525, "learning_rate": 4.783552311435524e-06, "loss": 1.558, "step": 465300 }, { "epoch": 9.050072921730676, "grad_norm": 3.8186821937561035, "learning_rate": 4.773819951338199e-06, "loss": 1.5578, "step": 465400 }, { "epoch": 9.052017501215362, "grad_norm": 6.139969348907471, "learning_rate": 4.764087591240876e-06, "loss": 1.4783, "step": 465500 }, { "epoch": 9.053962080700048, "grad_norm": 11.855351448059082, "learning_rate": 4.754355231143553e-06, "loss": 1.5419, "step": 465600 }, { "epoch": 9.055906660184736, "grad_norm": 1.691763997077942, "learning_rate": 4.7446228710462284e-06, "loss": 1.5345, "step": 465700 }, { "epoch": 9.057851239669422, "grad_norm": 4.943518161773682, "learning_rate": 4.7348905109489055e-06, "loss": 1.5048, "step": 465800 }, { "epoch": 9.059795819154107, "grad_norm": 7.311404228210449, "learning_rate": 4.725158150851582e-06, "loss": 1.3923, "step": 465900 }, { "epoch": 9.061740398638795, "grad_norm": 8.700784683227539, "learning_rate": 4.715425790754259e-06, "loss": 1.4715, "step": 466000 }, { "epoch": 
9.063684978123481, "grad_norm": 2.41282057762146, "learning_rate": 4.705693430656934e-06, "loss": 1.5938, "step": 466100 }, { "epoch": 9.065629557608167, "grad_norm": 3.4380202293395996, "learning_rate": 4.695961070559611e-06, "loss": 1.4812, "step": 466200 }, { "epoch": 9.067574137092853, "grad_norm": 4.421656608581543, "learning_rate": 4.686228710462287e-06, "loss": 1.3977, "step": 466300 }, { "epoch": 9.06951871657754, "grad_norm": 4.956746578216553, "learning_rate": 4.676496350364964e-06, "loss": 1.4697, "step": 466400 }, { "epoch": 9.071463296062227, "grad_norm": 2.9929518699645996, "learning_rate": 4.6667639902676395e-06, "loss": 1.5557, "step": 466500 }, { "epoch": 9.073407875546913, "grad_norm": 5.746070384979248, "learning_rate": 4.6570316301703165e-06, "loss": 1.4204, "step": 466600 }, { "epoch": 9.075352455031599, "grad_norm": 3.0187551975250244, "learning_rate": 4.6472992700729935e-06, "loss": 1.4581, "step": 466700 }, { "epoch": 9.077297034516286, "grad_norm": 4.1894707679748535, "learning_rate": 4.637566909975669e-06, "loss": 1.6626, "step": 466800 }, { "epoch": 9.079241614000972, "grad_norm": 4.404465198516846, "learning_rate": 4.627834549878346e-06, "loss": 1.5885, "step": 466900 }, { "epoch": 9.081186193485658, "grad_norm": 4.629497528076172, "learning_rate": 4.618102189781022e-06, "loss": 1.6075, "step": 467000 }, { "epoch": 9.083130772970344, "grad_norm": 1.6874381303787231, "learning_rate": 4.608369829683699e-06, "loss": 1.365, "step": 467100 }, { "epoch": 9.085075352455032, "grad_norm": 3.7292373180389404, "learning_rate": 4.598637469586374e-06, "loss": 1.5604, "step": 467200 }, { "epoch": 9.087019931939718, "grad_norm": 7.161989688873291, "learning_rate": 4.5889051094890514e-06, "loss": 1.5058, "step": 467300 }, { "epoch": 9.088964511424404, "grad_norm": 4.564218044281006, "learning_rate": 4.579172749391728e-06, "loss": 1.4388, "step": 467400 }, { "epoch": 9.090909090909092, "grad_norm": 4.800302982330322, "learning_rate": 
4.569440389294404e-06, "loss": 1.5283, "step": 467500 }, { "epoch": 9.092853670393778, "grad_norm": 7.01707124710083, "learning_rate": 4.55970802919708e-06, "loss": 1.4451, "step": 467600 }, { "epoch": 9.094798249878464, "grad_norm": 3.555619478225708, "learning_rate": 4.549975669099757e-06, "loss": 1.3503, "step": 467700 }, { "epoch": 9.09674282936315, "grad_norm": 8.852524757385254, "learning_rate": 4.540243309002434e-06, "loss": 1.4314, "step": 467800 }, { "epoch": 9.098687408847837, "grad_norm": 5.891478538513184, "learning_rate": 4.530510948905109e-06, "loss": 1.3024, "step": 467900 }, { "epoch": 9.100631988332523, "grad_norm": 6.297675132751465, "learning_rate": 4.520778588807786e-06, "loss": 1.312, "step": 468000 }, { "epoch": 9.10257656781721, "grad_norm": 2.2130074501037598, "learning_rate": 4.5110462287104625e-06, "loss": 1.4821, "step": 468100 }, { "epoch": 9.104521147301895, "grad_norm": 3.6980443000793457, "learning_rate": 4.5013138686131395e-06, "loss": 1.4846, "step": 468200 }, { "epoch": 9.106465726786583, "grad_norm": 2.1614391803741455, "learning_rate": 4.491581508515815e-06, "loss": 1.3629, "step": 468300 }, { "epoch": 9.108410306271269, "grad_norm": 8.353337287902832, "learning_rate": 4.481849148418492e-06, "loss": 1.3557, "step": 468400 }, { "epoch": 9.110354885755955, "grad_norm": 5.2390265464782715, "learning_rate": 4.472116788321168e-06, "loss": 1.4717, "step": 468500 }, { "epoch": 9.112299465240643, "grad_norm": 6.850253582000732, "learning_rate": 4.462384428223844e-06, "loss": 1.3911, "step": 468600 }, { "epoch": 9.114244044725329, "grad_norm": 4.123579502105713, "learning_rate": 4.45265206812652e-06, "loss": 1.4197, "step": 468700 }, { "epoch": 9.116188624210015, "grad_norm": 4.453581809997559, "learning_rate": 4.442919708029197e-06, "loss": 1.4988, "step": 468800 }, { "epoch": 9.1181332036947, "grad_norm": 5.472794055938721, "learning_rate": 4.433187347931874e-06, "loss": 1.5154, "step": 468900 }, { "epoch": 9.120077783179388, 
"grad_norm": 5.41421365737915, "learning_rate": 4.42345498783455e-06, "loss": 1.372, "step": 469000 }, { "epoch": 9.122022362664074, "grad_norm": 6.34230375289917, "learning_rate": 4.413722627737227e-06, "loss": 1.7968, "step": 469100 }, { "epoch": 9.12396694214876, "grad_norm": 7.936646938323975, "learning_rate": 4.404087591240876e-06, "loss": 1.4322, "step": 469200 }, { "epoch": 9.125911521633446, "grad_norm": 9.029552459716797, "learning_rate": 4.394355231143552e-06, "loss": 1.4131, "step": 469300 }, { "epoch": 9.127856101118134, "grad_norm": 5.943960666656494, "learning_rate": 4.384622871046229e-06, "loss": 1.2922, "step": 469400 }, { "epoch": 9.12980068060282, "grad_norm": 6.051487445831299, "learning_rate": 4.3748905109489055e-06, "loss": 1.4413, "step": 469500 }, { "epoch": 9.131745260087506, "grad_norm": 5.462738990783691, "learning_rate": 4.365158150851582e-06, "loss": 1.596, "step": 469600 }, { "epoch": 9.133689839572192, "grad_norm": 5.35571813583374, "learning_rate": 4.355425790754259e-06, "loss": 1.3917, "step": 469700 }, { "epoch": 9.13563441905688, "grad_norm": 3.8852267265319824, "learning_rate": 4.345693430656934e-06, "loss": 1.5204, "step": 469800 }, { "epoch": 9.137578998541565, "grad_norm": 5.286252021789551, "learning_rate": 4.335961070559611e-06, "loss": 1.6417, "step": 469900 }, { "epoch": 9.139523578026251, "grad_norm": 3.969184637069702, "learning_rate": 4.326228710462287e-06, "loss": 1.4833, "step": 470000 }, { "epoch": 9.139523578026251, "eval_accuracy": 0.5634555555555556, "eval_f1": 0.552636540314876, "eval_loss": 1.1657545566558838, "eval_precision": 0.5725251620200354, "eval_recall": 0.5634555555555555, "eval_runtime": 11703.4404, "eval_samples_per_second": 15.38, "eval_steps_per_second": 0.481, "step": 470000 }, { "epoch": 9.141468157510939, "grad_norm": 3.784302234649658, "learning_rate": 4.316496350364964e-06, "loss": 1.4835, "step": 470100 }, { "epoch": 9.143412736995625, "grad_norm": 12.761284828186035, "learning_rate": 
4.3067639902676396e-06, "loss": 1.3705, "step": 470200 }, { "epoch": 9.145357316480311, "grad_norm": 24.416667938232422, "learning_rate": 4.297031630170317e-06, "loss": 1.4364, "step": 470300 }, { "epoch": 9.147301895964997, "grad_norm": 8.790715217590332, "learning_rate": 4.287299270072993e-06, "loss": 1.6951, "step": 470400 }, { "epoch": 9.149246475449685, "grad_norm": 6.2379255294799805, "learning_rate": 4.277566909975669e-06, "loss": 1.4958, "step": 470500 }, { "epoch": 9.15119105493437, "grad_norm": 1.7105664014816284, "learning_rate": 4.267834549878346e-06, "loss": 1.5163, "step": 470600 }, { "epoch": 9.153135634419057, "grad_norm": 4.530811786651611, "learning_rate": 4.258102189781022e-06, "loss": 1.3516, "step": 470700 }, { "epoch": 9.155080213903743, "grad_norm": 2.9568028450012207, "learning_rate": 4.248369829683699e-06, "loss": 1.4434, "step": 470800 }, { "epoch": 9.15702479338843, "grad_norm": 7.6042633056640625, "learning_rate": 4.2386374695863745e-06, "loss": 1.5734, "step": 470900 }, { "epoch": 9.158969372873116, "grad_norm": 5.599851131439209, "learning_rate": 4.2289051094890515e-06, "loss": 1.5205, "step": 471000 }, { "epoch": 9.160913952357802, "grad_norm": 3.501152753829956, "learning_rate": 4.219172749391728e-06, "loss": 1.4714, "step": 471100 }, { "epoch": 9.162858531842488, "grad_norm": 4.682856559753418, "learning_rate": 4.209537712895378e-06, "loss": 1.4045, "step": 471200 }, { "epoch": 9.164803111327176, "grad_norm": 4.413924217224121, "learning_rate": 4.199805352798054e-06, "loss": 1.4454, "step": 471300 }, { "epoch": 9.166747690811862, "grad_norm": 6.31458854675293, "learning_rate": 4.19007299270073e-06, "loss": 1.5909, "step": 471400 }, { "epoch": 9.168692270296548, "grad_norm": 3.879279613494873, "learning_rate": 4.180340632603406e-06, "loss": 1.5156, "step": 471500 }, { "epoch": 9.170636849781236, "grad_norm": 2.438936710357666, "learning_rate": 4.170608272506083e-06, "loss": 1.3664, "step": 471600 }, { "epoch": 9.172581429265922, 
"grad_norm": 6.300048828125, "learning_rate": 4.160875912408759e-06, "loss": 1.4539, "step": 471700 }, { "epoch": 9.174526008750608, "grad_norm": 29.37751007080078, "learning_rate": 4.151143552311436e-06, "loss": 1.5479, "step": 471800 }, { "epoch": 9.176470588235293, "grad_norm": 6.76544713973999, "learning_rate": 4.141411192214112e-06, "loss": 1.4268, "step": 471900 }, { "epoch": 9.178415167719981, "grad_norm": 4.209535121917725, "learning_rate": 4.131678832116789e-06, "loss": 1.3159, "step": 472000 }, { "epoch": 9.180359747204667, "grad_norm": 2.277170419692993, "learning_rate": 4.121946472019464e-06, "loss": 1.7128, "step": 472100 }, { "epoch": 9.182304326689353, "grad_norm": 4.333306789398193, "learning_rate": 4.112214111922141e-06, "loss": 1.472, "step": 472200 }, { "epoch": 9.184248906174039, "grad_norm": 2.4382052421569824, "learning_rate": 4.102481751824818e-06, "loss": 1.4742, "step": 472300 }, { "epoch": 9.186193485658727, "grad_norm": 5.250733852386475, "learning_rate": 4.0927493917274945e-06, "loss": 1.4923, "step": 472400 }, { "epoch": 9.188138065143413, "grad_norm": 6.835832118988037, "learning_rate": 4.083017031630171e-06, "loss": 1.4133, "step": 472500 }, { "epoch": 9.190082644628099, "grad_norm": 6.613422393798828, "learning_rate": 4.073284671532847e-06, "loss": 1.4716, "step": 472600 }, { "epoch": 9.192027224112785, "grad_norm": 16.367334365844727, "learning_rate": 4.063552311435524e-06, "loss": 1.4999, "step": 472700 }, { "epoch": 9.193971803597472, "grad_norm": 3.773860454559326, "learning_rate": 4.053819951338199e-06, "loss": 1.4323, "step": 472800 }, { "epoch": 9.195916383082158, "grad_norm": 5.798492431640625, "learning_rate": 4.044087591240876e-06, "loss": 1.3518, "step": 472900 }, { "epoch": 9.197860962566844, "grad_norm": 14.511983871459961, "learning_rate": 4.034355231143552e-06, "loss": 1.3464, "step": 473000 }, { "epoch": 9.199805542051532, "grad_norm": 4.941860198974609, "learning_rate": 4.024622871046229e-06, "loss": 1.364, "step": 
473100 }, { "epoch": 9.201750121536218, "grad_norm": 4.389184474945068, "learning_rate": 4.014890510948905e-06, "loss": 1.4173, "step": 473200 }, { "epoch": 9.203694701020904, "grad_norm": 3.844609498977661, "learning_rate": 4.005255474452555e-06, "loss": 1.5007, "step": 473300 }, { "epoch": 9.20563928050559, "grad_norm": 3.583920478820801, "learning_rate": 3.995523114355231e-06, "loss": 1.4281, "step": 473400 }, { "epoch": 9.207583859990278, "grad_norm": 6.253390789031982, "learning_rate": 3.985790754257908e-06, "loss": 1.3635, "step": 473500 }, { "epoch": 9.209528439474964, "grad_norm": 4.447307586669922, "learning_rate": 3.976058394160584e-06, "loss": 1.565, "step": 473600 }, { "epoch": 9.21147301895965, "grad_norm": 4.090397834777832, "learning_rate": 3.9663260340632605e-06, "loss": 1.4125, "step": 473700 }, { "epoch": 9.213417598444336, "grad_norm": 2.722989082336426, "learning_rate": 3.956593673965937e-06, "loss": 1.515, "step": 473800 }, { "epoch": 9.215362177929023, "grad_norm": 6.2140092849731445, "learning_rate": 3.946861313868614e-06, "loss": 1.3392, "step": 473900 }, { "epoch": 9.21730675741371, "grad_norm": 5.175930500030518, "learning_rate": 3.93712895377129e-06, "loss": 1.4915, "step": 474000 }, { "epoch": 9.219251336898395, "grad_norm": 3.526729106903076, "learning_rate": 3.927396593673966e-06, "loss": 1.4091, "step": 474100 }, { "epoch": 9.221195916383083, "grad_norm": 5.5268354415893555, "learning_rate": 3.917664233576643e-06, "loss": 1.5815, "step": 474200 }, { "epoch": 9.223140495867769, "grad_norm": 12.851936340332031, "learning_rate": 3.907931873479319e-06, "loss": 1.5139, "step": 474300 }, { "epoch": 9.225085075352455, "grad_norm": 7.180195331573486, "learning_rate": 3.898199513381995e-06, "loss": 1.4605, "step": 474400 }, { "epoch": 9.22702965483714, "grad_norm": 8.791768074035645, "learning_rate": 3.8884671532846716e-06, "loss": 1.6015, "step": 474500 }, { "epoch": 9.228974234321829, "grad_norm": 6.598504066467285, "learning_rate": 
3.8787347931873486e-06, "loss": 1.4329, "step": 474600 }, { "epoch": 9.230918813806515, "grad_norm": 15.040843963623047, "learning_rate": 3.869002433090024e-06, "loss": 1.3986, "step": 474700 }, { "epoch": 9.2328633932912, "grad_norm": 3.1750216484069824, "learning_rate": 3.859270072992701e-06, "loss": 1.4005, "step": 474800 }, { "epoch": 9.234807972775886, "grad_norm": 1.7882877588272095, "learning_rate": 3.849537712895377e-06, "loss": 1.554, "step": 474900 }, { "epoch": 9.236752552260574, "grad_norm": 7.846591472625732, "learning_rate": 3.839902676399027e-06, "loss": 1.5365, "step": 475000 }, { "epoch": 9.236752552260574, "eval_accuracy": 0.5684166666666667, "eval_f1": 0.5566801658249079, "eval_loss": 1.1588540077209473, "eval_precision": 0.5686811796893826, "eval_recall": 0.5684166666666667, "eval_runtime": 11690.4416, "eval_samples_per_second": 15.397, "eval_steps_per_second": 0.481, "step": 475000 }, { "epoch": 9.23869713174526, "grad_norm": 2.9897382259368896, "learning_rate": 3.8301703163017035e-06, "loss": 1.4592, "step": 475100 }, { "epoch": 9.240641711229946, "grad_norm": 2.5172488689422607, "learning_rate": 3.82043795620438e-06, "loss": 1.5252, "step": 475200 }, { "epoch": 9.242586290714632, "grad_norm": 11.483311653137207, "learning_rate": 3.810705596107056e-06, "loss": 1.5638, "step": 475300 }, { "epoch": 9.24453087019932, "grad_norm": 1.4219626188278198, "learning_rate": 3.8009732360097324e-06, "loss": 1.5479, "step": 475400 }, { "epoch": 9.246475449684006, "grad_norm": 5.330941200256348, "learning_rate": 3.7912408759124086e-06, "loss": 1.4702, "step": 475500 }, { "epoch": 9.248420029168692, "grad_norm": 5.334885597229004, "learning_rate": 3.781508515815085e-06, "loss": 1.5821, "step": 475600 }, { "epoch": 9.25036460865338, "grad_norm": 3.506192207336426, "learning_rate": 3.771776155717762e-06, "loss": 1.5688, "step": 475700 }, { "epoch": 9.252309188138065, "grad_norm": 4.882051467895508, "learning_rate": 3.762043795620438e-06, "loss": 1.4353, "step": 
475800 }, { "epoch": 9.254253767622751, "grad_norm": 3.425546884536743, "learning_rate": 3.752311435523115e-06, "loss": 1.3611, "step": 475900 }, { "epoch": 9.256198347107437, "grad_norm": 9.299677848815918, "learning_rate": 3.7425790754257907e-06, "loss": 1.4325, "step": 476000 }, { "epoch": 9.258142926592125, "grad_norm": 3.4942049980163574, "learning_rate": 3.7328467153284677e-06, "loss": 1.3346, "step": 476100 }, { "epoch": 9.260087506076811, "grad_norm": 4.559672832489014, "learning_rate": 3.7231143552311435e-06, "loss": 1.5051, "step": 476200 }, { "epoch": 9.262032085561497, "grad_norm": 6.516272068023682, "learning_rate": 3.71338199513382e-06, "loss": 1.543, "step": 476300 }, { "epoch": 9.263976665046183, "grad_norm": 3.160700798034668, "learning_rate": 3.7036496350364963e-06, "loss": 1.5793, "step": 476400 }, { "epoch": 9.26592124453087, "grad_norm": 6.690160751342773, "learning_rate": 3.693917274939173e-06, "loss": 1.3486, "step": 476500 }, { "epoch": 9.267865824015557, "grad_norm": 5.6706767082214355, "learning_rate": 3.684184914841849e-06, "loss": 1.496, "step": 476600 }, { "epoch": 9.269810403500243, "grad_norm": 3.408165454864502, "learning_rate": 3.6744525547445256e-06, "loss": 1.3665, "step": 476700 }, { "epoch": 9.27175498298493, "grad_norm": 5.020935535430908, "learning_rate": 3.664720194647202e-06, "loss": 1.4709, "step": 476800 }, { "epoch": 9.273699562469616, "grad_norm": 3.813861846923828, "learning_rate": 3.6549878345498784e-06, "loss": 1.3119, "step": 476900 }, { "epoch": 9.275644141954302, "grad_norm": 2.673283815383911, "learning_rate": 3.6452554744525554e-06, "loss": 1.6571, "step": 477000 }, { "epoch": 9.277588721438988, "grad_norm": 5.329628944396973, "learning_rate": 3.635523114355231e-06, "loss": 1.4629, "step": 477100 }, { "epoch": 9.279533300923676, "grad_norm": 3.5821821689605713, "learning_rate": 3.6257907542579078e-06, "loss": 1.6271, "step": 477200 }, { "epoch": 9.281477880408362, "grad_norm": 3.6718711853027344, "learning_rate": 
3.616058394160584e-06, "loss": 1.3911, "step": 477300 }, { "epoch": 9.283422459893048, "grad_norm": 5.3257927894592285, "learning_rate": 3.6063260340632605e-06, "loss": 1.3273, "step": 477400 }, { "epoch": 9.285367039377734, "grad_norm": 3.204380512237549, "learning_rate": 3.5965936739659367e-06, "loss": 1.5359, "step": 477500 }, { "epoch": 9.287311618862422, "grad_norm": 3.4415509700775146, "learning_rate": 3.5868613138686133e-06, "loss": 1.4455, "step": 477600 }, { "epoch": 9.289256198347108, "grad_norm": 14.648444175720215, "learning_rate": 3.5771289537712895e-06, "loss": 1.5958, "step": 477700 }, { "epoch": 9.291200777831794, "grad_norm": 6.346559047698975, "learning_rate": 3.567396593673966e-06, "loss": 1.3403, "step": 477800 }, { "epoch": 9.29314535731648, "grad_norm": 3.186769485473633, "learning_rate": 3.5576642335766422e-06, "loss": 1.4668, "step": 477900 }, { "epoch": 9.295089936801167, "grad_norm": 17.9744930267334, "learning_rate": 3.547931873479319e-06, "loss": 1.6604, "step": 478000 }, { "epoch": 9.297034516285853, "grad_norm": 5.0392231941223145, "learning_rate": 3.5381995133819954e-06, "loss": 1.3846, "step": 478100 }, { "epoch": 9.29897909577054, "grad_norm": 4.360180377960205, "learning_rate": 3.5284671532846716e-06, "loss": 1.4683, "step": 478200 }, { "epoch": 9.300923675255227, "grad_norm": 4.834597587585449, "learning_rate": 3.518734793187348e-06, "loss": 1.837, "step": 478300 }, { "epoch": 9.302868254739913, "grad_norm": 3.9046952724456787, "learning_rate": 3.5090024330900244e-06, "loss": 1.498, "step": 478400 }, { "epoch": 9.304812834224599, "grad_norm": 4.061009883880615, "learning_rate": 3.499270072992701e-06, "loss": 1.3363, "step": 478500 }, { "epoch": 9.306757413709285, "grad_norm": 2.69549560546875, "learning_rate": 3.489537712895377e-06, "loss": 1.417, "step": 478600 }, { "epoch": 9.308701993193973, "grad_norm": 12.862143516540527, "learning_rate": 3.4798053527980537e-06, "loss": 1.5454, "step": 478700 }, { "epoch": 9.310646572678658, 
"grad_norm": 7.32778263092041, "learning_rate": 3.47007299270073e-06, "loss": 1.3423, "step": 478800 }, { "epoch": 9.312591152163344, "grad_norm": 4.017085552215576, "learning_rate": 3.4603406326034065e-06, "loss": 1.4635, "step": 478900 }, { "epoch": 9.31453573164803, "grad_norm": 3.8959052562713623, "learning_rate": 3.450705596107056e-06, "loss": 1.3645, "step": 479000 }, { "epoch": 9.316480311132718, "grad_norm": 4.748368263244629, "learning_rate": 3.441070559610706e-06, "loss": 1.4179, "step": 479100 }, { "epoch": 9.318424890617404, "grad_norm": 3.2101597785949707, "learning_rate": 3.4313381995133823e-06, "loss": 1.4733, "step": 479200 }, { "epoch": 9.32036947010209, "grad_norm": 7.966134071350098, "learning_rate": 3.421605839416059e-06, "loss": 1.5202, "step": 479300 }, { "epoch": 9.322314049586776, "grad_norm": 7.295009136199951, "learning_rate": 3.411873479318735e-06, "loss": 1.319, "step": 479400 }, { "epoch": 9.324258629071464, "grad_norm": 10.47667407989502, "learning_rate": 3.4021411192214116e-06, "loss": 1.4726, "step": 479500 }, { "epoch": 9.32620320855615, "grad_norm": 7.011773586273193, "learning_rate": 3.3924087591240874e-06, "loss": 1.5288, "step": 479600 }, { "epoch": 9.328147788040836, "grad_norm": 6.884186744689941, "learning_rate": 3.3826763990267644e-06, "loss": 1.4903, "step": 479700 }, { "epoch": 9.330092367525523, "grad_norm": 2.2260937690734863, "learning_rate": 3.37294403892944e-06, "loss": 1.3736, "step": 479800 }, { "epoch": 9.33203694701021, "grad_norm": 5.607173442840576, "learning_rate": 3.363211678832117e-06, "loss": 1.4422, "step": 479900 }, { "epoch": 9.333981526494895, "grad_norm": 3.262277364730835, "learning_rate": 3.353479318734793e-06, "loss": 1.3789, "step": 480000 }, { "epoch": 9.333981526494895, "eval_accuracy": 0.5616, "eval_f1": 0.5489021756240522, "eval_loss": 1.1688286066055298, "eval_precision": 0.5677822690509335, "eval_recall": 0.5616, "eval_runtime": 11697.2259, "eval_samples_per_second": 15.388, 
"eval_steps_per_second": 0.481, "step": 480000 }, { "epoch": 9.335926105979581, "grad_norm": 6.750067234039307, "learning_rate": 3.34374695863747e-06, "loss": 1.5618, "step": 480100 }, { "epoch": 9.337870685464269, "grad_norm": 2.733699083328247, "learning_rate": 3.3340145985401457e-06, "loss": 1.6166, "step": 480200 }, { "epoch": 9.339815264948955, "grad_norm": 3.668950080871582, "learning_rate": 3.3242822384428227e-06, "loss": 1.4716, "step": 480300 }, { "epoch": 9.341759844433641, "grad_norm": 2.5667574405670166, "learning_rate": 3.3145498783454993e-06, "loss": 1.4265, "step": 480400 }, { "epoch": 9.343704423918327, "grad_norm": 5.900805473327637, "learning_rate": 3.304817518248175e-06, "loss": 1.416, "step": 480500 }, { "epoch": 9.345649003403015, "grad_norm": 5.509023189544678, "learning_rate": 3.295085158150852e-06, "loss": 1.3924, "step": 480600 }, { "epoch": 9.3475935828877, "grad_norm": 11.012356758117676, "learning_rate": 3.285352798053528e-06, "loss": 1.6321, "step": 480700 }, { "epoch": 9.349538162372387, "grad_norm": 4.470006465911865, "learning_rate": 3.275620437956205e-06, "loss": 1.5284, "step": 480800 }, { "epoch": 9.351482741857073, "grad_norm": 10.679895401000977, "learning_rate": 3.2658880778588806e-06, "loss": 1.5443, "step": 480900 }, { "epoch": 9.35342732134176, "grad_norm": 2.9647209644317627, "learning_rate": 3.2561557177615576e-06, "loss": 1.3941, "step": 481000 }, { "epoch": 9.355371900826446, "grad_norm": 5.000911235809326, "learning_rate": 3.2464233576642334e-06, "loss": 1.5305, "step": 481100 }, { "epoch": 9.357316480311132, "grad_norm": 6.6800456047058105, "learning_rate": 3.2366909975669104e-06, "loss": 1.5602, "step": 481200 }, { "epoch": 9.35926105979582, "grad_norm": 3.7385354042053223, "learning_rate": 3.226958637469586e-06, "loss": 1.5318, "step": 481300 }, { "epoch": 9.361205639280506, "grad_norm": 3.741938352584839, "learning_rate": 3.2172262773722627e-06, "loss": 1.4662, "step": 481400 }, { "epoch": 9.363150218765192, 
"grad_norm": 27.43154525756836, "learning_rate": 3.2074939172749398e-06, "loss": 1.5855, "step": 481500 }, { "epoch": 9.365094798249878, "grad_norm": 4.264724254608154, "learning_rate": 3.1977615571776155e-06, "loss": 1.5216, "step": 481600 }, { "epoch": 9.367039377734566, "grad_norm": 8.640564918518066, "learning_rate": 3.1880291970802925e-06, "loss": 1.4892, "step": 481700 }, { "epoch": 9.368983957219251, "grad_norm": 6.012699127197266, "learning_rate": 3.1782968369829683e-06, "loss": 1.527, "step": 481800 }, { "epoch": 9.370928536703937, "grad_norm": 4.2711615562438965, "learning_rate": 3.1685644768856453e-06, "loss": 1.3925, "step": 481900 }, { "epoch": 9.372873116188623, "grad_norm": 2.8830888271331787, "learning_rate": 3.158832116788321e-06, "loss": 1.4318, "step": 482000 }, { "epoch": 9.374817695673311, "grad_norm": 4.881452560424805, "learning_rate": 3.149099756690998e-06, "loss": 1.4863, "step": 482100 }, { "epoch": 9.376762275157997, "grad_norm": 5.51538610458374, "learning_rate": 3.139367396593674e-06, "loss": 1.4381, "step": 482200 }, { "epoch": 9.378706854642683, "grad_norm": 3.1317245960235596, "learning_rate": 3.1296350364963504e-06, "loss": 1.4602, "step": 482300 }, { "epoch": 9.38065143412737, "grad_norm": 1.975581169128418, "learning_rate": 3.119902676399027e-06, "loss": 1.3855, "step": 482400 }, { "epoch": 9.382596013612057, "grad_norm": 4.630918502807617, "learning_rate": 3.110170316301703e-06, "loss": 1.453, "step": 482500 }, { "epoch": 9.384540593096743, "grad_norm": 5.208189487457275, "learning_rate": 3.1004379562043798e-06, "loss": 1.4849, "step": 482600 }, { "epoch": 9.386485172581429, "grad_norm": 9.226777076721191, "learning_rate": 3.090705596107056e-06, "loss": 1.5082, "step": 482700 }, { "epoch": 9.388429752066116, "grad_norm": 4.073162078857422, "learning_rate": 3.0809732360097325e-06, "loss": 1.4823, "step": 482800 }, { "epoch": 9.390374331550802, "grad_norm": 4.054359436035156, "learning_rate": 3.0712408759124087e-06, "loss": 1.4801, 
"step": 482900 }, { "epoch": 9.392318911035488, "grad_norm": 1.498626470565796, "learning_rate": 3.0615085158150853e-06, "loss": 1.387, "step": 483000 }, { "epoch": 9.394263490520174, "grad_norm": 3.2660186290740967, "learning_rate": 3.051873479318735e-06, "loss": 1.4065, "step": 483100 }, { "epoch": 9.396208070004862, "grad_norm": 6.896129131317139, "learning_rate": 3.0421411192214113e-06, "loss": 1.3446, "step": 483200 }, { "epoch": 9.398152649489548, "grad_norm": 5.149844646453857, "learning_rate": 3.032408759124088e-06, "loss": 1.5511, "step": 483300 }, { "epoch": 9.400097228974234, "grad_norm": 3.7908151149749756, "learning_rate": 3.022676399026764e-06, "loss": 1.5453, "step": 483400 }, { "epoch": 9.40204180845892, "grad_norm": 4.7801618576049805, "learning_rate": 3.0129440389294402e-06, "loss": 1.4575, "step": 483500 }, { "epoch": 9.403986387943608, "grad_norm": 3.732755661010742, "learning_rate": 3.003211678832117e-06, "loss": 1.5867, "step": 483600 }, { "epoch": 9.405930967428294, "grad_norm": 2.9124815464019775, "learning_rate": 2.9934793187347934e-06, "loss": 1.6093, "step": 483700 }, { "epoch": 9.40787554691298, "grad_norm": 3.490593910217285, "learning_rate": 2.98374695863747e-06, "loss": 1.3904, "step": 483800 }, { "epoch": 9.409820126397667, "grad_norm": 5.240288734436035, "learning_rate": 2.974014598540146e-06, "loss": 1.4614, "step": 483900 }, { "epoch": 9.411764705882353, "grad_norm": 4.187028408050537, "learning_rate": 2.9642822384428228e-06, "loss": 1.4062, "step": 484000 }, { "epoch": 9.41370928536704, "grad_norm": 9.285012245178223, "learning_rate": 2.954549878345499e-06, "loss": 1.5207, "step": 484100 }, { "epoch": 9.415653864851725, "grad_norm": 4.620297431945801, "learning_rate": 2.9448175182481755e-06, "loss": 1.538, "step": 484200 }, { "epoch": 9.417598444336413, "grad_norm": 4.404288291931152, "learning_rate": 2.9350851581508517e-06, "loss": 1.3295, "step": 484300 }, { "epoch": 9.419543023821099, "grad_norm": 1.7184360027313232, 
"learning_rate": 2.925352798053528e-06, "loss": 1.4838, "step": 484400 }, { "epoch": 9.421487603305785, "grad_norm": 7.949393272399902, "learning_rate": 2.9156204379562045e-06, "loss": 1.4694, "step": 484500 }, { "epoch": 9.42343218279047, "grad_norm": 5.303266525268555, "learning_rate": 2.9058880778588807e-06, "loss": 1.4904, "step": 484600 }, { "epoch": 9.425376762275159, "grad_norm": 6.096667289733887, "learning_rate": 2.8961557177615573e-06, "loss": 1.4137, "step": 484700 }, { "epoch": 9.427321341759844, "grad_norm": 3.5722298622131348, "learning_rate": 2.8864233576642334e-06, "loss": 1.5367, "step": 484800 }, { "epoch": 9.42926592124453, "grad_norm": 6.105239391326904, "learning_rate": 2.8766909975669104e-06, "loss": 1.388, "step": 484900 }, { "epoch": 9.431210500729218, "grad_norm": 12.162073135375977, "learning_rate": 2.8669586374695866e-06, "loss": 1.3586, "step": 485000 }, { "epoch": 9.431210500729218, "eval_accuracy": 0.5547444444444445, "eval_f1": 0.5427247798214804, "eval_loss": 1.179588794708252, "eval_precision": 0.5646443503612556, "eval_recall": 0.5547444444444445, "eval_runtime": 11700.6159, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 485000 }, { "epoch": 9.433155080213904, "grad_norm": 7.432811260223389, "learning_rate": 2.8572262773722632e-06, "loss": 1.3921, "step": 485100 }, { "epoch": 9.43509965969859, "grad_norm": 4.6151885986328125, "learning_rate": 2.8475912408759126e-06, "loss": 1.4229, "step": 485200 }, { "epoch": 9.437044239183276, "grad_norm": 6.413217544555664, "learning_rate": 2.8378588807785888e-06, "loss": 1.5871, "step": 485300 }, { "epoch": 9.438988818667964, "grad_norm": 6.404345989227295, "learning_rate": 2.8281265206812654e-06, "loss": 1.4576, "step": 485400 }, { "epoch": 9.44093339815265, "grad_norm": 3.1728672981262207, "learning_rate": 2.818394160583942e-06, "loss": 1.3697, "step": 485500 }, { "epoch": 9.442877977637336, "grad_norm": 5.572206974029541, "learning_rate": 2.808661800486618e-06, 
"loss": 1.65, "step": 485600 }, { "epoch": 9.444822557122022, "grad_norm": 8.278780937194824, "learning_rate": 2.7989294403892947e-06, "loss": 1.4911, "step": 485700 }, { "epoch": 9.44676713660671, "grad_norm": 3.0510194301605225, "learning_rate": 2.789197080291971e-06, "loss": 1.3977, "step": 485800 }, { "epoch": 9.448711716091395, "grad_norm": 3.904240131378174, "learning_rate": 2.7794647201946475e-06, "loss": 1.5515, "step": 485900 }, { "epoch": 9.450656295576081, "grad_norm": 5.6559062004089355, "learning_rate": 2.7697323600973237e-06, "loss": 1.5334, "step": 486000 }, { "epoch": 9.452600875060767, "grad_norm": 5.430581569671631, "learning_rate": 2.7600000000000003e-06, "loss": 1.5444, "step": 486100 }, { "epoch": 9.454545454545455, "grad_norm": 9.792162895202637, "learning_rate": 2.7502676399026764e-06, "loss": 1.4912, "step": 486200 }, { "epoch": 9.456490034030141, "grad_norm": 4.088655471801758, "learning_rate": 2.740535279805353e-06, "loss": 1.4415, "step": 486300 }, { "epoch": 9.458434613514827, "grad_norm": 3.0737249851226807, "learning_rate": 2.730802919708029e-06, "loss": 1.3689, "step": 486400 }, { "epoch": 9.460379192999515, "grad_norm": 3.7767817974090576, "learning_rate": 2.7210705596107054e-06, "loss": 1.4571, "step": 486500 }, { "epoch": 9.4623237724842, "grad_norm": 14.393135070800781, "learning_rate": 2.7113381995133824e-06, "loss": 1.4876, "step": 486600 }, { "epoch": 9.464268351968887, "grad_norm": 6.687946319580078, "learning_rate": 2.7016058394160586e-06, "loss": 1.5401, "step": 486700 }, { "epoch": 9.466212931453573, "grad_norm": 1.903867483139038, "learning_rate": 2.691873479318735e-06, "loss": 1.5775, "step": 486800 }, { "epoch": 9.46815751093826, "grad_norm": 3.7257962226867676, "learning_rate": 2.6821411192214113e-06, "loss": 1.5088, "step": 486900 }, { "epoch": 9.470102090422946, "grad_norm": 2.2514877319335938, "learning_rate": 2.672408759124088e-06, "loss": 1.4506, "step": 487000 }, { "epoch": 9.472046669907632, "grad_norm": 
9.950082778930664, "learning_rate": 2.662676399026764e-06, "loss": 1.5392, "step": 487100 }, { "epoch": 9.473991249392318, "grad_norm": 3.8903651237487793, "learning_rate": 2.653041362530414e-06, "loss": 1.4865, "step": 487200 }, { "epoch": 9.475935828877006, "grad_norm": 8.798778533935547, "learning_rate": 2.64330900243309e-06, "loss": 1.4198, "step": 487300 }, { "epoch": 9.477880408361692, "grad_norm": 6.665349006652832, "learning_rate": 2.6335766423357667e-06, "loss": 1.41, "step": 487400 }, { "epoch": 9.479824987846378, "grad_norm": 4.13215970993042, "learning_rate": 2.623941605839416e-06, "loss": 1.476, "step": 487500 }, { "epoch": 9.481769567331064, "grad_norm": 4.943906307220459, "learning_rate": 2.6142092457420926e-06, "loss": 1.5145, "step": 487600 }, { "epoch": 9.483714146815752, "grad_norm": 4.209045886993408, "learning_rate": 2.6044768856447692e-06, "loss": 1.5179, "step": 487700 }, { "epoch": 9.485658726300437, "grad_norm": 3.142425060272217, "learning_rate": 2.5947445255474454e-06, "loss": 1.5092, "step": 487800 }, { "epoch": 9.487603305785123, "grad_norm": 3.6042001247406006, "learning_rate": 2.585012165450122e-06, "loss": 1.4424, "step": 487900 }, { "epoch": 9.489547885269811, "grad_norm": 4.886570930480957, "learning_rate": 2.575279805352798e-06, "loss": 1.4926, "step": 488000 }, { "epoch": 9.491492464754497, "grad_norm": 6.125028133392334, "learning_rate": 2.5655474452554748e-06, "loss": 1.4058, "step": 488100 }, { "epoch": 9.493437044239183, "grad_norm": 6.601408958435059, "learning_rate": 2.555815085158151e-06, "loss": 1.3785, "step": 488200 }, { "epoch": 9.495381623723869, "grad_norm": 5.169848442077637, "learning_rate": 2.5460827250608275e-06, "loss": 1.3633, "step": 488300 }, { "epoch": 9.497326203208557, "grad_norm": 3.0870118141174316, "learning_rate": 2.5363503649635037e-06, "loss": 1.5664, "step": 488400 }, { "epoch": 9.499270782693243, "grad_norm": 4.518991947174072, "learning_rate": 2.52661800486618e-06, "loss": 1.5007, "step": 488500 
}, { "epoch": 9.501215362177929, "grad_norm": 4.5506463050842285, "learning_rate": 2.5168856447688565e-06, "loss": 1.3959, "step": 488600 }, { "epoch": 9.503159941662615, "grad_norm": 4.086271286010742, "learning_rate": 2.5071532846715326e-06, "loss": 1.5391, "step": 488700 }, { "epoch": 9.505104521147302, "grad_norm": 4.690094470977783, "learning_rate": 2.4974209245742097e-06, "loss": 1.5958, "step": 488800 }, { "epoch": 9.507049100631988, "grad_norm": 6.4785943031311035, "learning_rate": 2.487688564476886e-06, "loss": 1.5794, "step": 488900 }, { "epoch": 9.508993680116674, "grad_norm": 11.36889362335205, "learning_rate": 2.4779562043795624e-06, "loss": 1.3043, "step": 489000 }, { "epoch": 9.51093825960136, "grad_norm": 4.861423492431641, "learning_rate": 2.4682238442822386e-06, "loss": 1.3314, "step": 489100 }, { "epoch": 9.512882839086048, "grad_norm": 3.161161422729492, "learning_rate": 2.458491484184915e-06, "loss": 1.6111, "step": 489200 }, { "epoch": 9.514827418570734, "grad_norm": 3.7282698154449463, "learning_rate": 2.4487591240875914e-06, "loss": 1.5364, "step": 489300 }, { "epoch": 9.51677199805542, "grad_norm": 5.361255168914795, "learning_rate": 2.4390267639902675e-06, "loss": 1.5288, "step": 489400 }, { "epoch": 9.518716577540108, "grad_norm": 2.4172449111938477, "learning_rate": 2.429294403892944e-06, "loss": 1.464, "step": 489500 }, { "epoch": 9.520661157024794, "grad_norm": 10.703693389892578, "learning_rate": 2.4195620437956203e-06, "loss": 1.371, "step": 489600 }, { "epoch": 9.52260573650948, "grad_norm": 8.933009147644043, "learning_rate": 2.409829683698297e-06, "loss": 1.3923, "step": 489700 }, { "epoch": 9.524550315994166, "grad_norm": 3.4789745807647705, "learning_rate": 2.400097323600973e-06, "loss": 1.5307, "step": 489800 }, { "epoch": 9.526494895478853, "grad_norm": 6.331401348114014, "learning_rate": 2.3903649635036497e-06, "loss": 1.4188, "step": 489900 }, { "epoch": 9.52843947496354, "grad_norm": 5.903407573699951, "learning_rate": 
2.3806326034063263e-06, "loss": 1.4582, "step": 490000 }, { "epoch": 9.52843947496354, "eval_accuracy": 0.5606, "eval_f1": 0.5484520732537922, "eval_loss": 1.172527551651001, "eval_precision": 0.5635081361794745, "eval_recall": 0.5606000000000001, "eval_runtime": 11693.9349, "eval_samples_per_second": 15.393, "eval_steps_per_second": 0.481, "step": 490000 }, { "epoch": 9.530384054448225, "grad_norm": 15.22344970703125, "learning_rate": 2.3709975669099756e-06, "loss": 1.8428, "step": 490100 }, { "epoch": 9.532328633932911, "grad_norm": 3.385577440261841, "learning_rate": 2.3612652068126522e-06, "loss": 1.4209, "step": 490200 }, { "epoch": 9.534273213417599, "grad_norm": 2.5593855381011963, "learning_rate": 2.3515328467153284e-06, "loss": 1.4373, "step": 490300 }, { "epoch": 9.536217792902285, "grad_norm": 4.48569393157959, "learning_rate": 2.341800486618005e-06, "loss": 1.525, "step": 490400 }, { "epoch": 9.53816237238697, "grad_norm": 4.491428375244141, "learning_rate": 2.3320681265206816e-06, "loss": 1.4328, "step": 490500 }, { "epoch": 9.540106951871659, "grad_norm": 7.457737922668457, "learning_rate": 2.3223357664233578e-06, "loss": 1.4856, "step": 490600 }, { "epoch": 9.542051531356345, "grad_norm": 5.132516860961914, "learning_rate": 2.3126034063260344e-06, "loss": 1.4825, "step": 490700 }, { "epoch": 9.54399611084103, "grad_norm": 7.971620082855225, "learning_rate": 2.3028710462287106e-06, "loss": 1.3146, "step": 490800 }, { "epoch": 9.545940690325716, "grad_norm": 10.797852516174316, "learning_rate": 2.293138686131387e-06, "loss": 1.4187, "step": 490900 }, { "epoch": 9.547885269810404, "grad_norm": 8.40354061126709, "learning_rate": 2.2834063260340633e-06, "loss": 1.3268, "step": 491000 }, { "epoch": 9.54982984929509, "grad_norm": 7.39490270614624, "learning_rate": 2.27367396593674e-06, "loss": 1.5689, "step": 491100 }, { "epoch": 9.551774428779776, "grad_norm": 7.612358570098877, "learning_rate": 2.263941605839416e-06, "loss": 1.4383, "step": 491200 }, { 
"epoch": 9.553719008264462, "grad_norm": 2.526289463043213, "learning_rate": 2.2542092457420927e-06, "loss": 1.5192, "step": 491300 }, { "epoch": 9.55566358774915, "grad_norm": 7.468990802764893, "learning_rate": 2.244476885644769e-06, "loss": 1.5212, "step": 491400 }, { "epoch": 9.557608167233836, "grad_norm": 10.011000633239746, "learning_rate": 2.234744525547445e-06, "loss": 1.4483, "step": 491500 }, { "epoch": 9.559552746718522, "grad_norm": 5.183506011962891, "learning_rate": 2.2250121654501216e-06, "loss": 1.4756, "step": 491600 }, { "epoch": 9.56149732620321, "grad_norm": 6.665558815002441, "learning_rate": 2.2152798053527982e-06, "loss": 1.7131, "step": 491700 }, { "epoch": 9.563441905687895, "grad_norm": 5.259783744812012, "learning_rate": 2.205547445255475e-06, "loss": 1.4009, "step": 491800 }, { "epoch": 9.565386485172581, "grad_norm": 6.661199569702148, "learning_rate": 2.195815085158151e-06, "loss": 1.4788, "step": 491900 }, { "epoch": 9.567331064657267, "grad_norm": 1.3769772052764893, "learning_rate": 2.1860827250608276e-06, "loss": 1.41, "step": 492000 }, { "epoch": 9.569275644141955, "grad_norm": 5.341777324676514, "learning_rate": 2.176447688564477e-06, "loss": 1.3426, "step": 492100 }, { "epoch": 9.571220223626641, "grad_norm": 4.39161491394043, "learning_rate": 2.1667153284671536e-06, "loss": 1.4723, "step": 492200 }, { "epoch": 9.573164803111327, "grad_norm": 3.933502197265625, "learning_rate": 2.1569829683698297e-06, "loss": 1.5155, "step": 492300 }, { "epoch": 9.575109382596013, "grad_norm": 18.19536781311035, "learning_rate": 2.1472506082725063e-06, "loss": 1.4687, "step": 492400 }, { "epoch": 9.5770539620807, "grad_norm": 4.862549304962158, "learning_rate": 2.1375182481751825e-06, "loss": 1.4297, "step": 492500 }, { "epoch": 9.578998541565387, "grad_norm": 5.979335784912109, "learning_rate": 2.127785888077859e-06, "loss": 1.4215, "step": 492600 }, { "epoch": 9.580943121050073, "grad_norm": 3.655754566192627, "learning_rate": 
2.1180535279805353e-06, "loss": 1.6236, "step": 492700 }, { "epoch": 9.582887700534759, "grad_norm": 4.126145839691162, "learning_rate": 2.108321167883212e-06, "loss": 1.4155, "step": 492800 }, { "epoch": 9.584832280019446, "grad_norm": 2.0141749382019043, "learning_rate": 2.098588807785888e-06, "loss": 1.5157, "step": 492900 }, { "epoch": 9.586776859504132, "grad_norm": 3.040306329727173, "learning_rate": 2.0888564476885646e-06, "loss": 1.3939, "step": 493000 }, { "epoch": 9.588721438988818, "grad_norm": 5.4748454093933105, "learning_rate": 2.079124087591241e-06, "loss": 1.4061, "step": 493100 }, { "epoch": 9.590666018473506, "grad_norm": 5.187029838562012, "learning_rate": 2.0693917274939174e-06, "loss": 1.4752, "step": 493200 }, { "epoch": 9.592610597958192, "grad_norm": 5.883757591247559, "learning_rate": 2.059659367396594e-06, "loss": 1.4377, "step": 493300 }, { "epoch": 9.594555177442878, "grad_norm": 3.895289421081543, "learning_rate": 2.04992700729927e-06, "loss": 1.3941, "step": 493400 }, { "epoch": 9.596499756927564, "grad_norm": 2.9567065238952637, "learning_rate": 2.0401946472019468e-06, "loss": 1.6599, "step": 493500 }, { "epoch": 9.598444336412252, "grad_norm": 6.994021892547607, "learning_rate": 2.030462287104623e-06, "loss": 1.5488, "step": 493600 }, { "epoch": 9.600388915896938, "grad_norm": 7.595653057098389, "learning_rate": 2.0207299270072995e-06, "loss": 1.3949, "step": 493700 }, { "epoch": 9.602333495381624, "grad_norm": 3.5919759273529053, "learning_rate": 2.0109975669099757e-06, "loss": 1.508, "step": 493800 }, { "epoch": 9.60427807486631, "grad_norm": 3.262787342071533, "learning_rate": 2.0012652068126523e-06, "loss": 1.3328, "step": 493900 }, { "epoch": 9.606222654350997, "grad_norm": 13.381658554077148, "learning_rate": 1.9915328467153285e-06, "loss": 1.2874, "step": 494000 }, { "epoch": 9.608167233835683, "grad_norm": 1.5138696432113647, "learning_rate": 1.981800486618005e-06, "loss": 1.5483, "step": 494100 }, { "epoch": 
9.61011181332037, "grad_norm": 13.03799057006836, "learning_rate": 1.9720681265206812e-06, "loss": 1.4772, "step": 494200 }, { "epoch": 9.612056392805055, "grad_norm": 3.215442419052124, "learning_rate": 1.962335766423358e-06, "loss": 1.436, "step": 494300 }, { "epoch": 9.614000972289743, "grad_norm": 5.409635543823242, "learning_rate": 1.952603406326034e-06, "loss": 1.3589, "step": 494400 }, { "epoch": 9.615945551774429, "grad_norm": 4.085748672485352, "learning_rate": 1.9428710462287106e-06, "loss": 1.3982, "step": 494500 }, { "epoch": 9.617890131259115, "grad_norm": 5.4419684410095215, "learning_rate": 1.933138686131387e-06, "loss": 1.6087, "step": 494600 }, { "epoch": 9.619834710743802, "grad_norm": 8.57927417755127, "learning_rate": 1.9234063260340634e-06, "loss": 1.6132, "step": 494700 }, { "epoch": 9.621779290228488, "grad_norm": 9.136397361755371, "learning_rate": 1.91367396593674e-06, "loss": 1.365, "step": 494800 }, { "epoch": 9.623723869713174, "grad_norm": 5.216989994049072, "learning_rate": 1.9039416058394161e-06, "loss": 1.4749, "step": 494900 }, { "epoch": 9.62566844919786, "grad_norm": 9.482110977172852, "learning_rate": 1.8942092457420925e-06, "loss": 1.439, "step": 495000 }, { "epoch": 9.62566844919786, "eval_accuracy": 0.56495, "eval_f1": 0.5534352258547871, "eval_loss": 1.164296269416809, "eval_precision": 0.5699997967067438, "eval_recall": 0.5649500000000001, "eval_runtime": 11695.5997, "eval_samples_per_second": 15.39, "eval_steps_per_second": 0.481, "step": 495000 }, { "epoch": 9.627613028682548, "grad_norm": 8.233530044555664, "learning_rate": 1.884476885644769e-06, "loss": 1.3187, "step": 495100 }, { "epoch": 9.629557608167234, "grad_norm": 3.7052595615386963, "learning_rate": 1.8747445255474453e-06, "loss": 1.6404, "step": 495200 }, { "epoch": 9.63150218765192, "grad_norm": 5.121022701263428, "learning_rate": 1.8650121654501217e-06, "loss": 1.3903, "step": 495300 }, { "epoch": 9.633446767136606, "grad_norm": 6.092931270599365, 
"learning_rate": 1.855279805352798e-06, "loss": 1.4707, "step": 495400 }, { "epoch": 9.635391346621294, "grad_norm": 17.34490203857422, "learning_rate": 1.8455474452554745e-06, "loss": 1.4688, "step": 495500 }, { "epoch": 9.63733592610598, "grad_norm": 5.244154453277588, "learning_rate": 1.8358150851581508e-06, "loss": 1.5427, "step": 495600 }, { "epoch": 9.639280505590666, "grad_norm": 3.5735325813293457, "learning_rate": 1.8260827250608274e-06, "loss": 1.5014, "step": 495700 }, { "epoch": 9.641225085075352, "grad_norm": 3.5131325721740723, "learning_rate": 1.8163503649635038e-06, "loss": 1.3716, "step": 495800 }, { "epoch": 9.64316966456004, "grad_norm": 11.508795738220215, "learning_rate": 1.8066180048661802e-06, "loss": 1.376, "step": 495900 }, { "epoch": 9.645114244044725, "grad_norm": 2.5179877281188965, "learning_rate": 1.7968856447688566e-06, "loss": 1.4453, "step": 496000 }, { "epoch": 9.647058823529411, "grad_norm": 4.83087158203125, "learning_rate": 1.787250608272506e-06, "loss": 1.619, "step": 496100 }, { "epoch": 9.649003403014099, "grad_norm": 4.44631290435791, "learning_rate": 1.7775182481751828e-06, "loss": 1.3224, "step": 496200 }, { "epoch": 9.650947982498785, "grad_norm": 2.9442498683929443, "learning_rate": 1.7677858880778592e-06, "loss": 1.5906, "step": 496300 }, { "epoch": 9.652892561983471, "grad_norm": 6.763004779815674, "learning_rate": 1.7580535279805355e-06, "loss": 1.5545, "step": 496400 }, { "epoch": 9.654837141468157, "grad_norm": 4.237571716308594, "learning_rate": 1.748321167883212e-06, "loss": 1.5698, "step": 496500 }, { "epoch": 9.656781720952845, "grad_norm": 4.524792194366455, "learning_rate": 1.7385888077858883e-06, "loss": 1.356, "step": 496600 }, { "epoch": 9.65872630043753, "grad_norm": 2.6222996711730957, "learning_rate": 1.7288564476885645e-06, "loss": 1.4075, "step": 496700 }, { "epoch": 9.660670879922217, "grad_norm": 4.2565789222717285, "learning_rate": 1.7191240875912409e-06, "loss": 1.3973, "step": 496800 }, { "epoch": 
9.662615459406902, "grad_norm": 6.216490745544434, "learning_rate": 1.7093917274939172e-06, "loss": 1.4951, "step": 496900 }, { "epoch": 9.66456003889159, "grad_norm": 5.9319987297058105, "learning_rate": 1.6996593673965936e-06, "loss": 1.3761, "step": 497000 }, { "epoch": 9.666504618376276, "grad_norm": 5.9118523597717285, "learning_rate": 1.68992700729927e-06, "loss": 1.3902, "step": 497100 }, { "epoch": 9.668449197860962, "grad_norm": 3.3750157356262207, "learning_rate": 1.6801946472019464e-06, "loss": 1.379, "step": 497200 }, { "epoch": 9.670393777345648, "grad_norm": 4.806015491485596, "learning_rate": 1.6705596107055962e-06, "loss": 1.5751, "step": 497300 }, { "epoch": 9.672338356830336, "grad_norm": 2.991344928741455, "learning_rate": 1.6608272506082726e-06, "loss": 1.5903, "step": 497400 }, { "epoch": 9.674282936315022, "grad_norm": 2.6493308544158936, "learning_rate": 1.651094890510949e-06, "loss": 1.6733, "step": 497500 }, { "epoch": 9.676227515799708, "grad_norm": 8.645270347595215, "learning_rate": 1.6413625304136253e-06, "loss": 1.3682, "step": 497600 }, { "epoch": 9.678172095284395, "grad_norm": 5.3393402099609375, "learning_rate": 1.6316301703163017e-06, "loss": 1.3855, "step": 497700 }, { "epoch": 9.680116674769081, "grad_norm": 4.3428635597229, "learning_rate": 1.6218978102189781e-06, "loss": 1.374, "step": 497800 }, { "epoch": 9.682061254253767, "grad_norm": 2.623297929763794, "learning_rate": 1.6121654501216547e-06, "loss": 1.5202, "step": 497900 }, { "epoch": 9.684005833738453, "grad_norm": 6.496540546417236, "learning_rate": 1.602433090024331e-06, "loss": 1.3885, "step": 498000 }, { "epoch": 9.685950413223141, "grad_norm": 6.453309059143066, "learning_rate": 1.5927007299270075e-06, "loss": 1.4865, "step": 498100 }, { "epoch": 9.687894992707827, "grad_norm": 4.179862022399902, "learning_rate": 1.5829683698296839e-06, "loss": 1.3887, "step": 498200 }, { "epoch": 9.689839572192513, "grad_norm": 8.680489540100098, "learning_rate": 
1.5732360097323602e-06, "loss": 1.6549, "step": 498300 }, { "epoch": 9.691784151677199, "grad_norm": 5.699830055236816, "learning_rate": 1.5635036496350366e-06, "loss": 1.4697, "step": 498400 }, { "epoch": 9.693728731161887, "grad_norm": 4.527194499969482, "learning_rate": 1.553771289537713e-06, "loss": 1.3187, "step": 498500 }, { "epoch": 9.695673310646573, "grad_norm": 9.063657760620117, "learning_rate": 1.5440389294403894e-06, "loss": 1.2901, "step": 498600 }, { "epoch": 9.697617890131259, "grad_norm": 4.19493293762207, "learning_rate": 1.5343065693430658e-06, "loss": 1.5947, "step": 498700 }, { "epoch": 9.699562469615946, "grad_norm": 4.128293991088867, "learning_rate": 1.5245742092457422e-06, "loss": 1.3966, "step": 498800 }, { "epoch": 9.701507049100632, "grad_norm": 3.22719144821167, "learning_rate": 1.5148418491484186e-06, "loss": 1.5644, "step": 498900 }, { "epoch": 9.703451628585318, "grad_norm": 2.5675411224365234, "learning_rate": 1.505109489051095e-06, "loss": 1.437, "step": 499000 }, { "epoch": 9.705396208070004, "grad_norm": 2.6379926204681396, "learning_rate": 1.4953771289537713e-06, "loss": 1.3679, "step": 499100 }, { "epoch": 9.707340787554692, "grad_norm": 3.498093605041504, "learning_rate": 1.4856447688564477e-06, "loss": 1.4323, "step": 499200 }, { "epoch": 9.709285367039378, "grad_norm": 2.867985248565674, "learning_rate": 1.475912408759124e-06, "loss": 1.5846, "step": 499300 }, { "epoch": 9.711229946524064, "grad_norm": 4.686440944671631, "learning_rate": 1.4661800486618007e-06, "loss": 1.3913, "step": 499400 }, { "epoch": 9.71317452600875, "grad_norm": 6.427061557769775, "learning_rate": 1.456447688564477e-06, "loss": 1.4852, "step": 499500 }, { "epoch": 9.715119105493438, "grad_norm": 3.9726247787475586, "learning_rate": 1.4467153284671532e-06, "loss": 1.4058, "step": 499600 }, { "epoch": 9.717063684978124, "grad_norm": 3.472531318664551, "learning_rate": 1.4369829683698296e-06, "loss": 1.3848, "step": 499700 }, { "epoch": 9.71900826446281, 
"grad_norm": 3.247661828994751, "learning_rate": 1.427250608272506e-06, "loss": 1.3286, "step": 499800 }, { "epoch": 9.720952843947497, "grad_norm": 5.334167003631592, "learning_rate": 1.4175182481751826e-06, "loss": 1.3761, "step": 499900 }, { "epoch": 9.722897423432183, "grad_norm": 3.4059395790100098, "learning_rate": 1.407785888077859e-06, "loss": 1.4671, "step": 500000 }, { "epoch": 9.722897423432183, "eval_accuracy": 0.5617166666666666, "eval_f1": 0.5495215190577568, "eval_loss": 1.168825387954712, "eval_precision": 0.5666776395773699, "eval_recall": 0.5617166666666666, "eval_runtime": 11713.0806, "eval_samples_per_second": 15.367, "eval_steps_per_second": 0.48, "step": 500000 }, { "epoch": 9.72484200291687, "grad_norm": 9.32169246673584, "learning_rate": 1.3980535279805354e-06, "loss": 1.4248, "step": 500100 }, { "epoch": 9.726786582401555, "grad_norm": 12.363140106201172, "learning_rate": 1.3883211678832118e-06, "loss": 1.3962, "step": 500200 }, { "epoch": 9.728731161886243, "grad_norm": 14.681207656860352, "learning_rate": 1.3785888077858882e-06, "loss": 1.3719, "step": 500300 }, { "epoch": 9.730675741370929, "grad_norm": 3.9496610164642334, "learning_rate": 1.3688564476885645e-06, "loss": 1.3181, "step": 500400 }, { "epoch": 9.732620320855615, "grad_norm": 9.73677921295166, "learning_rate": 1.359124087591241e-06, "loss": 1.541, "step": 500500 }, { "epoch": 9.7345649003403, "grad_norm": 5.298023700714111, "learning_rate": 1.3493917274939173e-06, "loss": 1.3373, "step": 500600 }, { "epoch": 9.736509479824988, "grad_norm": 2.9722185134887695, "learning_rate": 1.3396593673965937e-06, "loss": 1.5289, "step": 500700 }, { "epoch": 9.738454059309674, "grad_norm": 5.976816654205322, "learning_rate": 1.32992700729927e-06, "loss": 1.4469, "step": 500800 }, { "epoch": 9.74039863879436, "grad_norm": 4.181572914123535, "learning_rate": 1.3201946472019465e-06, "loss": 1.3535, "step": 500900 }, { "epoch": 9.742343218279046, "grad_norm": 4.400452613830566, 
"learning_rate": 1.3104622871046228e-06, "loss": 1.3408, "step": 501000 }, { "epoch": 9.744287797763734, "grad_norm": 6.752807140350342, "learning_rate": 1.3008272506082726e-06, "loss": 1.4512, "step": 501100 }, { "epoch": 9.74623237724842, "grad_norm": 3.0906834602355957, "learning_rate": 1.291094890510949e-06, "loss": 1.4485, "step": 501200 }, { "epoch": 9.748176956733106, "grad_norm": 6.4581098556518555, "learning_rate": 1.2813625304136254e-06, "loss": 1.4172, "step": 501300 }, { "epoch": 9.750121536217794, "grad_norm": 5.747981071472168, "learning_rate": 1.2716301703163018e-06, "loss": 1.5185, "step": 501400 }, { "epoch": 9.75206611570248, "grad_norm": 17.66242790222168, "learning_rate": 1.2618978102189782e-06, "loss": 1.4729, "step": 501500 }, { "epoch": 9.754010695187166, "grad_norm": 5.452305793762207, "learning_rate": 1.2521654501216546e-06, "loss": 1.4093, "step": 501600 }, { "epoch": 9.755955274671852, "grad_norm": 3.026542901992798, "learning_rate": 1.242433090024331e-06, "loss": 1.3882, "step": 501700 }, { "epoch": 9.75789985415654, "grad_norm": 6.439193248748779, "learning_rate": 1.2327007299270073e-06, "loss": 1.5208, "step": 501800 }, { "epoch": 9.759844433641225, "grad_norm": 4.9976301193237305, "learning_rate": 1.2229683698296837e-06, "loss": 1.596, "step": 501900 }, { "epoch": 9.761789013125911, "grad_norm": 4.985130786895752, "learning_rate": 1.21323600973236e-06, "loss": 1.4834, "step": 502000 }, { "epoch": 9.763733592610597, "grad_norm": 3.2169933319091797, "learning_rate": 1.2035036496350365e-06, "loss": 1.4871, "step": 502100 }, { "epoch": 9.765678172095285, "grad_norm": 4.477692127227783, "learning_rate": 1.193771289537713e-06, "loss": 1.5228, "step": 502200 }, { "epoch": 9.767622751579971, "grad_norm": 12.357294082641602, "learning_rate": 1.1840389294403895e-06, "loss": 1.5244, "step": 502300 }, { "epoch": 9.769567331064657, "grad_norm": 11.576659202575684, "learning_rate": 1.1743065693430658e-06, "loss": 1.5433, "step": 502400 }, { 
"epoch": 9.771511910549343, "grad_norm": 12.242835998535156, "learning_rate": 1.1645742092457422e-06, "loss": 1.6094, "step": 502500 }, { "epoch": 9.77345649003403, "grad_norm": 5.696420669555664, "learning_rate": 1.1548418491484184e-06, "loss": 1.3927, "step": 502600 }, { "epoch": 9.775401069518717, "grad_norm": 4.0394086837768555, "learning_rate": 1.1451094890510948e-06, "loss": 1.4225, "step": 502700 }, { "epoch": 9.777345649003403, "grad_norm": 4.384671211242676, "learning_rate": 1.1353771289537714e-06, "loss": 1.418, "step": 502800 }, { "epoch": 9.77929022848809, "grad_norm": 2.790855646133423, "learning_rate": 1.1256447688564478e-06, "loss": 1.4602, "step": 502900 }, { "epoch": 9.781234807972776, "grad_norm": 3.315758466720581, "learning_rate": 1.1159124087591242e-06, "loss": 1.3551, "step": 503000 }, { "epoch": 9.783179387457462, "grad_norm": 4.120906829833984, "learning_rate": 1.1061800486618005e-06, "loss": 1.4332, "step": 503100 }, { "epoch": 9.785123966942148, "grad_norm": 2.327486991882324, "learning_rate": 1.096447688564477e-06, "loss": 1.417, "step": 503200 }, { "epoch": 9.787068546426836, "grad_norm": 2.8314340114593506, "learning_rate": 1.0867153284671533e-06, "loss": 1.6033, "step": 503300 }, { "epoch": 9.789013125911522, "grad_norm": 1.760416865348816, "learning_rate": 1.07698296836983e-06, "loss": 1.4473, "step": 503400 }, { "epoch": 9.790957705396208, "grad_norm": 7.398601531982422, "learning_rate": 1.067250608272506e-06, "loss": 1.3753, "step": 503500 }, { "epoch": 9.792902284880894, "grad_norm": 6.785481929779053, "learning_rate": 1.0575182481751825e-06, "loss": 1.4597, "step": 503600 }, { "epoch": 9.794846864365581, "grad_norm": 9.971598625183105, "learning_rate": 1.0477858880778588e-06, "loss": 1.3713, "step": 503700 }, { "epoch": 9.796791443850267, "grad_norm": 5.188291072845459, "learning_rate": 1.0380535279805352e-06, "loss": 1.4876, "step": 503800 }, { "epoch": 9.798736023334953, "grad_norm": 4.120114803314209, "learning_rate": 
1.0283211678832116e-06, "loss": 1.6367, "step": 503900 }, { "epoch": 9.80068060281964, "grad_norm": 2.117976665496826, "learning_rate": 1.0185888077858882e-06, "loss": 1.4596, "step": 504000 }, { "epoch": 9.802625182304327, "grad_norm": 5.730849742889404, "learning_rate": 1.0088564476885646e-06, "loss": 1.4738, "step": 504100 }, { "epoch": 9.804569761789013, "grad_norm": 4.389968395233154, "learning_rate": 9.99124087591241e-07, "loss": 1.5165, "step": 504200 }, { "epoch": 9.806514341273699, "grad_norm": 2.864900588989258, "learning_rate": 9.893917274939174e-07, "loss": 1.5046, "step": 504300 }, { "epoch": 9.808458920758387, "grad_norm": 4.5152764320373535, "learning_rate": 9.796593673965937e-07, "loss": 1.4615, "step": 504400 }, { "epoch": 9.810403500243073, "grad_norm": 3.026303768157959, "learning_rate": 9.699270072992701e-07, "loss": 1.3658, "step": 504500 }, { "epoch": 9.812348079727759, "grad_norm": 4.440712928771973, "learning_rate": 9.601946472019465e-07, "loss": 1.4231, "step": 504600 }, { "epoch": 9.814292659212445, "grad_norm": 3.647207736968994, "learning_rate": 9.504622871046229e-07, "loss": 1.4537, "step": 504700 }, { "epoch": 9.816237238697132, "grad_norm": 3.6174798011779785, "learning_rate": 9.407299270072993e-07, "loss": 1.4252, "step": 504800 }, { "epoch": 9.818181818181818, "grad_norm": 7.081727981567383, "learning_rate": 9.309975669099757e-07, "loss": 1.2888, "step": 504900 }, { "epoch": 9.820126397666504, "grad_norm": 5.355539321899414, "learning_rate": 9.212652068126521e-07, "loss": 1.4149, "step": 505000 }, { "epoch": 9.820126397666504, "eval_accuracy": 0.5652111111111111, "eval_f1": 0.5534724341212299, "eval_loss": 1.1640022993087769, "eval_precision": 0.5662023702424617, "eval_recall": 0.5652111111111111, "eval_runtime": 11693.6528, "eval_samples_per_second": 15.393, "eval_steps_per_second": 0.481, "step": 505000 }, { "epoch": 9.82207097715119, "grad_norm": 2.8822076320648193, "learning_rate": 9.116301703163018e-07, "loss": 1.3792, "step": 
505100 }, { "epoch": 9.824015556635878, "grad_norm": 8.059639930725098, "learning_rate": 9.018978102189781e-07, "loss": 1.5647, "step": 505200 }, { "epoch": 9.825960136120564, "grad_norm": 4.043630123138428, "learning_rate": 8.921654501216545e-07, "loss": 1.3389, "step": 505300 }, { "epoch": 9.82790471560525, "grad_norm": 5.7546281814575195, "learning_rate": 8.824330900243309e-07, "loss": 1.5204, "step": 505400 }, { "epoch": 9.829849295089936, "grad_norm": 5.627684593200684, "learning_rate": 8.727007299270073e-07, "loss": 1.5262, "step": 505500 }, { "epoch": 9.831793874574624, "grad_norm": 2.168923854827881, "learning_rate": 8.629683698296837e-07, "loss": 1.4836, "step": 505600 }, { "epoch": 9.83373845405931, "grad_norm": 3.648069143295288, "learning_rate": 8.532360097323602e-07, "loss": 1.4269, "step": 505700 }, { "epoch": 9.835683033543996, "grad_norm": 18.566469192504883, "learning_rate": 8.435036496350365e-07, "loss": 1.3639, "step": 505800 }, { "epoch": 9.837627613028683, "grad_norm": 5.280477523803711, "learning_rate": 8.337712895377129e-07, "loss": 1.3963, "step": 505900 }, { "epoch": 9.83957219251337, "grad_norm": 22.299678802490234, "learning_rate": 8.240389294403893e-07, "loss": 1.4652, "step": 506000 }, { "epoch": 9.841516771998055, "grad_norm": 2.2540016174316406, "learning_rate": 8.143065693430657e-07, "loss": 1.4458, "step": 506100 }, { "epoch": 9.843461351482741, "grad_norm": 5.478411674499512, "learning_rate": 8.045742092457422e-07, "loss": 1.4779, "step": 506200 }, { "epoch": 9.845405930967429, "grad_norm": 4.330811023712158, "learning_rate": 7.948418491484186e-07, "loss": 1.38, "step": 506300 }, { "epoch": 9.847350510452115, "grad_norm": 6.54302453994751, "learning_rate": 7.85109489051095e-07, "loss": 1.3943, "step": 506400 }, { "epoch": 9.8492950899368, "grad_norm": 5.836950778961182, "learning_rate": 7.753771289537713e-07, "loss": 1.4036, "step": 506500 }, { "epoch": 9.851239669421489, "grad_norm": 6.2962446212768555, "learning_rate": 
7.656447688564477e-07, "loss": 1.3795, "step": 506600 }, { "epoch": 9.853184248906175, "grad_norm": 5.919554233551025, "learning_rate": 7.559124087591241e-07, "loss": 1.3321, "step": 506700 }, { "epoch": 9.85512882839086, "grad_norm": 4.183110237121582, "learning_rate": 7.461800486618005e-07, "loss": 1.5074, "step": 506800 }, { "epoch": 9.857073407875546, "grad_norm": 4.448294162750244, "learning_rate": 7.364476885644769e-07, "loss": 1.4757, "step": 506900 }, { "epoch": 9.859017987360234, "grad_norm": 8.109779357910156, "learning_rate": 7.267153284671534e-07, "loss": 1.4396, "step": 507000 }, { "epoch": 9.86096256684492, "grad_norm": 3.9925172328948975, "learning_rate": 7.170802919708029e-07, "loss": 1.4964, "step": 507100 }, { "epoch": 9.862907146329606, "grad_norm": 8.504049301147461, "learning_rate": 7.073479318734793e-07, "loss": 1.3379, "step": 507200 }, { "epoch": 9.864851725814292, "grad_norm": 5.758234977722168, "learning_rate": 6.976155717761557e-07, "loss": 1.5863, "step": 507300 }, { "epoch": 9.86679630529898, "grad_norm": 3.3473548889160156, "learning_rate": 6.878832116788321e-07, "loss": 1.6136, "step": 507400 }, { "epoch": 9.868740884783666, "grad_norm": 4.12747049331665, "learning_rate": 6.781508515815086e-07, "loss": 1.7647, "step": 507500 }, { "epoch": 9.870685464268352, "grad_norm": 5.799735069274902, "learning_rate": 6.68418491484185e-07, "loss": 1.5141, "step": 507600 }, { "epoch": 9.872630043753038, "grad_norm": 8.76323127746582, "learning_rate": 6.586861313868614e-07, "loss": 1.4756, "step": 507700 }, { "epoch": 9.874574623237725, "grad_norm": 2.8343746662139893, "learning_rate": 6.489537712895377e-07, "loss": 1.399, "step": 507800 }, { "epoch": 9.876519202722411, "grad_norm": 4.801392555236816, "learning_rate": 6.392214111922141e-07, "loss": 1.4404, "step": 507900 }, { "epoch": 9.878463782207097, "grad_norm": 4.436782360076904, "learning_rate": 6.294890510948905e-07, "loss": 1.5881, "step": 508000 }, { "epoch": 9.880408361691785, "grad_norm": 
2.1621651649475098, "learning_rate": 6.19756690997567e-07, "loss": 1.3844, "step": 508100 }, { "epoch": 9.882352941176471, "grad_norm": 12.113930702209473, "learning_rate": 6.100243309002433e-07, "loss": 1.4149, "step": 508200 }, { "epoch": 9.884297520661157, "grad_norm": 4.372655391693115, "learning_rate": 6.002919708029197e-07, "loss": 1.839, "step": 508300 }, { "epoch": 9.886242100145843, "grad_norm": 2.7626302242279053, "learning_rate": 5.905596107055962e-07, "loss": 1.3849, "step": 508400 }, { "epoch": 9.88818667963053, "grad_norm": 5.111601829528809, "learning_rate": 5.808272506082725e-07, "loss": 1.4507, "step": 508500 }, { "epoch": 9.890131259115217, "grad_norm": 5.59827995300293, "learning_rate": 5.710948905109489e-07, "loss": 1.6174, "step": 508600 }, { "epoch": 9.892075838599903, "grad_norm": 5.309809684753418, "learning_rate": 5.613625304136253e-07, "loss": 1.4811, "step": 508700 }, { "epoch": 9.894020418084589, "grad_norm": 2.8330886363983154, "learning_rate": 5.516301703163017e-07, "loss": 1.3634, "step": 508800 }, { "epoch": 9.895964997569276, "grad_norm": 3.832893133163452, "learning_rate": 5.418978102189782e-07, "loss": 1.4031, "step": 508900 }, { "epoch": 9.897909577053962, "grad_norm": 4.366866111755371, "learning_rate": 5.321654501216546e-07, "loss": 1.3831, "step": 509000 }, { "epoch": 9.899854156538648, "grad_norm": 6.460314750671387, "learning_rate": 5.225304136253042e-07, "loss": 1.4236, "step": 509100 }, { "epoch": 9.901798736023334, "grad_norm": 11.325443267822266, "learning_rate": 5.127980535279805e-07, "loss": 1.5583, "step": 509200 }, { "epoch": 9.903743315508022, "grad_norm": 3.3453681468963623, "learning_rate": 5.030656934306569e-07, "loss": 1.5861, "step": 509300 }, { "epoch": 9.905687894992708, "grad_norm": 4.594818592071533, "learning_rate": 4.933333333333333e-07, "loss": 1.4022, "step": 509400 }, { "epoch": 9.907632474477394, "grad_norm": 2.3997511863708496, "learning_rate": 4.836009732360098e-07, "loss": 1.3111, "step": 509500 }, 
{ "epoch": 9.909577053962082, "grad_norm": 5.861173152923584, "learning_rate": 4.738686131386861e-07, "loss": 1.5259, "step": 509600 }, { "epoch": 9.911521633446768, "grad_norm": 3.567429304122925, "learning_rate": 4.641362530413625e-07, "loss": 1.4225, "step": 509700 }, { "epoch": 9.913466212931453, "grad_norm": 6.693932056427002, "learning_rate": 4.5440389294403895e-07, "loss": 1.4111, "step": 509800 }, { "epoch": 9.91541079241614, "grad_norm": 2.095886468887329, "learning_rate": 4.4467153284671533e-07, "loss": 1.4004, "step": 509900 }, { "epoch": 9.917355371900827, "grad_norm": 6.709057331085205, "learning_rate": 4.349391727493917e-07, "loss": 1.5227, "step": 510000 }, { "epoch": 9.917355371900827, "eval_accuracy": 0.5645888888888889, "eval_f1": 0.5530578176790054, "eval_loss": 1.1633964776992798, "eval_precision": 0.5685691361035028, "eval_recall": 0.5645888888888889, "eval_runtime": 11700.5815, "eval_samples_per_second": 15.384, "eval_steps_per_second": 0.481, "step": 510000 }, { "epoch": 9.919299951385513, "grad_norm": 3.1799023151397705, "learning_rate": 4.2520681265206815e-07, "loss": 1.3823, "step": 510100 }, { "epoch": 9.921244530870199, "grad_norm": 4.009042263031006, "learning_rate": 4.1547445255474454e-07, "loss": 1.3879, "step": 510200 }, { "epoch": 9.923189110354885, "grad_norm": 8.33570384979248, "learning_rate": 4.05742092457421e-07, "loss": 1.4563, "step": 510300 }, { "epoch": 9.925133689839573, "grad_norm": 4.384056091308594, "learning_rate": 3.9600973236009736e-07, "loss": 1.513, "step": 510400 }, { "epoch": 9.927078269324259, "grad_norm": 7.421847343444824, "learning_rate": 3.8627737226277375e-07, "loss": 1.3911, "step": 510500 }, { "epoch": 9.929022848808945, "grad_norm": 1.3759781122207642, "learning_rate": 3.7654501216545013e-07, "loss": 1.5135, "step": 510600 }, { "epoch": 9.93096742829363, "grad_norm": 4.708298683166504, "learning_rate": 3.6681265206812657e-07, "loss": 1.449, "step": 510700 }, { "epoch": 9.932912007778318, "grad_norm": 
4.764796733856201, "learning_rate": 3.570802919708029e-07, "loss": 1.6107, "step": 510800 }, { "epoch": 9.934856587263004, "grad_norm": 3.7432031631469727, "learning_rate": 3.4734793187347934e-07, "loss": 1.4198, "step": 510900 }, { "epoch": 9.93680116674769, "grad_norm": 12.9108304977417, "learning_rate": 3.376155717761557e-07, "loss": 1.5311, "step": 511000 }, { "epoch": 9.938745746232378, "grad_norm": 6.556179046630859, "learning_rate": 3.278832116788321e-07, "loss": 1.4423, "step": 511100 }, { "epoch": 9.940690325717064, "grad_norm": 3.1468379497528076, "learning_rate": 3.1815085158150854e-07, "loss": 1.5079, "step": 511200 }, { "epoch": 9.94263490520175, "grad_norm": 6.641810417175293, "learning_rate": 3.0841849148418493e-07, "loss": 1.3465, "step": 511300 }, { "epoch": 9.944579484686436, "grad_norm": 3.819746255874634, "learning_rate": 2.9868613138686137e-07, "loss": 1.3329, "step": 511400 }, { "epoch": 9.946524064171124, "grad_norm": 5.6493120193481445, "learning_rate": 2.889537712895377e-07, "loss": 1.5089, "step": 511500 }, { "epoch": 9.94846864365581, "grad_norm": 5.250295162200928, "learning_rate": 2.7922141119221413e-07, "loss": 1.3826, "step": 511600 }, { "epoch": 9.950413223140496, "grad_norm": 4.307497024536133, "learning_rate": 2.694890510948905e-07, "loss": 1.3914, "step": 511700 }, { "epoch": 9.952357802625182, "grad_norm": 6.67548131942749, "learning_rate": 2.597566909975669e-07, "loss": 1.3691, "step": 511800 }, { "epoch": 9.95430238210987, "grad_norm": 9.785871505737305, "learning_rate": 2.5002433090024334e-07, "loss": 1.3961, "step": 511900 }, { "epoch": 9.956246961594555, "grad_norm": 6.880806922912598, "learning_rate": 2.402919708029197e-07, "loss": 1.6011, "step": 512000 }, { "epoch": 9.958191541079241, "grad_norm": 5.766727924346924, "learning_rate": 2.305596107055961e-07, "loss": 1.3594, "step": 512100 }, { "epoch": 9.960136120563927, "grad_norm": 5.029293060302734, "learning_rate": 2.2082725060827252e-07, "loss": 1.529, "step": 512200 }, 
{ "epoch": 9.962080700048615, "grad_norm": 4.742208480834961, "learning_rate": 2.1109489051094893e-07, "loss": 1.3686, "step": 512300 }, { "epoch": 9.9640252795333, "grad_norm": 5.4627509117126465, "learning_rate": 2.0145985401459857e-07, "loss": 1.4649, "step": 512400 }, { "epoch": 9.965969859017987, "grad_norm": 6.061867713928223, "learning_rate": 1.9172749391727495e-07, "loss": 1.4218, "step": 512500 }, { "epoch": 9.967914438502675, "grad_norm": 8.732996940612793, "learning_rate": 1.8199513381995133e-07, "loss": 1.5197, "step": 512600 }, { "epoch": 9.96985901798736, "grad_norm": 2.3442013263702393, "learning_rate": 1.7226277372262775e-07, "loss": 1.4059, "step": 512700 }, { "epoch": 9.971803597472046, "grad_norm": 4.697150707244873, "learning_rate": 1.6253041362530413e-07, "loss": 1.5319, "step": 512800 }, { "epoch": 9.973748176956732, "grad_norm": 4.8477349281311035, "learning_rate": 1.5279805352798054e-07, "loss": 1.6459, "step": 512900 }, { "epoch": 9.97569275644142, "grad_norm": 3.796152353286743, "learning_rate": 1.4306569343065695e-07, "loss": 1.4126, "step": 513000 }, { "epoch": 9.977637335926106, "grad_norm": 7.915294647216797, "learning_rate": 1.3333333333333334e-07, "loss": 1.4033, "step": 513100 }, { "epoch": 9.979581915410792, "grad_norm": 4.40884256362915, "learning_rate": 1.2360097323600975e-07, "loss": 1.4092, "step": 513200 }, { "epoch": 9.981526494895478, "grad_norm": 4.808947563171387, "learning_rate": 1.1386861313868613e-07, "loss": 1.7266, "step": 513300 }, { "epoch": 9.983471074380166, "grad_norm": 16.63046646118164, "learning_rate": 1.0413625304136253e-07, "loss": 1.516, "step": 513400 }, { "epoch": 9.985415653864852, "grad_norm": 13.094378471374512, "learning_rate": 9.440389294403893e-08, "loss": 1.4162, "step": 513500 }, { "epoch": 9.987360233349538, "grad_norm": 7.931845664978027, "learning_rate": 8.467153284671532e-08, "loss": 1.4168, "step": 513600 }, { "epoch": 9.989304812834224, "grad_norm": 4.993491172790527, "learning_rate": 
7.493917274939174e-08, "loss": 1.5654, "step": 513700 }, { "epoch": 9.991249392318911, "grad_norm": 2.873997926712036, "learning_rate": 6.520681265206813e-08, "loss": 1.3245, "step": 513800 }, { "epoch": 9.993193971803597, "grad_norm": 8.44487476348877, "learning_rate": 5.5474452554744525e-08, "loss": 1.4064, "step": 513900 }, { "epoch": 9.995138551288283, "grad_norm": 5.093390941619873, "learning_rate": 4.574209245742092e-08, "loss": 1.6073, "step": 514000 }, { "epoch": 9.997083130772971, "grad_norm": 6.009812831878662, "learning_rate": 3.600973236009733e-08, "loss": 1.5239, "step": 514100 }, { "epoch": 9.999027710257657, "grad_norm": 8.101922988891602, "learning_rate": 2.627737226277372e-08, "loss": 1.3907, "step": 514200 }, { "epoch": 10.0, "step": 514250, "total_flos": 3.255708097594316e+21, "train_loss": 1.0024504264716565, "train_runtime": 1110055.9196, "train_samples_per_second": 7.412, "train_steps_per_second": 0.463 } ], "logging_steps": 100, "max_steps": 514250, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.255708097594316e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }