{ "best_metric": 0.15805459022521973, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.0835421888053467, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000835421888053467, "grad_norm": 7.261774063110352, "learning_rate": 1e-06, "loss": 1.5114, "step": 1 }, { "epoch": 0.000835421888053467, "eval_loss": 1.6066429615020752, "eval_runtime": 38.4253, "eval_samples_per_second": 13.116, "eval_steps_per_second": 3.279, "step": 1 }, { "epoch": 0.001670843776106934, "grad_norm": 17.708011627197266, "learning_rate": 2e-06, "loss": 1.5639, "step": 2 }, { "epoch": 0.002506265664160401, "grad_norm": 19.94588851928711, "learning_rate": 3e-06, "loss": 2.2871, "step": 3 }, { "epoch": 0.003341687552213868, "grad_norm": 11.027591705322266, "learning_rate": 4e-06, "loss": 1.1737, "step": 4 }, { "epoch": 0.004177109440267335, "grad_norm": 19.168018341064453, "learning_rate": 4.9999999999999996e-06, "loss": 1.6066, "step": 5 }, { "epoch": 0.005012531328320802, "grad_norm": 11.331216812133789, "learning_rate": 6e-06, "loss": 1.7864, "step": 6 }, { "epoch": 0.005847953216374269, "grad_norm": 12.50044059753418, "learning_rate": 7e-06, "loss": 1.3554, "step": 7 }, { "epoch": 0.006683375104427736, "grad_norm": 11.597599983215332, "learning_rate": 8e-06, "loss": 1.2366, "step": 8 }, { "epoch": 0.007518796992481203, "grad_norm": 5.6630682945251465, "learning_rate": 9e-06, "loss": 0.9487, "step": 9 }, { "epoch": 0.00835421888053467, "grad_norm": 5.019942283630371, "learning_rate": 9.999999999999999e-06, "loss": 1.0092, "step": 10 }, { "epoch": 0.009189640768588136, "grad_norm": 8.010810852050781, "learning_rate": 1.1e-05, "loss": 1.4717, "step": 11 }, { "epoch": 0.010025062656641603, "grad_norm": 6.473798751831055, "learning_rate": 1.2e-05, "loss": 0.8519, "step": 12 }, { "epoch": 0.01086048454469507, "grad_norm": 4.7110595703125, "learning_rate": 1.3000000000000001e-05, "loss": 0.7024, "step": 13 }, { "epoch": 0.011695906432748537, "grad_norm": 4.263626575469971, "learning_rate": 1.4e-05, "loss": 0.7116, "step": 14 }, { "epoch": 0.012531328320802004, "grad_norm": 3.472412586212158, "learning_rate": 1.5e-05, "loss": 0.5141, "step": 15 }, { "epoch": 0.013366750208855471, "grad_norm": 3.362398624420166, "learning_rate": 1.6e-05, "loss": 0.6892, "step": 16 }, { "epoch": 0.014202172096908938, "grad_norm": 2.528292179107666, "learning_rate": 1.7e-05, "loss": 0.3903, "step": 17 }, { "epoch": 0.015037593984962405, "grad_norm": 1.7493354082107544, "learning_rate": 1.8e-05, "loss": 0.2833, "step": 18 }, { "epoch": 0.015873015873015872, "grad_norm": 1.5588998794555664, "learning_rate": 1.9e-05, "loss": 0.246, "step": 19 }, { "epoch": 0.01670843776106934, "grad_norm": 1.64949369430542, "learning_rate": 1.9999999999999998e-05, "loss": 0.2675, "step": 20 }, { "epoch": 0.017543859649122806, "grad_norm": 0.8435382843017578, "learning_rate": 2.1e-05, "loss": 0.1553, "step": 21 }, { "epoch": 0.018379281537176273, "grad_norm": 0.6495569348335266, "learning_rate": 2.2e-05, "loss": 0.1196, "step": 22 }, { "epoch": 0.01921470342522974, "grad_norm": 1.0182844400405884, "learning_rate": 2.3000000000000003e-05, "loss": 0.1567, "step": 23 }, { "epoch": 0.020050125313283207, "grad_norm": 2.8188316822052, "learning_rate": 2.4e-05, "loss": 0.2064, "step": 24 }, { "epoch": 0.020885547201336674, "grad_norm": 1.613542914390564, "learning_rate": 2.5e-05, "loss": 0.173, "step": 25 }, { "epoch": 0.02172096908939014, "grad_norm": 1.5958133935928345, "learning_rate": 2.6000000000000002e-05, "loss": 0.184, "step": 26 }, { "epoch": 0.022556390977443608, "grad_norm": 0.7841217517852783, "learning_rate": 2.7000000000000002e-05, "loss": 0.1614, "step": 27 }, { "epoch": 0.023391812865497075, "grad_norm": 1.057112455368042, "learning_rate": 2.8e-05, "loss": 0.1174, "step": 28 }, { "epoch": 0.02422723475355054, "grad_norm": 2.9374876022338867, "learning_rate": 2.9e-05, "loss": 0.4939, "step": 29 }, { "epoch": 0.02506265664160401, "grad_norm": 0.8594872355461121, "learning_rate": 3e-05, "loss": 0.1437, "step": 30 }, { "epoch": 0.025898078529657476, "grad_norm": 0.6192961931228638, "learning_rate": 2.9984895998119723e-05, "loss": 0.1466, "step": 31 }, { "epoch": 0.026733500417710943, "grad_norm": 1.5436159372329712, "learning_rate": 2.993961440992859e-05, "loss": 0.2683, "step": 32 }, { "epoch": 0.02756892230576441, "grad_norm": 0.42387092113494873, "learning_rate": 2.9864246426519023e-05, "loss": 0.1387, "step": 33 }, { "epoch": 0.028404344193817876, "grad_norm": 3.636671304702759, "learning_rate": 2.9758943828979444e-05, "loss": 0.2699, "step": 34 }, { "epoch": 0.029239766081871343, "grad_norm": 0.8583257794380188, "learning_rate": 2.9623918682727355e-05, "loss": 0.1803, "step": 35 }, { "epoch": 0.03007518796992481, "grad_norm": 1.826141595840454, "learning_rate": 2.9459442910437798e-05, "loss": 0.3046, "step": 36 }, { "epoch": 0.030910609857978277, "grad_norm": 0.7456461787223816, "learning_rate": 2.9265847744427305e-05, "loss": 0.1901, "step": 37 }, { "epoch": 0.031746031746031744, "grad_norm": 0.8312434554100037, "learning_rate": 2.904352305959606e-05, "loss": 0.2098, "step": 38 }, { "epoch": 0.03258145363408521, "grad_norm": 0.42538195848464966, "learning_rate": 2.8792916588271762e-05, "loss": 0.1526, "step": 39 }, { "epoch": 0.03341687552213868, "grad_norm": 0.3908769488334656, "learning_rate": 2.8514533018536286e-05, "loss": 0.1633, "step": 40 }, { "epoch": 0.034252297410192145, "grad_norm": 0.3974226713180542, "learning_rate": 2.820893297785107e-05, "loss": 0.1491, "step": 41 }, { "epoch": 0.03508771929824561, "grad_norm": 0.36495357751846313, "learning_rate": 2.7876731904027994e-05, "loss": 0.1951, "step": 42 }, { "epoch": 0.03592314118629908, "grad_norm": 0.560374915599823, "learning_rate": 2.7518598805819542e-05, "loss": 0.1859, "step": 43 }, { "epoch": 0.036758563074352546, "grad_norm": 0.3539278209209442, "learning_rate": 2.7135254915624213e-05, "loss": 0.15, "step": 44 }, { "epoch": 0.03759398496240601, "grad_norm": 1.142353892326355, "learning_rate": 2.672747223702045e-05, "loss": 0.174, "step": 45 }, { "epoch": 0.03842940685045948, "grad_norm": 0.7058913111686707, "learning_rate": 2.6296071990054167e-05, "loss": 0.1992, "step": 46 }, { "epoch": 0.03926482873851295, "grad_norm": 0.38486120104789734, "learning_rate": 2.5841922957410875e-05, "loss": 0.1652, "step": 47 }, { "epoch": 0.040100250626566414, "grad_norm": 3.6414639949798584, "learning_rate": 2.5365939734802973e-05, "loss": 0.4362, "step": 48 }, { "epoch": 0.04093567251461988, "grad_norm": 0.3310062885284424, "learning_rate": 2.4869080889095693e-05, "loss": 0.2132, "step": 49 }, { "epoch": 0.04177109440267335, "grad_norm": 0.39665135741233826, "learning_rate": 2.4352347027881003e-05, "loss": 0.2063, "step": 50 }, { "epoch": 0.04177109440267335, "eval_loss": 0.1654907464981079, "eval_runtime": 39.1179, "eval_samples_per_second": 12.884, "eval_steps_per_second": 3.221, "step": 50 }, { "epoch": 0.042606516290726815, "grad_norm": 0.3080545663833618, "learning_rate": 2.3816778784387097e-05, "loss": 0.1177, "step": 51 }, { "epoch": 0.04344193817878028, "grad_norm": 0.40985429286956787, "learning_rate": 2.3263454721781537e-05, "loss": 0.1433, "step": 52 }, { "epoch": 0.04427736006683375, "grad_norm": 0.15664848685264587, "learning_rate": 2.2693489161088592e-05, "loss": 0.0884, "step": 53 }, { "epoch": 0.045112781954887216, "grad_norm": 0.15345360338687897, "learning_rate": 2.210802993709498e-05, "loss": 0.1044, "step": 54 }, { "epoch": 0.04594820384294068, "grad_norm": 0.20342934131622314, "learning_rate": 2.1508256086763372e-05, "loss": 0.1088, "step": 55 }, { "epoch": 0.04678362573099415, "grad_norm": 0.16992981731891632, "learning_rate": 2.0895375474808857e-05, "loss": 0.1459, "step": 56 }, { "epoch": 0.047619047619047616, "grad_norm": 0.17031916975975037, "learning_rate": 2.0270622361220143e-05, "loss": 0.1282, "step": 57 }, { "epoch": 0.04845446950710108, "grad_norm": 0.2852511703968048, "learning_rate": 1.963525491562421e-05, "loss": 0.1329, "step": 58 }, { "epoch": 0.04928989139515455, "grad_norm": 0.4877017140388489, "learning_rate": 1.8990552683500128e-05, "loss": 0.1365, "step": 59 }, { "epoch": 0.05012531328320802, "grad_norm": 0.3902769684791565, "learning_rate": 1.8337814009344716e-05, "loss": 0.1584, "step": 60 }, { "epoch": 0.050960735171261484, "grad_norm": 0.2292199432849884, "learning_rate": 1.767835342197955e-05, "loss": 0.1402, "step": 61 }, { "epoch": 0.05179615705931495, "grad_norm": 0.2017497420310974, "learning_rate": 1.7013498987264832e-05, "loss": 0.1482, "step": 62 }, { "epoch": 0.05263157894736842, "grad_norm": 0.2939629852771759, "learning_rate": 1.6344589633551502e-05, "loss": 0.1461, "step": 63 }, { "epoch": 0.053467000835421885, "grad_norm": 0.1926645189523697, "learning_rate": 1.5672972455257726e-05, "loss": 0.1044, "step": 64 }, { "epoch": 0.05430242272347535, "grad_norm": 0.3081521987915039, "learning_rate": 1.5e-05, "loss": 0.1596, "step": 65 }, { "epoch": 0.05513784461152882, "grad_norm": 0.4025512933731079, "learning_rate": 1.4327027544742281e-05, "loss": 0.1735, "step": 66 }, { "epoch": 0.055973266499582286, "grad_norm": 0.20314837992191315, "learning_rate": 1.36554103664485e-05, "loss": 0.125, "step": 67 }, { "epoch": 0.05680868838763575, "grad_norm": 0.20744721591472626, "learning_rate": 1.2986501012735174e-05, "loss": 0.1095, "step": 68 }, { "epoch": 0.05764411027568922, "grad_norm": 0.2043089121580124, "learning_rate": 1.2321646578020452e-05, "loss": 0.1544, "step": 69 }, { "epoch": 0.05847953216374269, "grad_norm": 0.2829361855983734, "learning_rate": 1.1662185990655285e-05, "loss": 0.1347, "step": 70 }, { "epoch": 0.059314954051796154, "grad_norm": 0.2790044844150543, "learning_rate": 1.1009447316499875e-05, "loss": 0.1564, "step": 71 }, { "epoch": 0.06015037593984962, "grad_norm": 2.54337739944458, "learning_rate": 1.036474508437579e-05, "loss": 0.1629, "step": 72 }, { "epoch": 0.06098579782790309, "grad_norm": 0.279619425535202, "learning_rate": 9.729377638779859e-06, "loss": 0.1506, "step": 73 }, { "epoch": 0.061821219715956555, "grad_norm": 0.2148342877626419, "learning_rate": 9.104624525191147e-06, "loss": 0.1222, "step": 74 }, { "epoch": 0.06265664160401002, "grad_norm": 3.0420451164245605, "learning_rate": 8.491743913236629e-06, "loss": 0.1404, "step": 75 }, { "epoch": 0.06349206349206349, "grad_norm": 0.2432384490966797, "learning_rate": 7.89197006290502e-06, "loss": 0.1076, "step": 76 }, { "epoch": 0.06432748538011696, "grad_norm": 3.1877377033233643, "learning_rate": 7.30651083891141e-06, "loss": 0.152, "step": 77 }, { "epoch": 0.06516290726817042, "grad_norm": 0.4180755019187927, "learning_rate": 6.736545278218464e-06, "loss": 0.1127, "step": 78 }, { "epoch": 0.06599832915622389, "grad_norm": 0.25477147102355957, "learning_rate": 6.1832212156129045e-06, "loss": 0.1277, "step": 79 }, { "epoch": 0.06683375104427736, "grad_norm": 0.399618536233902, "learning_rate": 5.647652972118998e-06, "loss": 0.1418, "step": 80 }, { "epoch": 0.06766917293233082, "grad_norm": 0.27185890078544617, "learning_rate": 5.130919110904311e-06, "loss": 0.155, "step": 81 }, { "epoch": 0.06850459482038429, "grad_norm": 1.3203309774398804, "learning_rate": 4.6340602651970304e-06, "loss": 0.3033, "step": 82 }, { "epoch": 0.06934001670843776, "grad_norm": 0.3346821367740631, "learning_rate": 4.158077042589129e-06, "loss": 0.1639, "step": 83 }, { "epoch": 0.07017543859649122, "grad_norm": 0.3132845163345337, "learning_rate": 3.7039280099458373e-06, "loss": 0.1417, "step": 84 }, { "epoch": 0.07101086048454469, "grad_norm": 0.30599430203437805, "learning_rate": 3.272527762979553e-06, "loss": 0.1666, "step": 85 }, { "epoch": 0.07184628237259816, "grad_norm": 0.5425288677215576, "learning_rate": 2.86474508437579e-06, "loss": 0.1837, "step": 86 }, { "epoch": 0.07268170426065163, "grad_norm": 0.2609088122844696, "learning_rate": 2.4814011941804603e-06, "loss": 0.1981, "step": 87 }, { "epoch": 0.07351712614870509, "grad_norm": 0.2561860680580139, "learning_rate": 2.1232680959720085e-06, "loss": 0.1169, "step": 88 }, { "epoch": 0.07435254803675856, "grad_norm": 0.26870644092559814, "learning_rate": 1.79106702214893e-06, "loss": 0.162, "step": 89 }, { "epoch": 0.07518796992481203, "grad_norm": 0.35758742690086365, "learning_rate": 1.4854669814637145e-06, "loss": 0.1946, "step": 90 }, { "epoch": 0.07602339181286549, "grad_norm": 0.40720826387405396, "learning_rate": 1.2070834117282414e-06, "loss": 0.1676, "step": 91 }, { "epoch": 0.07685881370091896, "grad_norm": 0.2641846835613251, "learning_rate": 9.56476940403942e-07, "loss": 0.1419, "step": 92 }, { "epoch": 0.07769423558897243, "grad_norm": 0.32390695810317993, "learning_rate": 7.341522555726971e-07, "loss": 0.1727, "step": 93 }, { "epoch": 0.0785296574770259, "grad_norm": 0.17990685999393463, "learning_rate": 5.405570895622014e-07, "loss": 0.1278, "step": 94 }, { "epoch": 0.07936507936507936, "grad_norm": 0.5421126484870911, "learning_rate": 3.760813172726457e-07, "loss": 0.1988, "step": 95 }, { "epoch": 0.08020050125313283, "grad_norm": 0.22047364711761475, "learning_rate": 2.41056171020555e-07, "loss": 0.1636, "step": 96 }, { "epoch": 0.0810359231411863, "grad_norm": 0.2979952096939087, "learning_rate": 1.357535734809795e-07, "loss": 0.1766, "step": 97 }, { "epoch": 0.08187134502923976, "grad_norm": 0.4420830309391022, "learning_rate": 6.038559007141397e-08, "loss": 0.2073, "step": 98 }, { "epoch": 0.08270676691729323, "grad_norm": 0.8136470913887024, "learning_rate": 1.510400188028116e-08, "loss": 0.2351, "step": 99 }, { "epoch": 0.0835421888053467, "grad_norm": 0.37833961844444275, "learning_rate": 0.0, "loss": 0.1832, "step": 100 }, { "epoch": 0.0835421888053467, "eval_loss": 0.15805459022521973, "eval_runtime": 39.1836, "eval_samples_per_second": 12.863, "eval_steps_per_second": 3.216, "step": 100 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.3812411056128e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }