{ "best_metric": 0.3771364390850067, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.7532956685499058, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003766478342749529, "grad_norm": 2.3061294555664062, "learning_rate": 1.0060000000000002e-05, "loss": 0.587, "step": 1 }, { "epoch": 0.003766478342749529, "eval_loss": 0.8022822141647339, "eval_runtime": 18.9674, "eval_samples_per_second": 5.905, "eval_steps_per_second": 1.476, "step": 1 }, { "epoch": 0.007532956685499058, "grad_norm": 0.5502987504005432, "learning_rate": 2.0120000000000004e-05, "loss": 0.6444, "step": 2 }, { "epoch": 0.011299435028248588, "grad_norm": 0.8654094934463501, "learning_rate": 3.018e-05, "loss": 0.7336, "step": 3 }, { "epoch": 0.015065913370998116, "grad_norm": 0.6728006601333618, "learning_rate": 4.024000000000001e-05, "loss": 0.5629, "step": 4 }, { "epoch": 0.018832391713747645, "grad_norm": 0.5054320096969604, "learning_rate": 5.03e-05, "loss": 0.514, "step": 5 }, { "epoch": 0.022598870056497175, "grad_norm": 0.8824482560157776, "learning_rate": 6.036e-05, "loss": 0.638, "step": 6 }, { "epoch": 0.026365348399246705, "grad_norm": 0.4188986122608185, "learning_rate": 7.042e-05, "loss": 0.4911, "step": 7 }, { "epoch": 0.030131826741996232, "grad_norm": 0.5569682717323303, "learning_rate": 8.048000000000002e-05, "loss": 0.5245, "step": 8 }, { "epoch": 0.03389830508474576, "grad_norm": 0.4881710112094879, "learning_rate": 9.054000000000001e-05, "loss": 0.5209, "step": 9 }, { "epoch": 0.03766478342749529, "grad_norm": 0.726476788520813, "learning_rate": 0.0001006, "loss": 0.5509, "step": 10 }, { "epoch": 0.04143126177024482, "grad_norm": 0.5789438486099243, "learning_rate": 0.00010007052631578948, "loss": 0.4475, "step": 11 }, { "epoch": 0.04519774011299435, "grad_norm": 0.5653628706932068, "learning_rate": 9.954105263157895e-05, "loss": 0.5234, "step": 12 }, { "epoch": 0.04896421845574388, "grad_norm": 0.7087174654006958, "learning_rate": 9.901157894736842e-05, "loss": 0.5173, "step": 13 }, { "epoch": 0.05273069679849341, "grad_norm": 0.5317690372467041, "learning_rate": 9.84821052631579e-05, "loss": 0.5216, "step": 14 }, { "epoch": 0.05649717514124294, "grad_norm": 0.4678763747215271, "learning_rate": 9.795263157894737e-05, "loss": 0.4691, "step": 15 }, { "epoch": 0.060263653483992465, "grad_norm": 0.4033360779285431, "learning_rate": 9.742315789473686e-05, "loss": 0.5041, "step": 16 }, { "epoch": 0.064030131826742, "grad_norm": 0.3931688964366913, "learning_rate": 9.689368421052633e-05, "loss": 0.5035, "step": 17 }, { "epoch": 0.06779661016949153, "grad_norm": 0.3954808712005615, "learning_rate": 9.63642105263158e-05, "loss": 0.4895, "step": 18 }, { "epoch": 0.07156308851224105, "grad_norm": 0.4155082106590271, "learning_rate": 9.583473684210527e-05, "loss": 0.6003, "step": 19 }, { "epoch": 0.07532956685499058, "grad_norm": 0.36141490936279297, "learning_rate": 9.530526315789474e-05, "loss": 0.3604, "step": 20 }, { "epoch": 0.07909604519774012, "grad_norm": 0.3821530044078827, "learning_rate": 9.477578947368422e-05, "loss": 0.3565, "step": 21 }, { "epoch": 0.08286252354048965, "grad_norm": 0.38217034935951233, "learning_rate": 9.424631578947369e-05, "loss": 0.4556, "step": 22 }, { "epoch": 0.08662900188323917, "grad_norm": 0.424104779958725, "learning_rate": 9.371684210526316e-05, "loss": 0.446, "step": 23 }, { "epoch": 0.0903954802259887, "grad_norm": 0.4113253653049469, "learning_rate": 9.318736842105263e-05, "loss": 0.4466, "step": 24 }, { "epoch": 0.09416195856873823, "grad_norm": 0.3927508592605591, "learning_rate": 9.26578947368421e-05, "loss": 0.3874, "step": 25 }, { "epoch": 0.09792843691148775, "grad_norm": 0.4676135182380676, "learning_rate": 9.212842105263159e-05, "loss": 0.4495, "step": 26 }, { "epoch": 0.1016949152542373, "grad_norm": 0.9011837840080261, "learning_rate": 9.159894736842107e-05, "loss": 0.3897, "step": 27 }, { "epoch": 0.10546139359698682, "grad_norm": 0.45149439573287964, "learning_rate": 9.106947368421054e-05, "loss": 0.4115, "step": 28 }, { "epoch": 0.10922787193973635, "grad_norm": 0.4575241804122925, "learning_rate": 9.054000000000001e-05, "loss": 0.459, "step": 29 }, { "epoch": 0.11299435028248588, "grad_norm": 0.4709136486053467, "learning_rate": 9.001052631578948e-05, "loss": 0.4282, "step": 30 }, { "epoch": 0.1167608286252354, "grad_norm": 0.48434707522392273, "learning_rate": 8.948105263157895e-05, "loss": 0.4216, "step": 31 }, { "epoch": 0.12052730696798493, "grad_norm": 0.4398117959499359, "learning_rate": 8.895157894736842e-05, "loss": 0.4041, "step": 32 }, { "epoch": 0.12429378531073447, "grad_norm": 0.5581742525100708, "learning_rate": 8.842210526315789e-05, "loss": 0.5232, "step": 33 }, { "epoch": 0.128060263653484, "grad_norm": 0.4656040370464325, "learning_rate": 8.789263157894738e-05, "loss": 0.4143, "step": 34 }, { "epoch": 0.1318267419962335, "grad_norm": 0.47406572103500366, "learning_rate": 8.736315789473685e-05, "loss": 0.3553, "step": 35 }, { "epoch": 0.13559322033898305, "grad_norm": 1.1412121057510376, "learning_rate": 8.683368421052632e-05, "loss": 0.3242, "step": 36 }, { "epoch": 0.1393596986817326, "grad_norm": 0.5711633563041687, "learning_rate": 8.63042105263158e-05, "loss": 0.4541, "step": 37 }, { "epoch": 0.1431261770244821, "grad_norm": 0.5628933310508728, "learning_rate": 8.577473684210527e-05, "loss": 0.4493, "step": 38 }, { "epoch": 0.14689265536723164, "grad_norm": 0.5506464838981628, "learning_rate": 8.524526315789474e-05, "loss": 0.4519, "step": 39 }, { "epoch": 0.15065913370998116, "grad_norm": 0.521560549736023, "learning_rate": 8.471578947368421e-05, "loss": 0.3709, "step": 40 }, { "epoch": 0.1544256120527307, "grad_norm": 0.6122710108757019, "learning_rate": 8.41863157894737e-05, "loss": 0.4376, "step": 41 }, { "epoch": 0.15819209039548024, "grad_norm": 0.7392388582229614, "learning_rate": 8.365684210526317e-05, "loss": 0.5101, "step": 42 }, { "epoch": 0.16195856873822975, "grad_norm": 0.5833247900009155, "learning_rate": 8.312736842105264e-05, "loss": 0.4233, "step": 43 }, { "epoch": 0.1657250470809793, "grad_norm": 0.5848302841186523, "learning_rate": 8.259789473684211e-05, "loss": 0.3661, "step": 44 }, { "epoch": 0.1694915254237288, "grad_norm": 0.5838742256164551, "learning_rate": 8.206842105263158e-05, "loss": 0.3675, "step": 45 }, { "epoch": 0.17325800376647835, "grad_norm": 0.8844621777534485, "learning_rate": 8.153894736842105e-05, "loss": 0.376, "step": 46 }, { "epoch": 0.17702448210922786, "grad_norm": 0.8669074177742004, "learning_rate": 8.100947368421053e-05, "loss": 0.4068, "step": 47 }, { "epoch": 0.1807909604519774, "grad_norm": 0.827057421207428, "learning_rate": 8.048000000000002e-05, "loss": 0.4575, "step": 48 }, { "epoch": 0.18455743879472694, "grad_norm": 0.8869360685348511, "learning_rate": 7.995052631578949e-05, "loss": 0.382, "step": 49 }, { "epoch": 0.18832391713747645, "grad_norm": 1.2598633766174316, "learning_rate": 7.942105263157896e-05, "loss": 0.1047, "step": 50 }, { "epoch": 0.18832391713747645, "eval_loss": 0.4519185423851013, "eval_runtime": 18.9348, "eval_samples_per_second": 5.915, "eval_steps_per_second": 1.479, "step": 50 }, { "epoch": 0.192090395480226, "grad_norm": 0.36862310767173767, "learning_rate": 7.889157894736843e-05, "loss": 0.4393, "step": 51 }, { "epoch": 0.1958568738229755, "grad_norm": 0.4028255343437195, "learning_rate": 7.83621052631579e-05, "loss": 0.4695, "step": 52 }, { "epoch": 0.19962335216572505, "grad_norm": 0.3694057762622833, "learning_rate": 7.783263157894737e-05, "loss": 0.4306, "step": 53 }, { "epoch": 0.2033898305084746, "grad_norm": 0.3954612910747528, "learning_rate": 7.730315789473684e-05, "loss": 0.4593, "step": 54 }, { "epoch": 0.2071563088512241, "grad_norm": 0.36936596035957336, "learning_rate": 7.677368421052632e-05, "loss": 0.5009, "step": 55 }, { "epoch": 0.21092278719397364, "grad_norm": 0.3590075373649597, "learning_rate": 7.624421052631579e-05, "loss": 0.4587, "step": 56 }, { "epoch": 0.21468926553672316, "grad_norm": 0.33008989691734314, "learning_rate": 7.571473684210526e-05, "loss": 0.4058, "step": 57 }, { "epoch": 0.2184557438794727, "grad_norm": 0.29254990816116333, "learning_rate": 7.518526315789475e-05, "loss": 0.3881, "step": 58 }, { "epoch": 0.2222222222222222, "grad_norm": 0.3461231291294098, "learning_rate": 7.465578947368422e-05, "loss": 0.4481, "step": 59 }, { "epoch": 0.22598870056497175, "grad_norm": 0.3462350368499756, "learning_rate": 7.412631578947369e-05, "loss": 0.4543, "step": 60 }, { "epoch": 0.2297551789077213, "grad_norm": 0.36895984411239624, "learning_rate": 7.359684210526317e-05, "loss": 0.4236, "step": 61 }, { "epoch": 0.2335216572504708, "grad_norm": 0.3754134476184845, "learning_rate": 7.306736842105264e-05, "loss": 0.4276, "step": 62 }, { "epoch": 0.23728813559322035, "grad_norm": 0.33340367674827576, "learning_rate": 7.253789473684211e-05, "loss": 0.4225, "step": 63 }, { "epoch": 0.24105461393596986, "grad_norm": 0.33888867497444153, "learning_rate": 7.200842105263158e-05, "loss": 0.3903, "step": 64 }, { "epoch": 0.2448210922787194, "grad_norm": 0.33951789140701294, "learning_rate": 7.147894736842105e-05, "loss": 0.3879, "step": 65 }, { "epoch": 0.24858757062146894, "grad_norm": 0.35544970631599426, "learning_rate": 7.094947368421052e-05, "loss": 0.3854, "step": 66 }, { "epoch": 0.2523540489642185, "grad_norm": 0.36719751358032227, "learning_rate": 7.042e-05, "loss": 0.4063, "step": 67 }, { "epoch": 0.256120527306968, "grad_norm": 0.3732960820198059, "learning_rate": 6.989052631578948e-05, "loss": 0.435, "step": 68 }, { "epoch": 0.2598870056497175, "grad_norm": 0.43498024344444275, "learning_rate": 6.936105263157896e-05, "loss": 0.469, "step": 69 }, { "epoch": 0.263653483992467, "grad_norm": 0.30631107091903687, "learning_rate": 6.883157894736843e-05, "loss": 0.3327, "step": 70 }, { "epoch": 0.2674199623352166, "grad_norm": 0.3484049439430237, "learning_rate": 6.83021052631579e-05, "loss": 0.361, "step": 71 }, { "epoch": 0.2711864406779661, "grad_norm": 0.3027707040309906, "learning_rate": 6.777263157894737e-05, "loss": 0.3155, "step": 72 }, { "epoch": 0.2749529190207156, "grad_norm": 0.34501898288726807, "learning_rate": 6.724315789473684e-05, "loss": 0.3608, "step": 73 }, { "epoch": 0.2787193973634652, "grad_norm": 0.3562847971916199, "learning_rate": 6.671368421052631e-05, "loss": 0.4125, "step": 74 }, { "epoch": 0.2824858757062147, "grad_norm": 0.3857191503047943, "learning_rate": 6.61842105263158e-05, "loss": 0.4066, "step": 75 }, { "epoch": 0.2862523540489642, "grad_norm": 0.36726120114326477, "learning_rate": 6.565473684210527e-05, "loss": 0.2932, "step": 76 }, { "epoch": 0.2900188323917137, "grad_norm": 0.3790563642978668, "learning_rate": 6.512526315789474e-05, "loss": 0.3213, "step": 77 }, { "epoch": 0.2937853107344633, "grad_norm": 0.43795183300971985, "learning_rate": 6.459578947368421e-05, "loss": 0.4281, "step": 78 }, { "epoch": 0.2975517890772128, "grad_norm": 0.37892189621925354, "learning_rate": 6.406631578947369e-05, "loss": 0.352, "step": 79 }, { "epoch": 0.3013182674199623, "grad_norm": 0.4480699598789215, "learning_rate": 6.353684210526316e-05, "loss": 0.4302, "step": 80 }, { "epoch": 0.3050847457627119, "grad_norm": 0.44733160734176636, "learning_rate": 6.300736842105263e-05, "loss": 0.3919, "step": 81 }, { "epoch": 0.3088512241054614, "grad_norm": 0.41077038645744324, "learning_rate": 6.247789473684212e-05, "loss": 0.3728, "step": 82 }, { "epoch": 0.3126177024482109, "grad_norm": 0.44138315320014954, "learning_rate": 6.194842105263159e-05, "loss": 0.3604, "step": 83 }, { "epoch": 0.3163841807909605, "grad_norm": 0.4938478171825409, "learning_rate": 6.141894736842106e-05, "loss": 0.4533, "step": 84 }, { "epoch": 0.32015065913371, "grad_norm": 0.42112767696380615, "learning_rate": 6.088947368421053e-05, "loss": 0.3367, "step": 85 }, { "epoch": 0.3239171374764595, "grad_norm": 0.4219391644001007, "learning_rate": 6.036e-05, "loss": 0.3571, "step": 86 }, { "epoch": 0.327683615819209, "grad_norm": 0.45725205540657043, "learning_rate": 5.9830526315789475e-05, "loss": 0.3193, "step": 87 }, { "epoch": 0.3314500941619586, "grad_norm": 0.5028024315834045, "learning_rate": 5.9301052631578946e-05, "loss": 0.3816, "step": 88 }, { "epoch": 0.3352165725047081, "grad_norm": 0.5676722526550293, "learning_rate": 5.877157894736843e-05, "loss": 0.4219, "step": 89 }, { "epoch": 0.3389830508474576, "grad_norm": 0.5122228860855103, "learning_rate": 5.82421052631579e-05, "loss": 0.334, "step": 90 }, { "epoch": 0.3427495291902072, "grad_norm": 0.5356689691543579, "learning_rate": 5.771263157894737e-05, "loss": 0.4263, "step": 91 }, { "epoch": 0.3465160075329567, "grad_norm": 0.5746489763259888, "learning_rate": 5.718315789473685e-05, "loss": 0.4444, "step": 92 }, { "epoch": 0.3502824858757062, "grad_norm": 0.6603770852088928, "learning_rate": 5.665368421052632e-05, "loss": 0.3311, "step": 93 }, { "epoch": 0.3540489642184557, "grad_norm": 0.6168034672737122, "learning_rate": 5.612421052631579e-05, "loss": 0.3723, "step": 94 }, { "epoch": 0.3578154425612053, "grad_norm": 0.6045776605606079, "learning_rate": 5.559473684210527e-05, "loss": 0.3088, "step": 95 }, { "epoch": 0.3615819209039548, "grad_norm": 0.753142237663269, "learning_rate": 5.506526315789474e-05, "loss": 0.3691, "step": 96 }, { "epoch": 0.3653483992467043, "grad_norm": 0.6493861675262451, "learning_rate": 5.453578947368421e-05, "loss": 0.2952, "step": 97 }, { "epoch": 0.3691148775894539, "grad_norm": 1.0952403545379639, "learning_rate": 5.400631578947369e-05, "loss": 0.5704, "step": 98 }, { "epoch": 0.3728813559322034, "grad_norm": 0.9987799525260925, "learning_rate": 5.347684210526316e-05, "loss": 0.3471, "step": 99 }, { "epoch": 0.3766478342749529, "grad_norm": 1.2371445894241333, "learning_rate": 5.294736842105263e-05, "loss": 0.4076, "step": 100 }, { "epoch": 0.3766478342749529, "eval_loss": 0.4110027253627777, "eval_runtime": 19.5904, "eval_samples_per_second": 5.717, "eval_steps_per_second": 1.429, "step": 100 }, { "epoch": 0.3804143126177024, "grad_norm": 0.3038599193096161, "learning_rate": 5.24178947368421e-05, "loss": 0.396, "step": 101 }, { "epoch": 0.384180790960452, "grad_norm": 0.296657919883728, "learning_rate": 5.1888421052631585e-05, "loss": 0.4235, "step": 102 }, { "epoch": 0.3879472693032015, "grad_norm": 0.32019734382629395, "learning_rate": 5.135894736842106e-05, "loss": 0.5136, "step": 103 }, { "epoch": 0.391713747645951, "grad_norm": 0.3396560847759247, "learning_rate": 5.082947368421053e-05, "loss": 0.4513, "step": 104 }, { "epoch": 0.3954802259887006, "grad_norm": 0.27841681241989136, "learning_rate": 5.03e-05, "loss": 0.401, "step": 105 }, { "epoch": 0.3992467043314501, "grad_norm": 0.3084738552570343, "learning_rate": 4.977052631578947e-05, "loss": 0.4566, "step": 106 }, { "epoch": 0.4030131826741996, "grad_norm": 0.2915210425853729, "learning_rate": 4.924105263157895e-05, "loss": 0.4317, "step": 107 }, { "epoch": 0.4067796610169492, "grad_norm": 0.3045331537723541, "learning_rate": 4.871157894736843e-05, "loss": 0.4416, "step": 108 }, { "epoch": 0.4105461393596987, "grad_norm": 0.2777758538722992, "learning_rate": 4.81821052631579e-05, "loss": 0.4247, "step": 109 }, { "epoch": 0.4143126177024482, "grad_norm": 0.31940633058547974, "learning_rate": 4.765263157894737e-05, "loss": 0.4379, "step": 110 }, { "epoch": 0.4180790960451977, "grad_norm": 0.2750423550605774, "learning_rate": 4.7123157894736845e-05, "loss": 0.3353, "step": 111 }, { "epoch": 0.4218455743879473, "grad_norm": 0.26961493492126465, "learning_rate": 4.6593684210526316e-05, "loss": 0.3279, "step": 112 }, { "epoch": 0.4256120527306968, "grad_norm": 0.316545307636261, "learning_rate": 4.606421052631579e-05, "loss": 0.3892, "step": 113 }, { "epoch": 0.4293785310734463, "grad_norm": 0.31937259435653687, "learning_rate": 4.553473684210527e-05, "loss": 0.4269, "step": 114 }, { "epoch": 0.4331450094161959, "grad_norm": 0.3130375146865845, "learning_rate": 4.500526315789474e-05, "loss": 0.426, "step": 115 }, { "epoch": 0.4369114877589454, "grad_norm": 0.3353985846042633, "learning_rate": 4.447578947368421e-05, "loss": 0.5051, "step": 116 }, { "epoch": 0.4406779661016949, "grad_norm": 0.36258843541145325, "learning_rate": 4.394631578947369e-05, "loss": 0.3839, "step": 117 }, { "epoch": 0.4444444444444444, "grad_norm": 0.33029577136039734, "learning_rate": 4.341684210526316e-05, "loss": 0.3584, "step": 118 }, { "epoch": 0.448210922787194, "grad_norm": 0.387517511844635, "learning_rate": 4.2887368421052636e-05, "loss": 0.4863, "step": 119 }, { "epoch": 0.4519774011299435, "grad_norm": 0.3572627007961273, "learning_rate": 4.2357894736842106e-05, "loss": 0.4378, "step": 120 }, { "epoch": 0.455743879472693, "grad_norm": 0.3738924562931061, "learning_rate": 4.182842105263158e-05, "loss": 0.4304, "step": 121 }, { "epoch": 0.4595103578154426, "grad_norm": 0.34912800788879395, "learning_rate": 4.1298947368421053e-05, "loss": 0.3864, "step": 122 }, { "epoch": 0.4632768361581921, "grad_norm": 0.3680974841117859, "learning_rate": 4.0769473684210524e-05, "loss": 0.4111, "step": 123 }, { "epoch": 0.4670433145009416, "grad_norm": 0.33224138617515564, "learning_rate": 4.024000000000001e-05, "loss": 0.3555, "step": 124 }, { "epoch": 0.4708097928436911, "grad_norm": 0.36883029341697693, "learning_rate": 3.971052631578948e-05, "loss": 0.347, "step": 125 }, { "epoch": 0.4745762711864407, "grad_norm": 0.38540467619895935, "learning_rate": 3.918105263157895e-05, "loss": 0.3472, "step": 126 }, { "epoch": 0.4783427495291902, "grad_norm": 0.37976881861686707, "learning_rate": 3.865157894736842e-05, "loss": 0.3864, "step": 127 }, { "epoch": 0.4821092278719397, "grad_norm": 0.3586125671863556, "learning_rate": 3.8122105263157896e-05, "loss": 0.3373, "step": 128 }, { "epoch": 0.4858757062146893, "grad_norm": 0.42825913429260254, "learning_rate": 3.759263157894737e-05, "loss": 0.4054, "step": 129 }, { "epoch": 0.4896421845574388, "grad_norm": 0.43774721026420593, "learning_rate": 3.7063157894736844e-05, "loss": 0.4975, "step": 130 }, { "epoch": 0.4934086629001883, "grad_norm": 0.4187086820602417, "learning_rate": 3.653368421052632e-05, "loss": 0.366, "step": 131 }, { "epoch": 0.4971751412429379, "grad_norm": 0.39257967472076416, "learning_rate": 3.600421052631579e-05, "loss": 0.3611, "step": 132 }, { "epoch": 0.5009416195856874, "grad_norm": 0.44108664989471436, "learning_rate": 3.547473684210526e-05, "loss": 0.3394, "step": 133 }, { "epoch": 0.504708097928437, "grad_norm": 0.3622487187385559, "learning_rate": 3.494526315789474e-05, "loss": 0.3364, "step": 134 }, { "epoch": 0.5084745762711864, "grad_norm": 0.395108163356781, "learning_rate": 3.4415789473684216e-05, "loss": 0.2752, "step": 135 }, { "epoch": 0.512241054613936, "grad_norm": 0.5722901821136475, "learning_rate": 3.3886315789473686e-05, "loss": 0.3307, "step": 136 }, { "epoch": 0.5160075329566854, "grad_norm": 0.42553991079330444, "learning_rate": 3.3356842105263156e-05, "loss": 0.3167, "step": 137 }, { "epoch": 0.519774011299435, "grad_norm": 0.4664463400840759, "learning_rate": 3.2827368421052634e-05, "loss": 0.3743, "step": 138 }, { "epoch": 0.5235404896421846, "grad_norm": 0.42213213443756104, "learning_rate": 3.2297894736842104e-05, "loss": 0.2897, "step": 139 }, { "epoch": 0.527306967984934, "grad_norm": 0.5967556238174438, "learning_rate": 3.176842105263158e-05, "loss": 0.4282, "step": 140 }, { "epoch": 0.5310734463276836, "grad_norm": 0.5029946565628052, "learning_rate": 3.123894736842106e-05, "loss": 0.3133, "step": 141 }, { "epoch": 0.5348399246704332, "grad_norm": 0.6357027888298035, "learning_rate": 3.070947368421053e-05, "loss": 0.3568, "step": 142 }, { "epoch": 0.5386064030131826, "grad_norm": 0.6102229952812195, "learning_rate": 3.018e-05, "loss": 0.3661, "step": 143 }, { "epoch": 0.5423728813559322, "grad_norm": 0.5800431370735168, "learning_rate": 2.9650526315789473e-05, "loss": 0.3332, "step": 144 }, { "epoch": 0.5461393596986818, "grad_norm": 0.6075107455253601, "learning_rate": 2.912105263157895e-05, "loss": 0.2608, "step": 145 }, { "epoch": 0.5499058380414312, "grad_norm": 0.6365634799003601, "learning_rate": 2.8591578947368424e-05, "loss": 0.3447, "step": 146 }, { "epoch": 0.5536723163841808, "grad_norm": 0.7621574997901917, "learning_rate": 2.8062105263157894e-05, "loss": 0.377, "step": 147 }, { "epoch": 0.5574387947269304, "grad_norm": 0.7059007287025452, "learning_rate": 2.753263157894737e-05, "loss": 0.269, "step": 148 }, { "epoch": 0.5612052730696798, "grad_norm": 1.0961761474609375, "learning_rate": 2.7003157894736845e-05, "loss": 0.4317, "step": 149 }, { "epoch": 0.5649717514124294, "grad_norm": 0.9471625089645386, "learning_rate": 2.6473684210526315e-05, "loss": 0.2856, "step": 150 }, { "epoch": 0.5649717514124294, "eval_loss": 0.389584481716156, "eval_runtime": 19.3315, "eval_samples_per_second": 5.794, "eval_steps_per_second": 1.448, "step": 150 }, { "epoch": 0.568738229755179, "grad_norm": 0.26727038621902466, "learning_rate": 2.5944210526315793e-05, "loss": 0.4036, "step": 151 }, { "epoch": 0.5725047080979284, "grad_norm": 0.24589797854423523, "learning_rate": 2.5414736842105266e-05, "loss": 0.3443, "step": 152 }, { "epoch": 0.576271186440678, "grad_norm": 0.3061113953590393, "learning_rate": 2.4885263157894737e-05, "loss": 0.4185, "step": 153 }, { "epoch": 0.5800376647834274, "grad_norm": 0.290894091129303, "learning_rate": 2.4355789473684214e-05, "loss": 0.4565, "step": 154 }, { "epoch": 0.583804143126177, "grad_norm": 0.3391144275665283, "learning_rate": 2.3826315789473684e-05, "loss": 0.4667, "step": 155 }, { "epoch": 0.5875706214689266, "grad_norm": 0.30165934562683105, "learning_rate": 2.3296842105263158e-05, "loss": 0.4308, "step": 156 }, { "epoch": 0.591337099811676, "grad_norm": 0.2832431495189667, "learning_rate": 2.2767368421052635e-05, "loss": 0.3862, "step": 157 }, { "epoch": 0.5951035781544256, "grad_norm": 0.28951308131217957, "learning_rate": 2.2237894736842105e-05, "loss": 0.3645, "step": 158 }, { "epoch": 0.5988700564971752, "grad_norm": 0.33745133876800537, "learning_rate": 2.170842105263158e-05, "loss": 0.4001, "step": 159 }, { "epoch": 0.6026365348399246, "grad_norm": 0.31061139702796936, "learning_rate": 2.1178947368421053e-05, "loss": 0.3586, "step": 160 }, { "epoch": 0.6064030131826742, "grad_norm": 0.3397316038608551, "learning_rate": 2.0649473684210527e-05, "loss": 0.4179, "step": 161 }, { "epoch": 0.6101694915254238, "grad_norm": 0.3633480966091156, "learning_rate": 2.0120000000000004e-05, "loss": 0.458, "step": 162 }, { "epoch": 0.6139359698681732, "grad_norm": 0.3560391068458557, "learning_rate": 1.9590526315789474e-05, "loss": 0.3905, "step": 163 }, { "epoch": 0.6177024482109228, "grad_norm": 0.37587136030197144, "learning_rate": 1.9061052631578948e-05, "loss": 0.4476, "step": 164 }, { "epoch": 0.6214689265536724, "grad_norm": 0.3361068665981293, "learning_rate": 1.8531578947368422e-05, "loss": 0.4138, "step": 165 }, { "epoch": 0.6252354048964218, "grad_norm": 0.3378278911113739, "learning_rate": 1.8002105263157896e-05, "loss": 0.3866, "step": 166 }, { "epoch": 0.6290018832391714, "grad_norm": 0.38653016090393066, "learning_rate": 1.747263157894737e-05, "loss": 0.4981, "step": 167 }, { "epoch": 0.632768361581921, "grad_norm": 0.35619762539863586, "learning_rate": 1.6943157894736843e-05, "loss": 0.3997, "step": 168 }, { "epoch": 0.6365348399246704, "grad_norm": 0.3622172474861145, "learning_rate": 1.6413684210526317e-05, "loss": 0.4018, "step": 169 }, { "epoch": 0.64030131826742, "grad_norm": 0.3765234351158142, "learning_rate": 1.588421052631579e-05, "loss": 0.302, "step": 170 }, { "epoch": 0.6440677966101694, "grad_norm": 0.36978664994239807, "learning_rate": 1.5354736842105264e-05, "loss": 0.3831, "step": 171 }, { "epoch": 0.647834274952919, "grad_norm": 0.35858282446861267, "learning_rate": 1.4825263157894736e-05, "loss": 0.3773, "step": 172 }, { "epoch": 0.6516007532956686, "grad_norm": 0.32193654775619507, "learning_rate": 1.4295789473684212e-05, "loss": 0.3053, "step": 173 }, { "epoch": 0.655367231638418, "grad_norm": 0.37463074922561646, "learning_rate": 1.3766315789473686e-05, "loss": 0.3728, "step": 174 }, { "epoch": 0.6591337099811676, "grad_norm": 0.3786180019378662, "learning_rate": 1.3236842105263158e-05, "loss": 0.3157, "step": 175 }, { "epoch": 0.6629001883239172, "grad_norm": 0.3969653248786926, "learning_rate": 1.2707368421052633e-05, "loss": 0.4222, "step": 176 }, { "epoch": 0.6666666666666666, "grad_norm": 0.38620397448539734, "learning_rate": 1.2177894736842107e-05, "loss": 0.3351, "step": 177 }, { "epoch": 0.6704331450094162, "grad_norm": 0.3854876756668091, "learning_rate": 1.1648421052631579e-05, "loss": 0.3766, "step": 178 }, { "epoch": 0.6741996233521658, "grad_norm": 0.45001623034477234, "learning_rate": 1.1118947368421053e-05, "loss": 0.5356, "step": 179 }, { "epoch": 0.6779661016949152, "grad_norm": 0.5555963516235352, "learning_rate": 1.0589473684210526e-05, "loss": 0.3512, "step": 180 }, { "epoch": 0.6817325800376648, "grad_norm": 0.385834276676178, "learning_rate": 1.0060000000000002e-05, "loss": 0.3159, "step": 181 }, { "epoch": 0.6854990583804144, "grad_norm": 0.4277232587337494, "learning_rate": 9.530526315789474e-06, "loss": 0.3922, "step": 182 }, { "epoch": 0.6892655367231638, "grad_norm": 0.4338568449020386, "learning_rate": 9.001052631578948e-06, "loss": 0.3263, "step": 183 }, { "epoch": 0.6930320150659134, "grad_norm": 0.4069223701953888, "learning_rate": 8.471578947368422e-06, "loss": 0.3175, "step": 184 }, { "epoch": 0.696798493408663, "grad_norm": 0.47060683369636536, "learning_rate": 7.942105263157895e-06, "loss": 0.3415, "step": 185 }, { "epoch": 0.7005649717514124, "grad_norm": 0.43340185284614563, "learning_rate": 7.412631578947368e-06, "loss": 0.3047, "step": 186 }, { "epoch": 0.704331450094162, "grad_norm": 0.5115451812744141, "learning_rate": 6.883157894736843e-06, "loss": 0.3594, "step": 187 }, { "epoch": 0.7080979284369114, "grad_norm": 0.4953676164150238, "learning_rate": 6.3536842105263166e-06, "loss": 0.4087, "step": 188 }, { "epoch": 0.711864406779661, "grad_norm": 0.531154990196228, "learning_rate": 5.8242105263157895e-06, "loss": 0.2525, "step": 189 }, { "epoch": 0.7156308851224106, "grad_norm": 0.4638945758342743, "learning_rate": 5.294736842105263e-06, "loss": 0.2697, "step": 190 }, { "epoch": 0.71939736346516, "grad_norm": 0.5047771334648132, "learning_rate": 4.765263157894737e-06, "loss": 0.3704, "step": 191 }, { "epoch": 0.7231638418079096, "grad_norm": 0.5970916152000427, "learning_rate": 4.235789473684211e-06, "loss": 0.2877, "step": 192 }, { "epoch": 0.7269303201506592, "grad_norm": 0.49476003646850586, "learning_rate": 3.706315789473684e-06, "loss": 0.3429, "step": 193 }, { "epoch": 0.7306967984934086, "grad_norm": 0.5486244559288025, "learning_rate": 3.1768421052631583e-06, "loss": 0.3828, "step": 194 }, { "epoch": 0.7344632768361582, "grad_norm": 0.6069429516792297, "learning_rate": 2.6473684210526316e-06, "loss": 0.2767, "step": 195 }, { "epoch": 0.7382297551789078, "grad_norm": 0.6400809288024902, "learning_rate": 2.1178947368421054e-06, "loss": 0.3056, "step": 196 }, { "epoch": 0.7419962335216572, "grad_norm": 0.5533387064933777, "learning_rate": 1.5884210526315791e-06, "loss": 0.2929, "step": 197 }, { "epoch": 0.7457627118644068, "grad_norm": 0.68741774559021, "learning_rate": 1.0589473684210527e-06, "loss": 0.2843, "step": 198 }, { "epoch": 0.7495291902071564, "grad_norm": 0.8008204102516174, "learning_rate": 5.294736842105263e-07, "loss": 0.2842, "step": 199 }, { "epoch": 0.7532956685499058, "grad_norm": 0.8741005659103394, "learning_rate": 0.0, "loss": 0.2125, "step": 200 }, { "epoch": 0.7532956685499058, "eval_loss": 0.3771364390850067, "eval_runtime": 18.5487, "eval_samples_per_second": 6.038, "eval_steps_per_second": 1.51, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.7886300094464e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }