{
  "best_metric": 1.2174347639083862,
  "best_model_checkpoint": "miner_id_24/checkpoint-200",
  "epoch": 0.02666133439978671,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00013330667199893355,
      "grad_norm": 1.0745736360549927,
      "learning_rate": 1e-05,
      "loss": 1.4603,
      "step": 1
    },
    {
      "epoch": 0.00013330667199893355,
      "eval_loss": 1.7412927150726318,
      "eval_runtime": 893.9334,
      "eval_samples_per_second": 14.133,
      "eval_steps_per_second": 3.534,
      "step": 1
    },
    {
      "epoch": 0.0002666133439978671,
      "grad_norm": 1.1597387790679932,
      "learning_rate": 2e-05,
      "loss": 1.3804,
      "step": 2
    },
    {
      "epoch": 0.0003999200159968006,
      "grad_norm": 1.2239619493484497,
      "learning_rate": 3e-05,
      "loss": 1.4915,
      "step": 3
    },
    {
      "epoch": 0.0005332266879957342,
      "grad_norm": 1.4500244855880737,
      "learning_rate": 4e-05,
      "loss": 1.6533,
      "step": 4
    },
    {
      "epoch": 0.0006665333599946677,
      "grad_norm": 1.293084740638733,
      "learning_rate": 5e-05,
      "loss": 1.4773,
      "step": 5
    },
    {
      "epoch": 0.0007998400319936012,
      "grad_norm": 1.2047165632247925,
      "learning_rate": 6e-05,
      "loss": 1.3702,
      "step": 6
    },
    {
      "epoch": 0.0009331467039925348,
      "grad_norm": 0.8491549491882324,
      "learning_rate": 7e-05,
      "loss": 1.3265,
      "step": 7
    },
    {
      "epoch": 0.0010664533759914684,
      "grad_norm": 0.9282706379890442,
      "learning_rate": 8e-05,
      "loss": 1.2982,
      "step": 8
    },
    {
      "epoch": 0.0011997600479904018,
      "grad_norm": 0.9100210666656494,
      "learning_rate": 9e-05,
      "loss": 1.3398,
      "step": 9
    },
    {
      "epoch": 0.0013330667199893355,
      "grad_norm": 0.9498695731163025,
      "learning_rate": 0.0001,
      "loss": 1.1851,
      "step": 10
    },
    {
      "epoch": 0.001466373391988269,
      "grad_norm": 0.9965104460716248,
      "learning_rate": 9.999316524962345e-05,
      "loss": 1.2516,
      "step": 11
    },
    {
      "epoch": 0.0015996800639872025,
      "grad_norm": 0.8492946028709412,
      "learning_rate": 9.997266286704631e-05,
      "loss": 1.2752,
      "step": 12
    },
    {
      "epoch": 0.0017329867359861361,
      "grad_norm": 0.968284547328949,
      "learning_rate": 9.993849845741524e-05,
      "loss": 1.2821,
      "step": 13
    },
    {
      "epoch": 0.0018662934079850695,
      "grad_norm": 0.7806897163391113,
      "learning_rate": 9.989068136093873e-05,
      "loss": 1.1962,
      "step": 14
    },
    {
      "epoch": 0.001999600079984003,
      "grad_norm": 0.8861862421035767,
      "learning_rate": 9.98292246503335e-05,
      "loss": 1.304,
      "step": 15
    },
    {
      "epoch": 0.002132906751982937,
      "grad_norm": 0.8310438394546509,
      "learning_rate": 9.975414512725057e-05,
      "loss": 1.2358,
      "step": 16
    },
    {
      "epoch": 0.0022662134239818704,
      "grad_norm": 0.8757510781288147,
      "learning_rate": 9.966546331768191e-05,
      "loss": 1.4375,
      "step": 17
    },
    {
      "epoch": 0.0023995200959808036,
      "grad_norm": 1.1226414442062378,
      "learning_rate": 9.956320346634876e-05,
      "loss": 1.3794,
      "step": 18
    },
    {
      "epoch": 0.0025328267679797373,
      "grad_norm": 0.8201644420623779,
      "learning_rate": 9.944739353007344e-05,
      "loss": 1.2039,
      "step": 19
    },
    {
      "epoch": 0.002666133439978671,
      "grad_norm": 0.854809045791626,
      "learning_rate": 9.931806517013612e-05,
      "loss": 1.2542,
      "step": 20
    },
    {
      "epoch": 0.0027994401119776045,
      "grad_norm": 0.9954699873924255,
      "learning_rate": 9.917525374361912e-05,
      "loss": 1.3179,
      "step": 21
    },
    {
      "epoch": 0.002932746783976538,
      "grad_norm": 0.823428213596344,
      "learning_rate": 9.901899829374047e-05,
      "loss": 1.1712,
      "step": 22
    },
    {
      "epoch": 0.0030660534559754714,
      "grad_norm": 0.8914350271224976,
      "learning_rate": 9.884934153917997e-05,
      "loss": 1.1579,
      "step": 23
    },
    {
      "epoch": 0.003199360127974405,
      "grad_norm": 0.8348805904388428,
      "learning_rate": 9.86663298624003e-05,
      "loss": 1.1415,
      "step": 24
    },
    {
      "epoch": 0.0033326667999733386,
      "grad_norm": 1.009377360343933,
      "learning_rate": 9.847001329696653e-05,
      "loss": 1.4218,
      "step": 25
    },
    {
      "epoch": 0.0034659734719722723,
      "grad_norm": 0.9172560572624207,
      "learning_rate": 9.826044551386744e-05,
      "loss": 1.2804,
      "step": 26
    },
    {
      "epoch": 0.003599280143971206,
      "grad_norm": 0.826400876045227,
      "learning_rate": 9.803768380684242e-05,
      "loss": 1.1378,
      "step": 27
    },
    {
      "epoch": 0.003732586815970139,
      "grad_norm": 0.9295492172241211,
      "learning_rate": 9.780178907671789e-05,
      "loss": 1.3835,
      "step": 28
    },
    {
      "epoch": 0.0038658934879690727,
      "grad_norm": 0.8225899934768677,
      "learning_rate": 9.755282581475769e-05,
      "loss": 1.1273,
      "step": 29
    },
    {
      "epoch": 0.003999200159968006,
      "grad_norm": 0.8576282858848572,
      "learning_rate": 9.729086208503174e-05,
      "loss": 1.1702,
      "step": 30
    },
    {
      "epoch": 0.00413250683196694,
      "grad_norm": 0.926065981388092,
      "learning_rate": 9.701596950580806e-05,
      "loss": 1.2486,
      "step": 31
    },
    {
      "epoch": 0.004265813503965874,
      "grad_norm": 0.8948851823806763,
      "learning_rate": 9.672822322997305e-05,
      "loss": 1.35,
      "step": 32
    },
    {
      "epoch": 0.004399120175964807,
      "grad_norm": 0.9296471476554871,
      "learning_rate": 9.642770192448536e-05,
      "loss": 1.2109,
      "step": 33
    },
    {
      "epoch": 0.004532426847963741,
      "grad_norm": 0.9554625153541565,
      "learning_rate": 9.611448774886924e-05,
      "loss": 1.2104,
      "step": 34
    },
    {
      "epoch": 0.0046657335199626745,
      "grad_norm": 0.9973741769790649,
      "learning_rate": 9.578866633275288e-05,
      "loss": 1.2216,
      "step": 35
    },
    {
      "epoch": 0.004799040191961607,
      "grad_norm": 0.9852436780929565,
      "learning_rate": 9.545032675245813e-05,
      "loss": 1.2423,
      "step": 36
    },
    {
      "epoch": 0.004932346863960541,
      "grad_norm": 1.0341085195541382,
      "learning_rate": 9.509956150664796e-05,
      "loss": 1.1927,
      "step": 37
    },
    {
      "epoch": 0.0050656535359594745,
      "grad_norm": 1.0837221145629883,
      "learning_rate": 9.473646649103818e-05,
      "loss": 1.2843,
      "step": 38
    },
    {
      "epoch": 0.005198960207958408,
      "grad_norm": 1.0713090896606445,
      "learning_rate": 9.43611409721806e-05,
      "loss": 1.1197,
      "step": 39
    },
    {
      "epoch": 0.005332266879957342,
      "grad_norm": 1.299558401107788,
      "learning_rate": 9.397368756032445e-05,
      "loss": 1.4558,
      "step": 40
    },
    {
      "epoch": 0.0054655735519562754,
      "grad_norm": 1.3224207162857056,
      "learning_rate": 9.357421218136386e-05,
      "loss": 1.5213,
      "step": 41
    },
    {
      "epoch": 0.005598880223955209,
      "grad_norm": 1.0918453931808472,
      "learning_rate": 9.316282404787871e-05,
      "loss": 1.2473,
      "step": 42
    },
    {
      "epoch": 0.005732186895954143,
      "grad_norm": 1.2147316932678223,
      "learning_rate": 9.273963562927695e-05,
      "loss": 1.2924,
      "step": 43
    },
    {
      "epoch": 0.005865493567953076,
      "grad_norm": 1.1717326641082764,
      "learning_rate": 9.230476262104677e-05,
      "loss": 1.1122,
      "step": 44
    },
    {
      "epoch": 0.00599880023995201,
      "grad_norm": 1.5002002716064453,
      "learning_rate": 9.185832391312644e-05,
      "loss": 1.3528,
      "step": 45
    },
    {
      "epoch": 0.006132106911950943,
      "grad_norm": 1.1809282302856445,
      "learning_rate": 9.140044155740101e-05,
      "loss": 1.0353,
      "step": 46
    },
    {
      "epoch": 0.006265413583949876,
      "grad_norm": 1.3819034099578857,
      "learning_rate": 9.093124073433463e-05,
      "loss": 1.3988,
      "step": 47
    },
    {
      "epoch": 0.00639872025594881,
      "grad_norm": 2.4021060466766357,
      "learning_rate": 9.045084971874738e-05,
      "loss": 1.4357,
      "step": 48
    },
    {
      "epoch": 0.006532026927947744,
      "grad_norm": 3.4780421257019043,
      "learning_rate": 8.995939984474624e-05,
      "loss": 1.3491,
      "step": 49
    },
    {
      "epoch": 0.006665333599946677,
      "grad_norm": 4.60449743270874,
      "learning_rate": 8.945702546981969e-05,
      "loss": 1.6521,
      "step": 50
    },
    {
      "epoch": 0.006665333599946677,
      "eval_loss": 1.2686829566955566,
      "eval_runtime": 897.6977,
      "eval_samples_per_second": 14.074,
      "eval_steps_per_second": 3.519,
      "step": 50
    },
    {
      "epoch": 0.006798640271945611,
      "grad_norm": 0.8523968458175659,
      "learning_rate": 8.894386393810563e-05,
      "loss": 1.207,
      "step": 51
    },
    {
      "epoch": 0.0069319469439445445,
      "grad_norm": 0.8501446843147278,
      "learning_rate": 8.842005554284296e-05,
      "loss": 1.4451,
      "step": 52
    },
    {
      "epoch": 0.007065253615943478,
      "grad_norm": 0.6356542110443115,
      "learning_rate": 8.788574348801675e-05,
      "loss": 1.126,
      "step": 53
    },
    {
      "epoch": 0.007198560287942412,
      "grad_norm": 0.7071014642715454,
      "learning_rate": 8.73410738492077e-05,
      "loss": 1.3005,
      "step": 54
    },
    {
      "epoch": 0.007331866959941345,
      "grad_norm": 0.7470281720161438,
      "learning_rate": 8.678619553365659e-05,
      "loss": 1.1553,
      "step": 55
    },
    {
      "epoch": 0.007465173631940278,
      "grad_norm": 0.7439374327659607,
      "learning_rate": 8.622126023955446e-05,
      "loss": 1.2164,
      "step": 56
    },
    {
      "epoch": 0.007598480303939212,
      "grad_norm": 0.789055585861206,
      "learning_rate": 8.564642241456986e-05,
      "loss": 1.3809,
      "step": 57
    },
    {
      "epoch": 0.0077317869759381454,
      "grad_norm": 0.7421045303344727,
      "learning_rate": 8.506183921362443e-05,
      "loss": 1.3091,
      "step": 58
    },
    {
      "epoch": 0.007865093647937079,
      "grad_norm": 0.7292497754096985,
      "learning_rate": 8.44676704559283e-05,
      "loss": 1.1928,
      "step": 59
    },
    {
      "epoch": 0.007998400319936013,
      "grad_norm": 0.7535648345947266,
      "learning_rate": 8.386407858128706e-05,
      "loss": 1.3245,
      "step": 60
    },
    {
      "epoch": 0.008131706991934946,
      "grad_norm": 0.7184985280036926,
      "learning_rate": 8.32512286056924e-05,
      "loss": 1.2755,
      "step": 61
    },
    {
      "epoch": 0.00826501366393388,
      "grad_norm": 0.705776035785675,
      "learning_rate": 8.262928807620843e-05,
      "loss": 1.1553,
      "step": 62
    },
    {
      "epoch": 0.008398320335932814,
      "grad_norm": 0.680077850818634,
      "learning_rate": 8.199842702516583e-05,
      "loss": 1.0244,
      "step": 63
    },
    {
      "epoch": 0.008531627007931747,
      "grad_norm": 0.7714347839355469,
      "learning_rate": 8.135881792367686e-05,
      "loss": 1.2602,
      "step": 64
    },
    {
      "epoch": 0.00866493367993068,
      "grad_norm": 0.8244078755378723,
      "learning_rate": 8.07106356344834e-05,
      "loss": 1.2375,
      "step": 65
    },
    {
      "epoch": 0.008798240351929614,
      "grad_norm": 0.8791713714599609,
      "learning_rate": 8.005405736415126e-05,
      "loss": 1.2654,
      "step": 66
    },
    {
      "epoch": 0.008931547023928548,
      "grad_norm": 0.8415705561637878,
      "learning_rate": 7.938926261462366e-05,
      "loss": 1.2845,
      "step": 67
    },
    {
      "epoch": 0.009064853695927482,
      "grad_norm": 0.7405332326889038,
      "learning_rate": 7.871643313414718e-05,
      "loss": 1.1162,
      "step": 68
    },
    {
      "epoch": 0.009198160367926415,
      "grad_norm": 0.7569639682769775,
      "learning_rate": 7.803575286758364e-05,
      "loss": 1.1168,
      "step": 69
    },
    {
      "epoch": 0.009331467039925349,
      "grad_norm": 0.8448635935783386,
      "learning_rate": 7.734740790612136e-05,
      "loss": 1.3316,
      "step": 70
    },
    {
      "epoch": 0.009464773711924283,
      "grad_norm": 0.8111772537231445,
      "learning_rate": 7.66515864363997e-05,
      "loss": 1.085,
      "step": 71
    },
    {
      "epoch": 0.009598080383923215,
      "grad_norm": 0.8686326742172241,
      "learning_rate": 7.594847868906076e-05,
      "loss": 1.3275,
      "step": 72
    },
    {
      "epoch": 0.009731387055922148,
      "grad_norm": 0.8204224109649658,
      "learning_rate": 7.52382768867422e-05,
      "loss": 1.203,
      "step": 73
    },
    {
      "epoch": 0.009864693727921082,
      "grad_norm": 0.7859241962432861,
      "learning_rate": 7.452117519152542e-05,
      "loss": 1.1295,
      "step": 74
    },
    {
      "epoch": 0.009998000399920015,
      "grad_norm": 0.8578535914421082,
      "learning_rate": 7.379736965185368e-05,
      "loss": 1.2775,
      "step": 75
    },
    {
      "epoch": 0.010131307071918949,
      "grad_norm": 0.8531782031059265,
      "learning_rate": 7.30670581489344e-05,
      "loss": 1.1732,
      "step": 76
    },
    {
      "epoch": 0.010264613743917883,
      "grad_norm": 0.7920311093330383,
      "learning_rate": 7.233044034264034e-05,
      "loss": 1.2004,
      "step": 77
    },
    {
      "epoch": 0.010397920415916816,
      "grad_norm": 0.8537325263023376,
      "learning_rate": 7.158771761692464e-05,
      "loss": 1.2338,
      "step": 78
    },
    {
      "epoch": 0.01053122708791575,
      "grad_norm": 0.8466278910636902,
      "learning_rate": 7.083909302476453e-05,
      "loss": 1.1185,
      "step": 79
    },
    {
      "epoch": 0.010664533759914684,
      "grad_norm": 0.9058700203895569,
      "learning_rate": 7.008477123264848e-05,
      "loss": 1.229,
      "step": 80
    },
    {
      "epoch": 0.010797840431913617,
      "grad_norm": 0.8695547580718994,
      "learning_rate": 6.932495846462261e-05,
      "loss": 1.2438,
      "step": 81
    },
    {
      "epoch": 0.010931147103912551,
      "grad_norm": 0.9314249157905579,
      "learning_rate": 6.855986244591104e-05,
      "loss": 1.2332,
      "step": 82
    },
    {
      "epoch": 0.011064453775911485,
      "grad_norm": 0.9494042992591858,
      "learning_rate": 6.778969234612584e-05,
      "loss": 1.2074,
      "step": 83
    },
    {
      "epoch": 0.011197760447910418,
      "grad_norm": 0.9084473252296448,
      "learning_rate": 6.701465872208216e-05,
      "loss": 1.1922,
      "step": 84
    },
    {
      "epoch": 0.011331067119909352,
      "grad_norm": 0.9706754088401794,
      "learning_rate": 6.623497346023418e-05,
      "loss": 1.1319,
      "step": 85
    },
    {
      "epoch": 0.011464373791908285,
      "grad_norm": 0.9327569603919983,
      "learning_rate": 6.545084971874738e-05,
      "loss": 1.3322,
      "step": 86
    },
    {
      "epoch": 0.011597680463907219,
      "grad_norm": 1.0464143753051758,
      "learning_rate": 6.466250186922325e-05,
      "loss": 1.287,
      "step": 87
    },
    {
      "epoch": 0.011730987135906153,
      "grad_norm": 1.3841379880905151,
      "learning_rate": 6.387014543809223e-05,
      "loss": 1.214,
      "step": 88
    },
    {
      "epoch": 0.011864293807905086,
      "grad_norm": 1.0763241052627563,
      "learning_rate": 6.307399704769099e-05,
      "loss": 1.1554,
      "step": 89
    },
    {
      "epoch": 0.01199760047990402,
      "grad_norm": 0.9503827095031738,
      "learning_rate": 6.227427435703997e-05,
      "loss": 1.1839,
      "step": 90
    },
    {
      "epoch": 0.012130907151902954,
      "grad_norm": 1.066009759902954,
      "learning_rate": 6.147119600233758e-05,
      "loss": 1.3296,
      "step": 91
    },
    {
      "epoch": 0.012264213823901885,
      "grad_norm": 1.0369770526885986,
      "learning_rate": 6.066498153718735e-05,
      "loss": 1.153,
      "step": 92
    },
    {
      "epoch": 0.012397520495900819,
      "grad_norm": 1.1142950057983398,
      "learning_rate": 5.985585137257401e-05,
      "loss": 1.2537,
      "step": 93
    },
    {
      "epoch": 0.012530827167899753,
      "grad_norm": 1.1302510499954224,
      "learning_rate": 5.90440267166055e-05,
      "loss": 1.227,
      "step": 94
    },
    {
      "epoch": 0.012664133839898686,
      "grad_norm": 1.324735403060913,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 1.375,
      "step": 95
    },
    {
      "epoch": 0.01279744051189762,
      "grad_norm": 1.0952991247177124,
      "learning_rate": 5.74131823855921e-05,
      "loss": 1.0373,
      "step": 96
    },
    {
      "epoch": 0.012930747183896554,
      "grad_norm": 1.7994344234466553,
      "learning_rate": 5.6594608567103456e-05,
      "loss": 1.531,
      "step": 97
    },
    {
      "epoch": 0.013064053855895487,
      "grad_norm": 1.9991344213485718,
      "learning_rate": 5.577423184847932e-05,
      "loss": 1.2441,
      "step": 98
    },
    {
      "epoch": 0.013197360527894421,
      "grad_norm": 1.8803925514221191,
      "learning_rate": 5.495227651252315e-05,
      "loss": 1.238,
      "step": 99
    },
    {
      "epoch": 0.013330667199893355,
      "grad_norm": 2.514798164367676,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 1.2592,
      "step": 100
    },
    {
      "epoch": 0.013330667199893355,
      "eval_loss": 1.235968828201294,
      "eval_runtime": 897.8126,
      "eval_samples_per_second": 14.072,
      "eval_steps_per_second": 3.519,
      "step": 100
    },
    {
      "epoch": 0.013463973871892288,
      "grad_norm": 0.6205466985702515,
      "learning_rate": 5.330452921628497e-05,
      "loss": 1.2382,
      "step": 101
    },
    {
      "epoch": 0.013597280543891222,
      "grad_norm": 0.6360917091369629,
      "learning_rate": 5.247918773366112e-05,
      "loss": 1.0933,
      "step": 102
    },
    {
      "epoch": 0.013730587215890155,
      "grad_norm": 0.6676458716392517,
      "learning_rate": 5.165316846586541e-05,
      "loss": 1.2596,
      "step": 103
    },
    {
      "epoch": 0.013863893887889089,
      "grad_norm": 0.6764618158340454,
      "learning_rate": 5.0826697238317935e-05,
      "loss": 1.2296,
      "step": 104
    },
    {
      "epoch": 0.013997200559888023,
      "grad_norm": 0.6883326768875122,
      "learning_rate": 5e-05,
      "loss": 1.1956,
      "step": 105
    },
    {
      "epoch": 0.014130507231886956,
      "grad_norm": 0.6515330672264099,
      "learning_rate": 4.917330276168208e-05,
      "loss": 1.1945,
      "step": 106
    },
    {
      "epoch": 0.01426381390388589,
      "grad_norm": 0.7443110346794128,
      "learning_rate": 4.834683153413459e-05,
      "loss": 1.2503,
      "step": 107
    },
    {
      "epoch": 0.014397120575884824,
      "grad_norm": 0.6871118545532227,
      "learning_rate": 4.7520812266338885e-05,
      "loss": 1.1663,
      "step": 108
    },
    {
      "epoch": 0.014530427247883757,
      "grad_norm": 0.7479511499404907,
      "learning_rate": 4.669547078371504e-05,
      "loss": 1.4153,
      "step": 109
    },
    {
      "epoch": 0.01466373391988269,
      "grad_norm": 0.6945416927337646,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 1.2392,
      "step": 110
    },
    {
      "epoch": 0.014797040591881624,
      "grad_norm": 0.6980699300765991,
      "learning_rate": 4.504772348747687e-05,
      "loss": 1.3174,
      "step": 111
    },
    {
      "epoch": 0.014930347263880556,
      "grad_norm": 0.7077943086624146,
      "learning_rate": 4.4225768151520694e-05,
      "loss": 1.201,
      "step": 112
    },
    {
      "epoch": 0.01506365393587949,
      "grad_norm": 0.7695159316062927,
      "learning_rate": 4.3405391432896555e-05,
      "loss": 1.386,
      "step": 113
    },
    {
      "epoch": 0.015196960607878424,
      "grad_norm": 0.6997578144073486,
      "learning_rate": 4.2586817614407895e-05,
      "loss": 1.1997,
      "step": 114
    },
    {
      "epoch": 0.015330267279877357,
      "grad_norm": 0.7252641916275024,
      "learning_rate": 4.17702704859633e-05,
      "loss": 1.1792,
      "step": 115
    },
    {
      "epoch": 0.015463573951876291,
      "grad_norm": 0.7371767163276672,
      "learning_rate": 4.095597328339452e-05,
      "loss": 1.2095,
      "step": 116
    },
    {
      "epoch": 0.015596880623875225,
      "grad_norm": 0.7858007550239563,
      "learning_rate": 4.0144148627425993e-05,
      "loss": 1.3076,
      "step": 117
    },
    {
      "epoch": 0.015730187295874158,
      "grad_norm": 0.7438164353370667,
      "learning_rate": 3.933501846281267e-05,
      "loss": 1.2489,
      "step": 118
    },
    {
      "epoch": 0.015863493967873094,
      "grad_norm": 0.7789096236228943,
      "learning_rate": 3.852880399766243e-05,
      "loss": 1.1782,
      "step": 119
    },
    {
      "epoch": 0.015996800639872025,
      "grad_norm": 0.7536317110061646,
      "learning_rate": 3.772572564296005e-05,
      "loss": 1.1551,
      "step": 120
    },
    {
      "epoch": 0.01613010731187096,
      "grad_norm": 0.8432468771934509,
      "learning_rate": 3.6926002952309016e-05,
      "loss": 1.3212,
      "step": 121
    },
    {
      "epoch": 0.016263413983869893,
      "grad_norm": 0.8032596707344055,
      "learning_rate": 3.612985456190778e-05,
      "loss": 1.2541,
      "step": 122
    },
    {
      "epoch": 0.016396720655868825,
      "grad_norm": 0.8486832976341248,
      "learning_rate": 3.533749813077677e-05,
      "loss": 1.3336,
      "step": 123
    },
    {
      "epoch": 0.01653002732786776,
      "grad_norm": 0.7489065527915955,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 1.057,
      "step": 124
    },
    {
      "epoch": 0.016663333999866692,
      "grad_norm": 0.9419240355491638,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 1.3496,
      "step": 125
    },
    {
      "epoch": 0.016796640671865627,
      "grad_norm": 0.8470003008842468,
      "learning_rate": 3.298534127791785e-05,
      "loss": 1.1591,
      "step": 126
    },
    {
      "epoch": 0.01692994734386456,
      "grad_norm": 0.8417698740959167,
      "learning_rate": 3.221030765387417e-05,
      "loss": 1.1579,
      "step": 127
    },
    {
      "epoch": 0.017063254015863494,
      "grad_norm": 0.8789008855819702,
      "learning_rate": 3.144013755408895e-05,
      "loss": 1.2494,
      "step": 128
    },
    {
      "epoch": 0.017196560687862426,
      "grad_norm": 0.8741716742515564,
      "learning_rate": 3.0675041535377405e-05,
      "loss": 1.2255,
      "step": 129
    },
    {
      "epoch": 0.01732986735986136,
      "grad_norm": 0.8465598821640015,
      "learning_rate": 2.991522876735154e-05,
      "loss": 1.2017,
      "step": 130
    },
    {
      "epoch": 0.017463174031860294,
      "grad_norm": 0.8675022125244141,
      "learning_rate": 2.916090697523549e-05,
      "loss": 1.1874,
      "step": 131
    },
    {
      "epoch": 0.01759648070385923,
      "grad_norm": 0.9815752506256104,
      "learning_rate": 2.8412282383075363e-05,
      "loss": 1.4558,
      "step": 132
    },
    {
      "epoch": 0.01772978737585816,
      "grad_norm": 0.8390645384788513,
      "learning_rate": 2.766955965735968e-05,
      "loss": 1.2183,
      "step": 133
    },
    {
      "epoch": 0.017863094047857096,
      "grad_norm": 0.9450867772102356,
      "learning_rate": 2.693294185106562e-05,
      "loss": 1.2721,
      "step": 134
    },
    {
      "epoch": 0.017996400719856028,
      "grad_norm": 0.8508359789848328,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 1.1076,
      "step": 135
    },
    {
      "epoch": 0.018129707391854964,
      "grad_norm": 0.9367918372154236,
      "learning_rate": 2.547882480847461e-05,
      "loss": 1.1246,
      "step": 136
    },
    {
      "epoch": 0.018263014063853895,
      "grad_norm": 0.9508082270622253,
      "learning_rate": 2.476172311325783e-05,
      "loss": 1.1096,
      "step": 137
    },
    {
      "epoch": 0.01839632073585283,
      "grad_norm": 0.9181338548660278,
      "learning_rate": 2.405152131093926e-05,
      "loss": 1.1072,
      "step": 138
    },
    {
      "epoch": 0.018529627407851763,
      "grad_norm": 0.9841213226318359,
      "learning_rate": 2.3348413563600325e-05,
      "loss": 1.1864,
      "step": 139
    },
    {
      "epoch": 0.018662934079850698,
      "grad_norm": 0.9663724303245544,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 1.1096,
      "step": 140
    },
    {
      "epoch": 0.01879624075184963,
      "grad_norm": 0.9832901358604431,
      "learning_rate": 2.196424713241637e-05,
      "loss": 1.2234,
      "step": 141
    },
    {
      "epoch": 0.018929547423848565,
      "grad_norm": 1.0503119230270386,
      "learning_rate": 2.128356686585282e-05,
      "loss": 1.2802,
      "step": 142
    },
    {
      "epoch": 0.019062854095847497,
      "grad_norm": 1.010730504989624,
      "learning_rate": 2.061073738537635e-05,
      "loss": 1.1259,
      "step": 143
    },
    {
      "epoch": 0.01919616076784643,
      "grad_norm": 0.9640291929244995,
      "learning_rate": 1.9945942635848748e-05,
      "loss": 1.0263,
      "step": 144
    },
    {
      "epoch": 0.019329467439845364,
      "grad_norm": 1.2106753587722778,
      "learning_rate": 1.928936436551661e-05,
      "loss": 1.34,
      "step": 145
    },
    {
      "epoch": 0.019462774111844296,
      "grad_norm": 1.2731579542160034,
      "learning_rate": 1.8641182076323148e-05,
      "loss": 1.235,
      "step": 146
    },
    {
      "epoch": 0.019596080783843232,
      "grad_norm": 1.3919060230255127,
      "learning_rate": 1.800157297483417e-05,
      "loss": 1.2402,
      "step": 147
    },
    {
      "epoch": 0.019729387455842164,
      "grad_norm": 1.4603452682495117,
      "learning_rate": 1.7370711923791567e-05,
      "loss": 1.115,
      "step": 148
    },
    {
      "epoch": 0.0198626941278411,
      "grad_norm": 2.7993760108947754,
      "learning_rate": 1.6748771394307585e-05,
      "loss": 1.3494,
      "step": 149
    },
    {
      "epoch": 0.01999600079984003,
      "grad_norm": 2.829930543899536,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 1.0537,
      "step": 150
    },
    {
      "epoch": 0.01999600079984003,
      "eval_loss": 1.2199597358703613,
      "eval_runtime": 897.4455,
      "eval_samples_per_second": 14.078,
      "eval_steps_per_second": 3.52,
      "step": 150
    },
    {
      "epoch": 0.020129307471838966,
      "grad_norm": 0.5059590339660645,
      "learning_rate": 1.553232954407171e-05,
      "loss": 1.0344,
      "step": 151
    },
    {
      "epoch": 0.020262614143837898,
      "grad_norm": 0.6080087423324585,
      "learning_rate": 1.4938160786375572e-05,
      "loss": 1.2128,
      "step": 152
    },
    {
      "epoch": 0.020395920815836834,
      "grad_norm": 0.7243260145187378,
      "learning_rate": 1.435357758543015e-05,
      "loss": 1.3855,
      "step": 153
    },
    {
      "epoch": 0.020529227487835765,
      "grad_norm": 0.6286957859992981,
      "learning_rate": 1.3778739760445552e-05,
      "loss": 1.2555,
      "step": 154
    },
    {
      "epoch": 0.0206625341598347,
      "grad_norm": 0.6662344336509705,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 1.1873,
      "step": 155
    },
    {
      "epoch": 0.020795840831833633,
      "grad_norm": 0.6518131494522095,
      "learning_rate": 1.2658926150792322e-05,
      "loss": 1.2023,
      "step": 156
    },
    {
      "epoch": 0.020929147503832568,
      "grad_norm": 0.6527888178825378,
      "learning_rate": 1.2114256511983274e-05,
      "loss": 1.1803,
      "step": 157
    },
    {
      "epoch": 0.0210624541758315,
      "grad_norm": 0.721267580986023,
      "learning_rate": 1.157994445715706e-05,
      "loss": 1.362,
      "step": 158
    },
    {
      "epoch": 0.021195760847830435,
      "grad_norm": 0.7137548327445984,
      "learning_rate": 1.1056136061894384e-05,
      "loss": 1.4078,
      "step": 159
    },
    {
      "epoch": 0.021329067519829367,
      "grad_norm": 0.7020995616912842,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 1.2114,
      "step": 160
    },
    {
      "epoch": 0.021462374191828303,
      "grad_norm": 0.7052568793296814,
      "learning_rate": 1.0040600155253765e-05,
      "loss": 1.2552,
      "step": 161
    },
    {
      "epoch": 0.021595680863827234,
      "grad_norm": 0.702303946018219,
      "learning_rate": 9.549150281252633e-06,
      "loss": 1.0904,
      "step": 162
    },
    {
      "epoch": 0.021728987535826166,
      "grad_norm": 0.7139044404029846,
      "learning_rate": 9.068759265665384e-06,
      "loss": 1.2769,
      "step": 163
    },
    {
      "epoch": 0.021862294207825102,
      "grad_norm": 0.795620322227478,
      "learning_rate": 8.599558442598998e-06,
      "loss": 1.2366,
      "step": 164
    },
    {
      "epoch": 0.021995600879824034,
      "grad_norm": 0.7649041414260864,
      "learning_rate": 8.141676086873572e-06,
      "loss": 1.135,
      "step": 165
    },
    {
      "epoch": 0.02212890755182297,
      "grad_norm": 0.8941980004310608,
      "learning_rate": 7.695237378953223e-06,
      "loss": 1.4213,
      "step": 166
    },
    {
      "epoch": 0.0222622142238219,
      "grad_norm": 0.7104661464691162,
      "learning_rate": 7.260364370723044e-06,
      "loss": 1.1501,
      "step": 167
    },
    {
      "epoch": 0.022395520895820836,
      "grad_norm": 0.767971396446228,
      "learning_rate": 6.837175952121306e-06,
      "loss": 1.2285,
      "step": 168
    },
    {
      "epoch": 0.022528827567819768,
      "grad_norm": 0.8345792889595032,
      "learning_rate": 6.425787818636131e-06,
      "loss": 1.3538,
      "step": 169
    },
    {
      "epoch": 0.022662134239818704,
      "grad_norm": 0.7592571973800659,
      "learning_rate": 6.026312439675552e-06,
      "loss": 1.138,
      "step": 170
    },
    {
      "epoch": 0.022795440911817635,
      "grad_norm": 0.7913587689399719,
      "learning_rate": 5.6388590278194096e-06,
      "loss": 1.274,
      "step": 171
    },
    {
      "epoch": 0.02292874758381657,
      "grad_norm": 0.8551506400108337,
      "learning_rate": 5.263533508961827e-06,
      "loss": 1.3539,
      "step": 172
    },
    {
      "epoch": 0.023062054255815503,
      "grad_norm": 0.826088547706604,
      "learning_rate": 4.900438493352055e-06,
      "loss": 1.2791,
      "step": 173
    },
    {
      "epoch": 0.023195360927814438,
      "grad_norm": 0.8231903314590454,
      "learning_rate": 4.549673247541875e-06,
      "loss": 1.2643,
      "step": 174
    },
    {
      "epoch": 0.02332866759981337,
      "grad_norm": 0.8636263012886047,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 1.1534,
      "step": 175
    },
    {
      "epoch": 0.023461974271812305,
      "grad_norm": 0.8509513735771179,
      "learning_rate": 3.885512251130763e-06,
      "loss": 1.1875,
      "step": 176
    },
    {
      "epoch": 0.023595280943811237,
      "grad_norm": 0.8056614995002747,
      "learning_rate": 3.5722980755146517e-06,
      "loss": 1.2276,
      "step": 177
    },
    {
      "epoch": 0.023728587615810173,
      "grad_norm": 0.906745433807373,
      "learning_rate": 3.271776770026963e-06,
      "loss": 1.2333,
      "step": 178
    },
    {
      "epoch": 0.023861894287809104,
      "grad_norm": 0.8647125959396362,
      "learning_rate": 2.9840304941919415e-06,
      "loss": 1.1878,
      "step": 179
    },
    {
      "epoch": 0.02399520095980804,
      "grad_norm": 1.030241847038269,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 1.3258,
      "step": 180
    },
    {
      "epoch": 0.024128507631806972,
      "grad_norm": 0.8133326172828674,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 1.1028,
      "step": 181
    },
    {
      "epoch": 0.024261814303805907,
      "grad_norm": 0.9469070434570312,
      "learning_rate": 2.1982109232821178e-06,
      "loss": 1.2033,
      "step": 182
    },
    {
      "epoch": 0.02439512097580484,
      "grad_norm": 1.0034525394439697,
      "learning_rate": 1.962316193157593e-06,
      "loss": 1.3276,
      "step": 183
    },
    {
      "epoch": 0.02452842764780377,
      "grad_norm": 0.9274887442588806,
      "learning_rate": 1.7395544861325718e-06,
      "loss": 1.2858,
      "step": 184
    },
    {
      "epoch": 0.024661734319802706,
      "grad_norm": 0.9067505598068237,
      "learning_rate": 1.5299867030334814e-06,
      "loss": 1.128,
      "step": 185
    },
    {
      "epoch": 0.024795040991801638,
      "grad_norm": 0.8848114609718323,
      "learning_rate": 1.333670137599713e-06,
      "loss": 1.2095,
      "step": 186
    },
    {
      "epoch": 0.024928347663800574,
      "grad_norm": 1.0555644035339355,
      "learning_rate": 1.1506584608200367e-06,
      "loss": 1.3337,
      "step": 187
    },
    {
      "epoch": 0.025061654335799505,
      "grad_norm": 0.9022650718688965,
      "learning_rate": 9.810017062595322e-07,
      "loss": 1.0523,
      "step": 188
    },
    {
      "epoch": 0.02519496100779844,
      "grad_norm": 1.038295865058899,
      "learning_rate": 8.247462563808817e-07,
      "loss": 1.2955,
      "step": 189
    },
    {
      "epoch": 0.025328267679797373,
      "grad_norm": 1.0671110153198242,
      "learning_rate": 6.819348298638839e-07,
      "loss": 1.2303,
      "step": 190
    },
    {
      "epoch": 0.025461574351796308,
      "grad_norm": 1.0591447353363037,
      "learning_rate": 5.526064699265753e-07,
      "loss": 1.2003,
      "step": 191
    },
    {
      "epoch": 0.02559488102379524,
      "grad_norm": 1.053215742111206,
      "learning_rate": 4.367965336512403e-07,
      "loss": 1.2151,
      "step": 192
    },
    {
      "epoch": 0.025728187695794175,
      "grad_norm": 1.0813398361206055,
      "learning_rate": 3.3453668231809286e-07,
      "loss": 1.2032,
      "step": 193
    },
    {
      "epoch": 0.025861494367793107,
      "grad_norm": 1.0762261152267456,
      "learning_rate": 2.458548727494292e-07,
      "loss": 1.075,
      "step": 194
    },
    {
      "epoch": 0.025994801039792043,
      "grad_norm": 1.1010886430740356,
      "learning_rate": 1.7077534966650766e-07,
      "loss": 1.1568,
      "step": 195
    },
    {
      "epoch": 0.026128107711790974,
      "grad_norm": 1.2374165058135986,
      "learning_rate": 1.0931863906127327e-07,
      "loss": 1.2323,
      "step": 196
    },
    {
      "epoch": 0.02626141438378991,
      "grad_norm": 1.644420862197876,
      "learning_rate": 6.150154258476315e-08,
      "loss": 1.2802,
      "step": 197
    },
    {
      "epoch": 0.026394721055788842,
      "grad_norm": 1.7157775163650513,
      "learning_rate": 2.7337132953697554e-08,
      "loss": 1.2428,
      "step": 198
    },
    {
      "epoch": 0.026528027727787777,
      "grad_norm": 2.5402917861938477,
      "learning_rate": 6.834750376549792e-09,
      "loss": 1.2513,
      "step": 199
    },
    {
      "epoch": 0.02666133439978671,
      "grad_norm": 3.6814701557159424,
      "learning_rate": 0.0,
      "loss": 1.0864,
      "step": 200
    },
    {
      "epoch": 0.02666133439978671,
      "eval_loss": 1.2174347639083862,
      "eval_runtime": 897.4909,
      "eval_samples_per_second": 14.077,
      "eval_steps_per_second": 3.52,
      "step": 200
    }
  ],
  "logging_steps": 1,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.84377737068544e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}