|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.03965551772481474, |
|
"eval_steps": 50, |
|
"global_step": 99, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0004005607850991388, |
|
"grad_norm": 2.4344990253448486, |
|
"learning_rate": 5e-05, |
|
"loss": 3.1012, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0004005607850991388, |
|
"eval_loss": 3.0820372104644775, |
|
"eval_runtime": 28.6956, |
|
"eval_samples_per_second": 36.661, |
|
"eval_steps_per_second": 18.33, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0008011215701982776, |
|
"grad_norm": 1.7689874172210693, |
|
"learning_rate": 0.0001, |
|
"loss": 2.9772, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0012016823552974164, |
|
"grad_norm": 2.805964708328247, |
|
"learning_rate": 0.00015, |
|
"loss": 3.4926, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0016022431403965552, |
|
"grad_norm": 2.956672430038452, |
|
"learning_rate": 0.0002, |
|
"loss": 3.6622, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.002002803925495694, |
|
"grad_norm": 2.5529518127441406, |
|
"learning_rate": 0.00025, |
|
"loss": 2.9273, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.002403364710594833, |
|
"grad_norm": 1.831852912902832, |
|
"learning_rate": 0.0003, |
|
"loss": 2.5788, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0028039254956939716, |
|
"grad_norm": 1.6226868629455566, |
|
"learning_rate": 0.00035, |
|
"loss": 2.5177, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0032044862807931104, |
|
"grad_norm": 2.354496955871582, |
|
"learning_rate": 0.0004, |
|
"loss": 2.5655, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0036050470658922492, |
|
"grad_norm": 2.2879300117492676, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 2.5502, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.004005607850991388, |
|
"grad_norm": 2.574815034866333, |
|
"learning_rate": 0.0005, |
|
"loss": 2.2454, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004406168636090527, |
|
"grad_norm": 1.6621110439300537, |
|
"learning_rate": 0.0004998442655654946, |
|
"loss": 1.8034, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.004806729421189666, |
|
"grad_norm": 1.984192132949829, |
|
"learning_rate": 0.0004993772562876909, |
|
"loss": 2.6323, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0052072902062888045, |
|
"grad_norm": 2.288874864578247, |
|
"learning_rate": 0.0004985995540019955, |
|
"loss": 1.9088, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.005607850991387943, |
|
"grad_norm": 1.5363634824752808, |
|
"learning_rate": 0.0004975121276286136, |
|
"loss": 2.0859, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.006008411776487082, |
|
"grad_norm": 1.1328516006469727, |
|
"learning_rate": 0.0004961163319653958, |
|
"loss": 1.9693, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.006408972561586221, |
|
"grad_norm": 1.3997936248779297, |
|
"learning_rate": 0.0004944139059999286, |
|
"loss": 1.9882, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00680953334668536, |
|
"grad_norm": 1.317238688468933, |
|
"learning_rate": 0.000492406970742972, |
|
"loss": 1.9312, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0072100941317844985, |
|
"grad_norm": 1.458006501197815, |
|
"learning_rate": 0.0004900980265859448, |
|
"loss": 2.4625, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.007610654916883637, |
|
"grad_norm": 0.970038115978241, |
|
"learning_rate": 0.0004874899501857477, |
|
"loss": 2.2326, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.008011215701982776, |
|
"grad_norm": 1.6142843961715698, |
|
"learning_rate": 0.00048458599088080736, |
|
"loss": 1.9539, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.008411776487081914, |
|
"grad_norm": 1.353042483329773, |
|
"learning_rate": 0.0004813897666428053, |
|
"loss": 2.335, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.008812337272181054, |
|
"grad_norm": 1.423920750617981, |
|
"learning_rate": 0.00047790525956913543, |
|
"loss": 1.7413, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.009212898057280192, |
|
"grad_norm": 1.5239485502243042, |
|
"learning_rate": 0.0004741368109217071, |
|
"loss": 2.2131, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.009613458842379331, |
|
"grad_norm": 1.8239572048187256, |
|
"learning_rate": 0.00047008911571827283, |
|
"loss": 1.7396, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01001401962747847, |
|
"grad_norm": 1.0884772539138794, |
|
"learning_rate": 0.00046576721688302105, |
|
"loss": 2.2057, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.010414580412577609, |
|
"grad_norm": 1.4631233215332031, |
|
"learning_rate": 0.0004611764989637205, |
|
"loss": 1.9047, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.010815141197676747, |
|
"grad_norm": 1.1582131385803223, |
|
"learning_rate": 0.0004563226814232444, |
|
"loss": 2.5048, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.011215701982775887, |
|
"grad_norm": 1.416812777519226, |
|
"learning_rate": 0.0004512118115138315, |
|
"loss": 1.8986, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.011616262767875024, |
|
"grad_norm": 1.1690479516983032, |
|
"learning_rate": 0.0004458502567429631, |
|
"loss": 2.1514, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.012016823552974164, |
|
"grad_norm": 1.16459059715271, |
|
"learning_rate": 0.00044024469694024196, |
|
"loss": 1.8639, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.012417384338073302, |
|
"grad_norm": 1.056779384613037, |
|
"learning_rate": 0.00043440211593515554, |
|
"loss": 1.9256, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.012817945123172442, |
|
"grad_norm": 1.5517312288284302, |
|
"learning_rate": 0.0004283297928560951, |
|
"loss": 1.9297, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01321850590827158, |
|
"grad_norm": 1.3826098442077637, |
|
"learning_rate": 0.0004220352930614672, |
|
"loss": 2.282, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.01361906669337072, |
|
"grad_norm": 1.194398283958435, |
|
"learning_rate": 0.00041552645871420013, |
|
"loss": 2.2492, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.014019627478469857, |
|
"grad_norm": 1.8520417213439941, |
|
"learning_rate": 0.00040881139901138467, |
|
"loss": 2.4, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.014420188263568997, |
|
"grad_norm": 1.278959035873413, |
|
"learning_rate": 0.00040189848008122475, |
|
"loss": 2.1325, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.014820749048668135, |
|
"grad_norm": 1.786801815032959, |
|
"learning_rate": 0.00039479631455988334, |
|
"loss": 2.0348, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.015221309833767275, |
|
"grad_norm": 1.3184572458267212, |
|
"learning_rate": 0.0003875137508612103, |
|
"loss": 2.0454, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.015621870618866412, |
|
"grad_norm": 1.1530405282974243, |
|
"learning_rate": 0.00038005986215272055, |
|
"loss": 2.2434, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.016022431403965552, |
|
"grad_norm": 1.4723411798477173, |
|
"learning_rate": 0.0003724439350515571, |
|
"loss": 1.8833, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.016422992189064692, |
|
"grad_norm": 1.6857566833496094, |
|
"learning_rate": 0.0003646754580545226, |
|
"loss": 2.2384, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.016823552974163828, |
|
"grad_norm": 1.2251979112625122, |
|
"learning_rate": 0.000356764109716594, |
|
"loss": 2.5669, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.017224113759262968, |
|
"grad_norm": 1.5817160606384277, |
|
"learning_rate": 0.00034871974659264783, |
|
"loss": 2.5998, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.017624674544362107, |
|
"grad_norm": 1.326270580291748, |
|
"learning_rate": 0.0003405523909574206, |
|
"loss": 2.2238, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.018025235329461247, |
|
"grad_norm": 0.9494209885597229, |
|
"learning_rate": 0.0003322722183190025, |
|
"loss": 2.0566, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.018425796114560383, |
|
"grad_norm": 1.1728250980377197, |
|
"learning_rate": 0.0003238895447414211, |
|
"loss": 1.697, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.018826356899659523, |
|
"grad_norm": 1.5986175537109375, |
|
"learning_rate": 0.0003154148139921102, |
|
"loss": 1.7549, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.019226917684758663, |
|
"grad_norm": 1.1737697124481201, |
|
"learning_rate": 0.00030685858453027663, |
|
"loss": 1.7618, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.019627478469857802, |
|
"grad_norm": 1.0616875886917114, |
|
"learning_rate": 0.0002982315163523742, |
|
"loss": 2.3967, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.02002803925495694, |
|
"grad_norm": 1.4347580671310425, |
|
"learning_rate": 0.000289544357711076, |
|
"loss": 1.7634, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02002803925495694, |
|
"eval_loss": 2.012052536010742, |
|
"eval_runtime": 28.5887, |
|
"eval_samples_per_second": 36.798, |
|
"eval_steps_per_second": 18.399, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.020428600040056078, |
|
"grad_norm": 1.5467963218688965, |
|
"learning_rate": 0.0002808079317242896, |
|
"loss": 2.0926, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.020829160825155218, |
|
"grad_norm": 1.1608525514602661, |
|
"learning_rate": 0.0002720331228909005, |
|
"loss": 1.9368, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.021229721610254357, |
|
"grad_norm": 1.5125212669372559, |
|
"learning_rate": 0.00026323086353004075, |
|
"loss": 2.2591, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.021630282395353494, |
|
"grad_norm": 1.141327142715454, |
|
"learning_rate": 0.0002544121201607822, |
|
"loss": 1.8238, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.022030843180452633, |
|
"grad_norm": 1.1864770650863647, |
|
"learning_rate": 0.00024558787983921783, |
|
"loss": 1.9923, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.022431403965551773, |
|
"grad_norm": 1.2866952419281006, |
|
"learning_rate": 0.0002367691364699592, |
|
"loss": 1.9187, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.022831964750650913, |
|
"grad_norm": 1.362595558166504, |
|
"learning_rate": 0.00022796687710909964, |
|
"loss": 2.0923, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.02323252553575005, |
|
"grad_norm": 1.3138153553009033, |
|
"learning_rate": 0.00021919206827571036, |
|
"loss": 2.0055, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.02363308632084919, |
|
"grad_norm": 1.1482343673706055, |
|
"learning_rate": 0.00021045564228892402, |
|
"loss": 1.9882, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.024033647105948328, |
|
"grad_norm": 1.2140475511550903, |
|
"learning_rate": 0.00020176848364762578, |
|
"loss": 1.725, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.024434207891047468, |
|
"grad_norm": 1.5149836540222168, |
|
"learning_rate": 0.00019314141546972343, |
|
"loss": 2.1587, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.024834768676146604, |
|
"grad_norm": 1.5307202339172363, |
|
"learning_rate": 0.00018458518600788986, |
|
"loss": 2.1868, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.025235329461245744, |
|
"grad_norm": 1.1949517726898193, |
|
"learning_rate": 0.00017611045525857898, |
|
"loss": 2.0899, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.025635890246344883, |
|
"grad_norm": 1.2019050121307373, |
|
"learning_rate": 0.0001677277816809975, |
|
"loss": 1.7959, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.026036451031444023, |
|
"grad_norm": 1.137641429901123, |
|
"learning_rate": 0.00015944760904257942, |
|
"loss": 1.9071, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02643701181654316, |
|
"grad_norm": 1.4122196435928345, |
|
"learning_rate": 0.0001512802534073522, |
|
"loss": 1.9875, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0268375726016423, |
|
"grad_norm": 1.3938771486282349, |
|
"learning_rate": 0.00014323589028340596, |
|
"loss": 1.9302, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.02723813338674144, |
|
"grad_norm": 1.1219323873519897, |
|
"learning_rate": 0.00013532454194547733, |
|
"loss": 1.9583, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.02763869417184058, |
|
"grad_norm": 1.221145749092102, |
|
"learning_rate": 0.00012755606494844294, |
|
"loss": 2.1028, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.028039254956939715, |
|
"grad_norm": 1.6395137310028076, |
|
"learning_rate": 0.00011994013784727947, |
|
"loss": 1.955, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.028439815742038854, |
|
"grad_norm": 1.0482176542282104, |
|
"learning_rate": 0.00011248624913878966, |
|
"loss": 2.112, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.028840376527137994, |
|
"grad_norm": 1.2491412162780762, |
|
"learning_rate": 0.0001052036854401166, |
|
"loss": 2.2217, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.029240937312237134, |
|
"grad_norm": 1.3600395917892456, |
|
"learning_rate": 9.810151991877531e-05, |
|
"loss": 2.2656, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.02964149809733627, |
|
"grad_norm": 1.2350081205368042, |
|
"learning_rate": 9.118860098861537e-05, |
|
"loss": 2.0685, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03004205888243541, |
|
"grad_norm": 1.2208542823791504, |
|
"learning_rate": 8.44735412857999e-05, |
|
"loss": 1.539, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03044261966753455, |
|
"grad_norm": 1.370618462562561, |
|
"learning_rate": 7.79647069385328e-05, |
|
"loss": 2.0545, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03084318045263369, |
|
"grad_norm": 1.1490741968154907, |
|
"learning_rate": 7.167020714390501e-05, |
|
"loss": 2.0587, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.031243741237732825, |
|
"grad_norm": 1.220718502998352, |
|
"learning_rate": 6.559788406484446e-05, |
|
"loss": 1.8599, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.031644302022831965, |
|
"grad_norm": 1.64824378490448, |
|
"learning_rate": 5.975530305975807e-05, |
|
"loss": 2.3586, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.032044862807931104, |
|
"grad_norm": 1.495094656944275, |
|
"learning_rate": 5.414974325703686e-05, |
|
"loss": 2.1085, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.032445423593030244, |
|
"grad_norm": 1.1038875579833984, |
|
"learning_rate": 4.8788188486168616e-05, |
|
"loss": 2.1465, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.032845984378129384, |
|
"grad_norm": 1.450072169303894, |
|
"learning_rate": 4.367731857675569e-05, |
|
"loss": 1.9168, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.03324654516322852, |
|
"grad_norm": 1.2426930665969849, |
|
"learning_rate": 3.882350103627952e-05, |
|
"loss": 2.0372, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.033647105948327656, |
|
"grad_norm": 1.189127802848816, |
|
"learning_rate": 3.423278311697897e-05, |
|
"loss": 1.6364, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.034047666733426796, |
|
"grad_norm": 1.3084362745285034, |
|
"learning_rate": 2.9910884281727225e-05, |
|
"loss": 2.0304, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.034448227518525935, |
|
"grad_norm": 0.7917243838310242, |
|
"learning_rate": 2.586318907829291e-05, |
|
"loss": 2.3032, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.034848788303625075, |
|
"grad_norm": 1.2357211112976074, |
|
"learning_rate": 2.209474043086457e-05, |
|
"loss": 1.8531, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.035249349088724215, |
|
"grad_norm": 1.5522675514221191, |
|
"learning_rate": 1.861023335719475e-05, |
|
"loss": 2.0952, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.035649909873823354, |
|
"grad_norm": 1.166305422782898, |
|
"learning_rate": 1.5414009119192633e-05, |
|
"loss": 1.7682, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.036050470658922494, |
|
"grad_norm": 1.2451746463775635, |
|
"learning_rate": 1.25100498142523e-05, |
|
"loss": 1.9993, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03645103144402163, |
|
"grad_norm": 1.251763105392456, |
|
"learning_rate": 9.901973414055187e-06, |
|
"loss": 1.9152, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.036851592229120766, |
|
"grad_norm": 1.1293752193450928, |
|
"learning_rate": 7.593029257027956e-06, |
|
"loss": 2.0776, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.037252153014219906, |
|
"grad_norm": 1.1258478164672852, |
|
"learning_rate": 5.5860940000714015e-06, |
|
"loss": 2.1167, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.037652713799319046, |
|
"grad_norm": 1.185185432434082, |
|
"learning_rate": 3.8836680346041594e-06, |
|
"loss": 1.8718, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.038053274584418185, |
|
"grad_norm": 1.2053008079528809, |
|
"learning_rate": 2.487872371386424e-06, |
|
"loss": 2.3429, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.038453835369517325, |
|
"grad_norm": 0.8643404245376587, |
|
"learning_rate": 1.4004459980045125e-06, |
|
"loss": 1.8221, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.038854396154616465, |
|
"grad_norm": 1.4989515542984009, |
|
"learning_rate": 6.22743712309054e-07, |
|
"loss": 1.8639, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.039254956939715605, |
|
"grad_norm": 1.2497055530548096, |
|
"learning_rate": 1.557344345054501e-07, |
|
"loss": 2.279, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.03965551772481474, |
|
"grad_norm": 1.5901930332183838, |
|
"learning_rate": 0.0, |
|
"loss": 2.0944, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 99, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5038480606887936.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|