|
{ |
|
"best_metric": 0.555241584777832, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 3.007518796992481, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015037593984962405, |
|
"grad_norm": 0.11457784473896027, |
|
"learning_rate": 1e-05, |
|
"loss": 0.6089, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.015037593984962405, |
|
"eval_loss": 0.8226256966590881, |
|
"eval_runtime": 3.38, |
|
"eval_samples_per_second": 33.136, |
|
"eval_steps_per_second": 8.284, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03007518796992481, |
|
"grad_norm": 0.1543322652578354, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8686, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.045112781954887216, |
|
"grad_norm": 0.16240979731082916, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8141, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.06015037593984962, |
|
"grad_norm": 0.17936722934246063, |
|
"learning_rate": 4e-05, |
|
"loss": 0.7206, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.07518796992481203, |
|
"grad_norm": 0.1907089203596115, |
|
"learning_rate": 5e-05, |
|
"loss": 0.784, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09022556390977443, |
|
"grad_norm": 0.1974342316389084, |
|
"learning_rate": 6e-05, |
|
"loss": 0.75, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.10526315789473684, |
|
"grad_norm": 0.23462049663066864, |
|
"learning_rate": 7e-05, |
|
"loss": 0.8442, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.12030075187969924, |
|
"grad_norm": 0.2285780906677246, |
|
"learning_rate": 8e-05, |
|
"loss": 0.7375, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.13533834586466165, |
|
"grad_norm": 0.23215308785438538, |
|
"learning_rate": 9e-05, |
|
"loss": 0.7081, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.15037593984962405, |
|
"grad_norm": 0.25710222125053406, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6915, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16541353383458646, |
|
"grad_norm": 0.23554426431655884, |
|
"learning_rate": 9.999316524962345e-05, |
|
"loss": 0.6363, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.18045112781954886, |
|
"grad_norm": 0.26785606145858765, |
|
"learning_rate": 9.997266286704631e-05, |
|
"loss": 0.5488, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.19548872180451127, |
|
"grad_norm": 0.300786554813385, |
|
"learning_rate": 9.993849845741524e-05, |
|
"loss": 0.6614, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.21052631578947367, |
|
"grad_norm": 0.34388741850852966, |
|
"learning_rate": 9.989068136093873e-05, |
|
"loss": 0.6807, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.22556390977443608, |
|
"grad_norm": 0.43668121099472046, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 0.6145, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.24060150375939848, |
|
"grad_norm": 0.674685001373291, |
|
"learning_rate": 9.975414512725057e-05, |
|
"loss": 0.7951, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.2556390977443609, |
|
"grad_norm": 0.3028257191181183, |
|
"learning_rate": 9.966546331768191e-05, |
|
"loss": 0.7573, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.2706766917293233, |
|
"grad_norm": 0.284080445766449, |
|
"learning_rate": 9.956320346634876e-05, |
|
"loss": 0.7908, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 0.26206010580062866, |
|
"learning_rate": 9.944739353007344e-05, |
|
"loss": 0.7506, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.3007518796992481, |
|
"grad_norm": 0.22683000564575195, |
|
"learning_rate": 9.931806517013612e-05, |
|
"loss": 0.6703, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3157894736842105, |
|
"grad_norm": 0.20807921886444092, |
|
"learning_rate": 9.917525374361912e-05, |
|
"loss": 0.6345, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.3308270676691729, |
|
"grad_norm": 0.2084735780954361, |
|
"learning_rate": 9.901899829374047e-05, |
|
"loss": 0.604, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.3458646616541353, |
|
"grad_norm": 0.20663465559482574, |
|
"learning_rate": 9.884934153917997e-05, |
|
"loss": 0.6068, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.3609022556390977, |
|
"grad_norm": 0.2244303822517395, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 0.6732, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.37593984962406013, |
|
"grad_norm": 0.2055593729019165, |
|
"learning_rate": 9.847001329696653e-05, |
|
"loss": 0.5607, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.39097744360902253, |
|
"grad_norm": 0.2082129716873169, |
|
"learning_rate": 9.826044551386744e-05, |
|
"loss": 0.5831, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.40601503759398494, |
|
"grad_norm": 0.1980503648519516, |
|
"learning_rate": 9.803768380684242e-05, |
|
"loss": 0.5023, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 0.2241385579109192, |
|
"learning_rate": 9.780178907671789e-05, |
|
"loss": 0.5327, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.43609022556390975, |
|
"grad_norm": 0.23611101508140564, |
|
"learning_rate": 9.755282581475769e-05, |
|
"loss": 0.5463, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.45112781954887216, |
|
"grad_norm": 0.2858544588088989, |
|
"learning_rate": 9.729086208503174e-05, |
|
"loss": 0.632, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.46616541353383456, |
|
"grad_norm": 0.3600847125053406, |
|
"learning_rate": 9.701596950580806e-05, |
|
"loss": 0.5857, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.48120300751879697, |
|
"grad_norm": 0.634533703327179, |
|
"learning_rate": 9.672822322997305e-05, |
|
"loss": 0.7072, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.49624060150375937, |
|
"grad_norm": 0.11360874772071838, |
|
"learning_rate": 9.642770192448536e-05, |
|
"loss": 0.6829, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.5112781954887218, |
|
"grad_norm": 0.12521837651729584, |
|
"learning_rate": 9.611448774886924e-05, |
|
"loss": 0.6568, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.1402267962694168, |
|
"learning_rate": 9.578866633275288e-05, |
|
"loss": 0.6213, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5413533834586466, |
|
"grad_norm": 0.14699652791023254, |
|
"learning_rate": 9.545032675245813e-05, |
|
"loss": 0.6715, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.556390977443609, |
|
"grad_norm": 0.154379740357399, |
|
"learning_rate": 9.509956150664796e-05, |
|
"loss": 0.6248, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 0.18539367616176605, |
|
"learning_rate": 9.473646649103818e-05, |
|
"loss": 0.6054, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.5864661654135338, |
|
"grad_norm": 0.19980503618717194, |
|
"learning_rate": 9.43611409721806e-05, |
|
"loss": 0.5943, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.6015037593984962, |
|
"grad_norm": 0.20120373368263245, |
|
"learning_rate": 9.397368756032445e-05, |
|
"loss": 0.6088, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6165413533834586, |
|
"grad_norm": 0.20035739243030548, |
|
"learning_rate": 9.357421218136386e-05, |
|
"loss": 0.5723, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.631578947368421, |
|
"grad_norm": 0.20934194326400757, |
|
"learning_rate": 9.316282404787871e-05, |
|
"loss": 0.5627, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.6466165413533834, |
|
"grad_norm": 0.2100801318883896, |
|
"learning_rate": 9.273963562927695e-05, |
|
"loss": 0.5499, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.6616541353383458, |
|
"grad_norm": 0.2176390141248703, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 0.5382, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.6766917293233082, |
|
"grad_norm": 0.22677041590213776, |
|
"learning_rate": 9.185832391312644e-05, |
|
"loss": 0.5968, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6917293233082706, |
|
"grad_norm": 0.24823607504367828, |
|
"learning_rate": 9.140044155740101e-05, |
|
"loss": 0.4171, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.706766917293233, |
|
"grad_norm": 0.29296374320983887, |
|
"learning_rate": 9.093124073433463e-05, |
|
"loss": 0.5258, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.7218045112781954, |
|
"grad_norm": 0.4760672152042389, |
|
"learning_rate": 9.045084971874738e-05, |
|
"loss": 0.5269, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.7368421052631579, |
|
"grad_norm": 0.1132049560546875, |
|
"learning_rate": 8.995939984474624e-05, |
|
"loss": 0.6381, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.7518796992481203, |
|
"grad_norm": 0.12979930639266968, |
|
"learning_rate": 8.945702546981969e-05, |
|
"loss": 0.6685, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7518796992481203, |
|
"eval_loss": 0.5862260460853577, |
|
"eval_runtime": 3.3821, |
|
"eval_samples_per_second": 33.116, |
|
"eval_steps_per_second": 8.279, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7669172932330827, |
|
"grad_norm": 0.14075040817260742, |
|
"learning_rate": 8.894386393810563e-05, |
|
"loss": 0.6873, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.7819548872180451, |
|
"grad_norm": 0.1557828038930893, |
|
"learning_rate": 8.842005554284296e-05, |
|
"loss": 0.6984, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.7969924812030075, |
|
"grad_norm": 0.1478954255580902, |
|
"learning_rate": 8.788574348801675e-05, |
|
"loss": 0.5438, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.8120300751879699, |
|
"grad_norm": 0.15712638199329376, |
|
"learning_rate": 8.73410738492077e-05, |
|
"loss": 0.4774, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8270676691729323, |
|
"grad_norm": 0.17720134556293488, |
|
"learning_rate": 8.678619553365659e-05, |
|
"loss": 0.507, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 0.1685304194688797, |
|
"learning_rate": 8.622126023955446e-05, |
|
"loss": 0.4597, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 0.21216347813606262, |
|
"learning_rate": 8.564642241456986e-05, |
|
"loss": 0.6305, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.8721804511278195, |
|
"grad_norm": 0.21171170473098755, |
|
"learning_rate": 8.506183921362443e-05, |
|
"loss": 0.5459, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.8872180451127819, |
|
"grad_norm": 0.22091779112815857, |
|
"learning_rate": 8.44676704559283e-05, |
|
"loss": 0.5401, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.9022556390977443, |
|
"grad_norm": 0.23896510899066925, |
|
"learning_rate": 8.386407858128706e-05, |
|
"loss": 0.5378, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9172932330827067, |
|
"grad_norm": 0.24744990468025208, |
|
"learning_rate": 8.32512286056924e-05, |
|
"loss": 0.477, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.9323308270676691, |
|
"grad_norm": 0.31011462211608887, |
|
"learning_rate": 8.262928807620843e-05, |
|
"loss": 0.514, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.9473684210526315, |
|
"grad_norm": 0.38164022564888, |
|
"learning_rate": 8.199842702516583e-05, |
|
"loss": 0.4897, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.9624060150375939, |
|
"grad_norm": 0.7132401466369629, |
|
"learning_rate": 8.135881792367686e-05, |
|
"loss": 0.4955, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.9774436090225563, |
|
"grad_norm": 0.14969220757484436, |
|
"learning_rate": 8.07106356344834e-05, |
|
"loss": 0.5485, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9924812030075187, |
|
"grad_norm": 0.22407089173793793, |
|
"learning_rate": 8.005405736415126e-05, |
|
"loss": 0.4904, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.0075187969924813, |
|
"grad_norm": 0.8124484419822693, |
|
"learning_rate": 7.938926261462366e-05, |
|
"loss": 1.0379, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.0225563909774436, |
|
"grad_norm": 0.12400493770837784, |
|
"learning_rate": 7.871643313414718e-05, |
|
"loss": 0.6444, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.037593984962406, |
|
"grad_norm": 0.14174407720565796, |
|
"learning_rate": 7.803575286758364e-05, |
|
"loss": 0.6231, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.14666052162647247, |
|
"learning_rate": 7.734740790612136e-05, |
|
"loss": 0.5752, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0676691729323309, |
|
"grad_norm": 0.16379950940608978, |
|
"learning_rate": 7.66515864363997e-05, |
|
"loss": 0.6372, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.0827067669172932, |
|
"grad_norm": 0.17941914498806, |
|
"learning_rate": 7.594847868906076e-05, |
|
"loss": 0.6128, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0977443609022557, |
|
"grad_norm": 0.17348399758338928, |
|
"learning_rate": 7.52382768867422e-05, |
|
"loss": 0.4972, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.112781954887218, |
|
"grad_norm": 0.19245783984661102, |
|
"learning_rate": 7.452117519152542e-05, |
|
"loss": 0.5319, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.1278195488721805, |
|
"grad_norm": 0.1955268234014511, |
|
"learning_rate": 7.379736965185368e-05, |
|
"loss": 0.4335, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.22424666583538055, |
|
"learning_rate": 7.30670581489344e-05, |
|
"loss": 0.5399, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.1578947368421053, |
|
"grad_norm": 0.20298990607261658, |
|
"learning_rate": 7.233044034264034e-05, |
|
"loss": 0.4551, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.1729323308270676, |
|
"grad_norm": 0.21570706367492676, |
|
"learning_rate": 7.158771761692464e-05, |
|
"loss": 0.4494, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.1879699248120301, |
|
"grad_norm": 0.242059126496315, |
|
"learning_rate": 7.083909302476453e-05, |
|
"loss": 0.4621, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.2030075187969924, |
|
"grad_norm": 0.22083941102027893, |
|
"learning_rate": 7.008477123264848e-05, |
|
"loss": 0.3466, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.218045112781955, |
|
"grad_norm": 0.29217585921287537, |
|
"learning_rate": 6.932495846462261e-05, |
|
"loss": 0.4276, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.2330827067669172, |
|
"grad_norm": 0.3911180794239044, |
|
"learning_rate": 6.855986244591104e-05, |
|
"loss": 0.3831, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.2481203007518797, |
|
"grad_norm": 0.2208462655544281, |
|
"learning_rate": 6.778969234612584e-05, |
|
"loss": 0.4069, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.263157894736842, |
|
"grad_norm": 0.14193277060985565, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 0.6454, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.2781954887218046, |
|
"grad_norm": 0.17254970967769623, |
|
"learning_rate": 6.623497346023418e-05, |
|
"loss": 0.6492, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.2932330827067668, |
|
"grad_norm": 0.17795664072036743, |
|
"learning_rate": 6.545084971874738e-05, |
|
"loss": 0.5842, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.3082706766917294, |
|
"grad_norm": 0.21064266562461853, |
|
"learning_rate": 6.466250186922325e-05, |
|
"loss": 0.6568, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.3233082706766917, |
|
"grad_norm": 0.21046878397464752, |
|
"learning_rate": 6.387014543809223e-05, |
|
"loss": 0.564, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.3383458646616542, |
|
"grad_norm": 0.2297947257757187, |
|
"learning_rate": 6.307399704769099e-05, |
|
"loss": 0.5719, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.3533834586466165, |
|
"grad_norm": 0.21308641135692596, |
|
"learning_rate": 6.227427435703997e-05, |
|
"loss": 0.4958, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.368421052631579, |
|
"grad_norm": 0.21259456872940063, |
|
"learning_rate": 6.147119600233758e-05, |
|
"loss": 0.4928, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.3834586466165413, |
|
"grad_norm": 0.23126700520515442, |
|
"learning_rate": 6.066498153718735e-05, |
|
"loss": 0.4984, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.3984962406015038, |
|
"grad_norm": 0.2473018318414688, |
|
"learning_rate": 5.985585137257401e-05, |
|
"loss": 0.502, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.413533834586466, |
|
"grad_norm": 0.22947290539741516, |
|
"learning_rate": 5.90440267166055e-05, |
|
"loss": 0.4036, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.2785811126232147, |
|
"learning_rate": 5.8229729514036705e-05, |
|
"loss": 0.4428, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.443609022556391, |
|
"grad_norm": 0.28426018357276917, |
|
"learning_rate": 5.74131823855921e-05, |
|
"loss": 0.3525, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.4586466165413534, |
|
"grad_norm": 0.3277798295021057, |
|
"learning_rate": 5.6594608567103456e-05, |
|
"loss": 0.4164, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.4736842105263157, |
|
"grad_norm": 0.5964440703392029, |
|
"learning_rate": 5.577423184847932e-05, |
|
"loss": 0.5171, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.4887218045112782, |
|
"grad_norm": 0.28781628608703613, |
|
"learning_rate": 5.495227651252315e-05, |
|
"loss": 0.455, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.5037593984962405, |
|
"grad_norm": 0.13746999204158783, |
|
"learning_rate": 5.4128967273616625e-05, |
|
"loss": 0.6356, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5037593984962405, |
|
"eval_loss": 0.5638276934623718, |
|
"eval_runtime": 3.3731, |
|
"eval_samples_per_second": 33.204, |
|
"eval_steps_per_second": 8.301, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.518796992481203, |
|
"grad_norm": 0.16840910911560059, |
|
"learning_rate": 5.330452921628497e-05, |
|
"loss": 0.7206, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.5338345864661656, |
|
"grad_norm": 0.19142265617847443, |
|
"learning_rate": 5.247918773366112e-05, |
|
"loss": 0.6365, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.5488721804511278, |
|
"grad_norm": 0.19640135765075684, |
|
"learning_rate": 5.165316846586541e-05, |
|
"loss": 0.5281, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.5639097744360901, |
|
"grad_norm": 0.20392058789730072, |
|
"learning_rate": 5.0826697238317935e-05, |
|
"loss": 0.5404, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.21644632518291473, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5099, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.5939849624060152, |
|
"grad_norm": 0.24281011521816254, |
|
"learning_rate": 4.917330276168208e-05, |
|
"loss": 0.5477, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.6090225563909775, |
|
"grad_norm": 0.24991512298583984, |
|
"learning_rate": 4.834683153413459e-05, |
|
"loss": 0.4954, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.6240601503759398, |
|
"grad_norm": 0.25631803274154663, |
|
"learning_rate": 4.7520812266338885e-05, |
|
"loss": 0.4676, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.6390977443609023, |
|
"grad_norm": 0.25062280893325806, |
|
"learning_rate": 4.669547078371504e-05, |
|
"loss": 0.4063, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.6541353383458648, |
|
"grad_norm": 0.26639077067375183, |
|
"learning_rate": 4.5871032726383386e-05, |
|
"loss": 0.4253, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.669172932330827, |
|
"grad_norm": 0.2973855137825012, |
|
"learning_rate": 4.504772348747687e-05, |
|
"loss": 0.5089, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.6842105263157894, |
|
"grad_norm": 0.2878214120864868, |
|
"learning_rate": 4.4225768151520694e-05, |
|
"loss": 0.3851, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.699248120300752, |
|
"grad_norm": 0.3631870746612549, |
|
"learning_rate": 4.3405391432896555e-05, |
|
"loss": 0.3705, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 0.48780307173728943, |
|
"learning_rate": 4.2586817614407895e-05, |
|
"loss": 0.327, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.7293233082706767, |
|
"grad_norm": 0.319959819316864, |
|
"learning_rate": 4.17702704859633e-05, |
|
"loss": 0.4293, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.744360902255639, |
|
"grad_norm": 0.14660124480724335, |
|
"learning_rate": 4.095597328339452e-05, |
|
"loss": 0.628, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.7593984962406015, |
|
"grad_norm": 0.17272670567035675, |
|
"learning_rate": 4.0144148627425993e-05, |
|
"loss": 0.5909, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.774436090225564, |
|
"grad_norm": 0.18323040008544922, |
|
"learning_rate": 3.933501846281267e-05, |
|
"loss": 0.6245, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.7894736842105263, |
|
"grad_norm": 0.19948799908161163, |
|
"learning_rate": 3.852880399766243e-05, |
|
"loss": 0.5076, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.8045112781954886, |
|
"grad_norm": 0.21641798317432404, |
|
"learning_rate": 3.772572564296005e-05, |
|
"loss": 0.492, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.8195488721804511, |
|
"grad_norm": 0.21880875527858734, |
|
"learning_rate": 3.6926002952309016e-05, |
|
"loss": 0.4592, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.8345864661654137, |
|
"grad_norm": 0.24364233016967773, |
|
"learning_rate": 3.612985456190778e-05, |
|
"loss": 0.5003, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.849624060150376, |
|
"grad_norm": 0.259734570980072, |
|
"learning_rate": 3.533749813077677e-05, |
|
"loss": 0.511, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.8646616541353382, |
|
"grad_norm": 0.26867565512657166, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.4958, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.8796992481203008, |
|
"grad_norm": 0.2885945737361908, |
|
"learning_rate": 3.3765026539765834e-05, |
|
"loss": 0.4937, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.8947368421052633, |
|
"grad_norm": 0.2841680943965912, |
|
"learning_rate": 3.298534127791785e-05, |
|
"loss": 0.3929, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.9097744360902256, |
|
"grad_norm": 0.33479347825050354, |
|
"learning_rate": 3.221030765387417e-05, |
|
"loss": 0.4029, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.9248120300751879, |
|
"grad_norm": 0.3607374131679535, |
|
"learning_rate": 3.144013755408895e-05, |
|
"loss": 0.4149, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.9398496240601504, |
|
"grad_norm": 0.4023395776748657, |
|
"learning_rate": 3.0675041535377405e-05, |
|
"loss": 0.3412, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.954887218045113, |
|
"grad_norm": 0.5105811357498169, |
|
"learning_rate": 2.991522876735154e-05, |
|
"loss": 0.3745, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.9699248120300752, |
|
"grad_norm": 0.3421552777290344, |
|
"learning_rate": 2.916090697523549e-05, |
|
"loss": 0.5052, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.9849624060150375, |
|
"grad_norm": 0.20374922454357147, |
|
"learning_rate": 2.8412282383075363e-05, |
|
"loss": 0.4647, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.6035916209220886, |
|
"learning_rate": 2.766955965735968e-05, |
|
"loss": 0.4439, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.0150375939849625, |
|
"grad_norm": 0.1350356936454773, |
|
"learning_rate": 2.693294185106562e-05, |
|
"loss": 0.6368, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.030075187969925, |
|
"grad_norm": 0.15917502343654633, |
|
"learning_rate": 2.6202630348146324e-05, |
|
"loss": 0.6403, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.045112781954887, |
|
"grad_norm": 0.1806429624557495, |
|
"learning_rate": 2.547882480847461e-05, |
|
"loss": 0.6467, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.0601503759398496, |
|
"grad_norm": 0.19085431098937988, |
|
"learning_rate": 2.476172311325783e-05, |
|
"loss": 0.5759, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.075187969924812, |
|
"grad_norm": 0.19593051075935364, |
|
"learning_rate": 2.405152131093926e-05, |
|
"loss": 0.5562, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.090225563909774, |
|
"grad_norm": 0.20538921654224396, |
|
"learning_rate": 2.3348413563600325e-05, |
|
"loss": 0.4145, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.22212770581245422, |
|
"learning_rate": 2.2652592093878666e-05, |
|
"loss": 0.4293, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.1203007518796992, |
|
"grad_norm": 0.24110625684261322, |
|
"learning_rate": 2.196424713241637e-05, |
|
"loss": 0.49, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.1353383458646618, |
|
"grad_norm": 0.25020718574523926, |
|
"learning_rate": 2.128356686585282e-05, |
|
"loss": 0.4459, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.1503759398496243, |
|
"grad_norm": 0.2709634602069855, |
|
"learning_rate": 2.061073738537635e-05, |
|
"loss": 0.4458, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.1654135338345863, |
|
"grad_norm": 0.28327417373657227, |
|
"learning_rate": 1.9945942635848748e-05, |
|
"loss": 0.4517, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.180451127819549, |
|
"grad_norm": 0.2878214120864868, |
|
"learning_rate": 1.928936436551661e-05, |
|
"loss": 0.3378, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.1954887218045114, |
|
"grad_norm": 0.29141637682914734, |
|
"learning_rate": 1.8641182076323148e-05, |
|
"loss": 0.3382, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.2105263157894735, |
|
"grad_norm": 0.30836647748947144, |
|
"learning_rate": 1.800157297483417e-05, |
|
"loss": 0.2915, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.225563909774436, |
|
"grad_norm": 0.3543986976146698, |
|
"learning_rate": 1.7370711923791567e-05, |
|
"loss": 0.2575, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.2406015037593985, |
|
"grad_norm": 0.5044798851013184, |
|
"learning_rate": 1.6748771394307585e-05, |
|
"loss": 0.2565, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.255639097744361, |
|
"grad_norm": 0.14180827140808105, |
|
"learning_rate": 1.6135921418712956e-05, |
|
"loss": 0.5729, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.255639097744361, |
|
"eval_loss": 0.555241584777832, |
|
"eval_runtime": 3.3881, |
|
"eval_samples_per_second": 33.057, |
|
"eval_steps_per_second": 8.264, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.2706766917293235, |
|
"grad_norm": 0.162590891122818, |
|
"learning_rate": 1.553232954407171e-05, |
|
"loss": 0.6461, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 0.17810818552970886, |
|
"learning_rate": 1.4938160786375572e-05, |
|
"loss": 0.5882, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.300751879699248, |
|
"grad_norm": 0.1994241625070572, |
|
"learning_rate": 1.435357758543015e-05, |
|
"loss": 0.5266, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.3157894736842106, |
|
"grad_norm": 0.20960594713687897, |
|
"learning_rate": 1.3778739760445552e-05, |
|
"loss": 0.5328, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.3308270676691727, |
|
"grad_norm": 0.23722250759601593, |
|
"learning_rate": 1.3213804466343421e-05, |
|
"loss": 0.4816, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.345864661654135, |
|
"grad_norm": 0.2501429319381714, |
|
"learning_rate": 1.2658926150792322e-05, |
|
"loss": 0.4603, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.3609022556390977, |
|
"grad_norm": 0.2598116397857666, |
|
"learning_rate": 1.2114256511983274e-05, |
|
"loss": 0.4422, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.3759398496240602, |
|
"grad_norm": 0.26544246077537537, |
|
"learning_rate": 1.157994445715706e-05, |
|
"loss": 0.4352, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.3909774436090228, |
|
"grad_norm": 0.28918272256851196, |
|
"learning_rate": 1.1056136061894384e-05, |
|
"loss": 0.4043, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.406015037593985, |
|
"grad_norm": 0.2864091694355011, |
|
"learning_rate": 1.0542974530180327e-05, |
|
"loss": 0.3689, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.4210526315789473, |
|
"grad_norm": 0.3314032256603241, |
|
"learning_rate": 1.0040600155253765e-05, |
|
"loss": 0.3901, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.43609022556391, |
|
"grad_norm": 0.2950114905834198, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 0.2565, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.451127819548872, |
|
"grad_norm": 0.36117634177207947, |
|
"learning_rate": 9.068759265665384e-06, |
|
"loss": 0.3079, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.4661654135338344, |
|
"grad_norm": 0.40990445017814636, |
|
"learning_rate": 8.599558442598998e-06, |
|
"loss": 0.3051, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.481203007518797, |
|
"grad_norm": 0.435947984457016, |
|
"learning_rate": 8.141676086873572e-06, |
|
"loss": 0.1591, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.4962406015037595, |
|
"grad_norm": 0.15773995220661163, |
|
"learning_rate": 7.695237378953223e-06, |
|
"loss": 0.6107, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.511278195488722, |
|
"grad_norm": 0.18130731582641602, |
|
"learning_rate": 7.260364370723044e-06, |
|
"loss": 0.6521, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.526315789473684, |
|
"grad_norm": 0.1928928941488266, |
|
"learning_rate": 6.837175952121306e-06, |
|
"loss": 0.584, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.5413533834586466, |
|
"grad_norm": 0.22154775261878967, |
|
"learning_rate": 6.425787818636131e-06, |
|
"loss": 0.536, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 2.556390977443609, |
|
"grad_norm": 0.24150554835796356, |
|
"learning_rate": 6.026312439675552e-06, |
|
"loss": 0.5197, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.2482544332742691, |
|
"learning_rate": 5.6388590278194096e-06, |
|
"loss": 0.4437, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.5864661654135337, |
|
"grad_norm": 0.2693168818950653, |
|
"learning_rate": 5.263533508961827e-06, |
|
"loss": 0.4924, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 2.601503759398496, |
|
"grad_norm": 0.26239049434661865, |
|
"learning_rate": 4.900438493352055e-06, |
|
"loss": 0.3955, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 2.6165413533834587, |
|
"grad_norm": 0.28949567675590515, |
|
"learning_rate": 4.549673247541875e-06, |
|
"loss": 0.4173, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.2859938144683838, |
|
"learning_rate": 4.2113336672471245e-06, |
|
"loss": 0.388, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.6466165413533833, |
|
"grad_norm": 0.2940588891506195, |
|
"learning_rate": 3.885512251130763e-06, |
|
"loss": 0.3952, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 2.661654135338346, |
|
"grad_norm": 0.32735714316368103, |
|
"learning_rate": 3.5722980755146517e-06, |
|
"loss": 0.3912, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.6766917293233083, |
|
"grad_norm": 0.32457244396209717, |
|
"learning_rate": 3.271776770026963e-06, |
|
"loss": 0.318, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 2.6917293233082704, |
|
"grad_norm": 0.3507591485977173, |
|
"learning_rate": 2.9840304941919415e-06, |
|
"loss": 0.3252, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 2.706766917293233, |
|
"grad_norm": 0.41136404871940613, |
|
"learning_rate": 2.7091379149682685e-06, |
|
"loss": 0.3047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.7218045112781954, |
|
"grad_norm": 0.4661613702774048, |
|
"learning_rate": 2.4471741852423237e-06, |
|
"loss": 0.1978, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.736842105263158, |
|
"grad_norm": 0.14506597816944122, |
|
"learning_rate": 2.1982109232821178e-06, |
|
"loss": 0.5766, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 2.7518796992481205, |
|
"grad_norm": 0.1723233461380005, |
|
"learning_rate": 1.962316193157593e-06, |
|
"loss": 0.612, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.7669172932330826, |
|
"grad_norm": 0.18912146985530853, |
|
"learning_rate": 1.7395544861325718e-06, |
|
"loss": 0.55, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 2.781954887218045, |
|
"grad_norm": 0.2082255482673645, |
|
"learning_rate": 1.5299867030334814e-06, |
|
"loss": 0.6049, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.7969924812030076, |
|
"grad_norm": 0.21885396540164948, |
|
"learning_rate": 1.333670137599713e-06, |
|
"loss": 0.5299, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.8120300751879697, |
|
"grad_norm": 0.23263628780841827, |
|
"learning_rate": 1.1506584608200367e-06, |
|
"loss": 0.5243, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 2.827067669172932, |
|
"grad_norm": 0.24706439673900604, |
|
"learning_rate": 9.810017062595322e-07, |
|
"loss": 0.4521, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.8421052631578947, |
|
"grad_norm": 0.26898086071014404, |
|
"learning_rate": 8.247462563808817e-07, |
|
"loss": 0.4638, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.277619332075119, |
|
"learning_rate": 6.819348298638839e-07, |
|
"loss": 0.4816, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.8721804511278197, |
|
"grad_norm": 0.28311529755592346, |
|
"learning_rate": 5.526064699265753e-07, |
|
"loss": 0.4499, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 2.887218045112782, |
|
"grad_norm": 0.2803266644477844, |
|
"learning_rate": 4.367965336512403e-07, |
|
"loss": 0.3319, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.9022556390977443, |
|
"grad_norm": 0.32511386275291443, |
|
"learning_rate": 3.3453668231809286e-07, |
|
"loss": 0.4078, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 2.917293233082707, |
|
"grad_norm": 0.35642704367637634, |
|
"learning_rate": 2.458548727494292e-07, |
|
"loss": 0.3913, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 2.932330827067669, |
|
"grad_norm": 0.3581446707248688, |
|
"learning_rate": 1.7077534966650766e-07, |
|
"loss": 0.3243, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.9473684210526314, |
|
"grad_norm": 0.4329220950603485, |
|
"learning_rate": 1.0931863906127327e-07, |
|
"loss": 0.277, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 2.962406015037594, |
|
"grad_norm": 0.45859774947166443, |
|
"learning_rate": 6.150154258476315e-08, |
|
"loss": 0.1649, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 2.9774436090225564, |
|
"grad_norm": 0.19787120819091797, |
|
"learning_rate": 2.7337132953697554e-08, |
|
"loss": 0.5944, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.992481203007519, |
|
"grad_norm": 0.2929736077785492, |
|
"learning_rate": 6.834750376549792e-09, |
|
"loss": 0.3918, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 3.007518796992481, |
|
"grad_norm": 0.6807894706726074, |
|
"learning_rate": 0.0, |
|
"loss": 0.4638, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.007518796992481, |
|
"eval_loss": 0.5553240776062012, |
|
"eval_runtime": 3.3823, |
|
"eval_samples_per_second": 33.113, |
|
"eval_steps_per_second": 8.278, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.542925435928576e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|