|
{ |
|
"best_metric": 0.032022152096033096, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-150", |
|
"epoch": 3.004424778761062, |
|
"eval_steps": 50, |
|
"global_step": 169, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017699115044247787, |
|
"grad_norm": 1.0721659660339355, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9325, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017699115044247787, |
|
"eval_loss": 1.3935922384262085, |
|
"eval_runtime": 6.5562, |
|
"eval_samples_per_second": 14.49, |
|
"eval_steps_per_second": 3.661, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.035398230088495575, |
|
"grad_norm": 1.2187706232070923, |
|
"learning_rate": 2e-05, |
|
"loss": 1.1, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.05309734513274336, |
|
"grad_norm": 1.1939740180969238, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0582, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.07079646017699115, |
|
"grad_norm": 1.1176855564117432, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0807, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.08849557522123894, |
|
"grad_norm": 1.3982747793197632, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2339, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.10619469026548672, |
|
"grad_norm": 1.3325220346450806, |
|
"learning_rate": 6e-05, |
|
"loss": 1.1153, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.12389380530973451, |
|
"grad_norm": 1.265844702720642, |
|
"learning_rate": 7e-05, |
|
"loss": 0.958, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1415929203539823, |
|
"grad_norm": 1.1271923780441284, |
|
"learning_rate": 8e-05, |
|
"loss": 0.7678, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.1592920353982301, |
|
"grad_norm": 1.0350992679595947, |
|
"learning_rate": 9e-05, |
|
"loss": 0.586, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.17699115044247787, |
|
"grad_norm": 1.1207027435302734, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5873, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.19469026548672566, |
|
"grad_norm": 1.0533980131149292, |
|
"learning_rate": 9.999024041442456e-05, |
|
"loss": 0.4849, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.21238938053097345, |
|
"grad_norm": 1.0255162715911865, |
|
"learning_rate": 9.99609654676786e-05, |
|
"loss": 0.4021, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.23008849557522124, |
|
"grad_norm": 1.1924771070480347, |
|
"learning_rate": 9.991218658821608e-05, |
|
"loss": 0.4131, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.24778761061946902, |
|
"grad_norm": 1.1136771440505981, |
|
"learning_rate": 9.984392281850293e-05, |
|
"loss": 0.2954, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.26548672566371684, |
|
"grad_norm": 0.9027830958366394, |
|
"learning_rate": 9.97562008075832e-05, |
|
"loss": 0.2755, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2831858407079646, |
|
"grad_norm": 0.6252666711807251, |
|
"learning_rate": 9.964905480067586e-05, |
|
"loss": 0.1862, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.3008849557522124, |
|
"grad_norm": 0.5391494631767273, |
|
"learning_rate": 9.952252662580579e-05, |
|
"loss": 0.1796, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.3185840707964602, |
|
"grad_norm": 0.5780326724052429, |
|
"learning_rate": 9.937666567747501e-05, |
|
"loss": 0.1702, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.336283185840708, |
|
"grad_norm": 0.579827070236206, |
|
"learning_rate": 9.921152889737984e-05, |
|
"loss": 0.1355, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 0.5776816010475159, |
|
"learning_rate": 9.902718075218176e-05, |
|
"loss": 0.1651, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.37168141592920356, |
|
"grad_norm": 0.5652853846549988, |
|
"learning_rate": 9.882369320834069e-05, |
|
"loss": 0.1378, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.3893805309734513, |
|
"grad_norm": 0.5807967782020569, |
|
"learning_rate": 9.860114570402054e-05, |
|
"loss": 0.1444, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.40707964601769914, |
|
"grad_norm": 0.46384990215301514, |
|
"learning_rate": 9.835962511807786e-05, |
|
"loss": 0.1163, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.4247787610619469, |
|
"grad_norm": 0.5239526033401489, |
|
"learning_rate": 9.809922573614569e-05, |
|
"loss": 0.1363, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4424778761061947, |
|
"grad_norm": 0.5285527110099792, |
|
"learning_rate": 9.782004921382612e-05, |
|
"loss": 0.1366, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.46017699115044247, |
|
"grad_norm": 0.6417925953865051, |
|
"learning_rate": 9.752220453700556e-05, |
|
"loss": 0.1262, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.4778761061946903, |
|
"grad_norm": 0.5849817395210266, |
|
"learning_rate": 9.720580797930845e-05, |
|
"loss": 0.1117, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.49557522123893805, |
|
"grad_norm": 0.6580327749252319, |
|
"learning_rate": 9.687098305670605e-05, |
|
"loss": 0.1371, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5132743362831859, |
|
"grad_norm": 0.43546798825263977, |
|
"learning_rate": 9.651786047929773e-05, |
|
"loss": 0.1155, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.5309734513274337, |
|
"grad_norm": 0.42823654413223267, |
|
"learning_rate": 9.614657810028402e-05, |
|
"loss": 0.1186, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5486725663716814, |
|
"grad_norm": 0.39037245512008667, |
|
"learning_rate": 9.575728086215092e-05, |
|
"loss": 0.1127, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5663716814159292, |
|
"grad_norm": 0.3906686305999756, |
|
"learning_rate": 9.535012074008687e-05, |
|
"loss": 0.0985, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.584070796460177, |
|
"grad_norm": 0.32296302914619446, |
|
"learning_rate": 9.492525668265399e-05, |
|
"loss": 0.0729, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.6017699115044248, |
|
"grad_norm": 0.3804223835468292, |
|
"learning_rate": 9.448285454973738e-05, |
|
"loss": 0.0887, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.6194690265486725, |
|
"grad_norm": 0.528093695640564, |
|
"learning_rate": 9.402308704779599e-05, |
|
"loss": 0.0873, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.6371681415929203, |
|
"grad_norm": 0.43243497610092163, |
|
"learning_rate": 9.354613366244108e-05, |
|
"loss": 0.1039, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.6548672566371682, |
|
"grad_norm": 0.33139994740486145, |
|
"learning_rate": 9.305218058836778e-05, |
|
"loss": 0.0861, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.672566371681416, |
|
"grad_norm": 0.5364376902580261, |
|
"learning_rate": 9.254142065666801e-05, |
|
"loss": 0.0946, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6902654867256637, |
|
"grad_norm": 0.3789397180080414, |
|
"learning_rate": 9.201405325955221e-05, |
|
"loss": 0.0787, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 0.445186585187912, |
|
"learning_rate": 9.14702842725101e-05, |
|
"loss": 0.0822, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7256637168141593, |
|
"grad_norm": 0.3613705039024353, |
|
"learning_rate": 9.091032597394012e-05, |
|
"loss": 0.0774, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.7433628318584071, |
|
"grad_norm": 0.3099222779273987, |
|
"learning_rate": 9.033439696227965e-05, |
|
"loss": 0.0521, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.7610619469026548, |
|
"grad_norm": 0.27817392349243164, |
|
"learning_rate": 8.974272207066767e-05, |
|
"loss": 0.0867, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.7787610619469026, |
|
"grad_norm": 0.2684073746204376, |
|
"learning_rate": 8.913553227917367e-05, |
|
"loss": 0.061, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.7964601769911505, |
|
"grad_norm": 0.2929267883300781, |
|
"learning_rate": 8.851306462462688e-05, |
|
"loss": 0.0698, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.8141592920353983, |
|
"grad_norm": 0.3591836392879486, |
|
"learning_rate": 8.787556210808101e-05, |
|
"loss": 0.0653, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.831858407079646, |
|
"grad_norm": 0.36023128032684326, |
|
"learning_rate": 8.722327359995064e-05, |
|
"loss": 0.0733, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.8495575221238938, |
|
"grad_norm": 0.34157848358154297, |
|
"learning_rate": 8.655645374285637e-05, |
|
"loss": 0.0611, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.8672566371681416, |
|
"grad_norm": 0.3172074854373932, |
|
"learning_rate": 8.587536285221656e-05, |
|
"loss": 0.0689, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"grad_norm": 0.27993178367614746, |
|
"learning_rate": 8.518026681462448e-05, |
|
"loss": 0.0566, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8849557522123894, |
|
"eval_loss": 0.06235470622777939, |
|
"eval_runtime": 6.691, |
|
"eval_samples_per_second": 14.198, |
|
"eval_steps_per_second": 3.587, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9026548672566371, |
|
"grad_norm": 0.21425798535346985, |
|
"learning_rate": 8.44714369840506e-05, |
|
"loss": 0.0481, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.9203539823008849, |
|
"grad_norm": 0.2822091579437256, |
|
"learning_rate": 8.374915007591053e-05, |
|
"loss": 0.0539, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.9380530973451328, |
|
"grad_norm": 0.3757535517215729, |
|
"learning_rate": 8.301368805903988e-05, |
|
"loss": 0.0578, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.9557522123893806, |
|
"grad_norm": 0.36872968077659607, |
|
"learning_rate": 8.226533804561827e-05, |
|
"loss": 0.0614, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.9734513274336283, |
|
"grad_norm": 0.4218049943447113, |
|
"learning_rate": 8.150439217908556e-05, |
|
"loss": 0.0822, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.9911504424778761, |
|
"grad_norm": 0.5659615993499756, |
|
"learning_rate": 8.073114752009387e-05, |
|
"loss": 0.0642, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.0132743362831858, |
|
"grad_norm": 0.9227009415626526, |
|
"learning_rate": 7.994590593054001e-05, |
|
"loss": 0.127, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 1.0309734513274336, |
|
"grad_norm": 0.20931123197078705, |
|
"learning_rate": 7.91489739557236e-05, |
|
"loss": 0.0463, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.0486725663716814, |
|
"grad_norm": 0.25171875953674316, |
|
"learning_rate": 7.83406627046769e-05, |
|
"loss": 0.0586, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.0663716814159292, |
|
"grad_norm": 0.24994538724422455, |
|
"learning_rate": 7.752128772871292e-05, |
|
"loss": 0.0424, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.084070796460177, |
|
"grad_norm": 0.23162053525447845, |
|
"learning_rate": 7.669116889823955e-05, |
|
"loss": 0.0436, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 1.1017699115044248, |
|
"grad_norm": 0.2777061462402344, |
|
"learning_rate": 7.585063027788731e-05, |
|
"loss": 0.043, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.1194690265486726, |
|
"grad_norm": 0.2399117797613144, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0475, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.1371681415929205, |
|
"grad_norm": 0.2516414225101471, |
|
"learning_rate": 7.413961013653726e-05, |
|
"loss": 0.0434, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.154867256637168, |
|
"grad_norm": 0.15977510809898376, |
|
"learning_rate": 7.326979656943906e-05, |
|
"loss": 0.0282, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.1725663716814159, |
|
"grad_norm": 0.23954959213733673, |
|
"learning_rate": 7.239089885950316e-05, |
|
"loss": 0.0324, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.1902654867256637, |
|
"grad_norm": 0.15419161319732666, |
|
"learning_rate": 7.150326011382604e-05, |
|
"loss": 0.0235, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.2079646017699115, |
|
"grad_norm": 0.2992177903652191, |
|
"learning_rate": 7.060722685185961e-05, |
|
"loss": 0.0317, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.2256637168141593, |
|
"grad_norm": 0.2721240222454071, |
|
"learning_rate": 6.970314887013584e-05, |
|
"loss": 0.0441, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.2433628318584071, |
|
"grad_norm": 0.19432015717029572, |
|
"learning_rate": 6.879137910571191e-05, |
|
"loss": 0.0235, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.261061946902655, |
|
"grad_norm": 0.2833891212940216, |
|
"learning_rate": 6.787227349838947e-05, |
|
"loss": 0.0408, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 1.2787610619469025, |
|
"grad_norm": 0.24525463581085205, |
|
"learning_rate": 6.694619085176159e-05, |
|
"loss": 0.0469, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.2964601769911503, |
|
"grad_norm": 0.2467905730009079, |
|
"learning_rate": 6.601349269314188e-05, |
|
"loss": 0.0446, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 1.3141592920353982, |
|
"grad_norm": 0.18387003242969513, |
|
"learning_rate": 6.507454313243015e-05, |
|
"loss": 0.0334, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.331858407079646, |
|
"grad_norm": 0.22559407353401184, |
|
"learning_rate": 6.412970871996995e-05, |
|
"loss": 0.0292, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.3495575221238938, |
|
"grad_norm": 0.18541377782821655, |
|
"learning_rate": 6.317935830345338e-05, |
|
"loss": 0.0351, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.3672566371681416, |
|
"grad_norm": 0.17538294196128845, |
|
"learning_rate": 6.222386288392913e-05, |
|
"loss": 0.0329, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 1.3849557522123894, |
|
"grad_norm": 0.21896906197071075, |
|
"learning_rate": 6.126359547096975e-05, |
|
"loss": 0.0313, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.4026548672566372, |
|
"grad_norm": 0.1702110767364502, |
|
"learning_rate": 6.029893093705492e-05, |
|
"loss": 0.0168, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 1.420353982300885, |
|
"grad_norm": 0.279254674911499, |
|
"learning_rate": 5.9330245871227454e-05, |
|
"loss": 0.0371, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.4380530973451329, |
|
"grad_norm": 0.20467723906040192, |
|
"learning_rate": 5.835791843207916e-05, |
|
"loss": 0.0251, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.4557522123893805, |
|
"grad_norm": 0.23105274140834808, |
|
"learning_rate": 5.738232820012407e-05, |
|
"loss": 0.0243, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 1.4734513274336283, |
|
"grad_norm": 0.2554505467414856, |
|
"learning_rate": 5.640385602961634e-05, |
|
"loss": 0.0226, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 1.491150442477876, |
|
"grad_norm": 0.3279307186603546, |
|
"learning_rate": 5.5422883899871284e-05, |
|
"loss": 0.0181, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.508849557522124, |
|
"grad_norm": 0.23609845340251923, |
|
"learning_rate": 5.4439794766146746e-05, |
|
"loss": 0.03, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.5265486725663717, |
|
"grad_norm": 0.19427117705345154, |
|
"learning_rate": 5.34549724101439e-05, |
|
"loss": 0.0283, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 1.5442477876106193, |
|
"grad_norm": 0.21057020127773285, |
|
"learning_rate": 5.246880129018516e-05, |
|
"loss": 0.0328, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.5619469026548671, |
|
"grad_norm": 0.157065749168396, |
|
"learning_rate": 5.148166639112799e-05, |
|
"loss": 0.0265, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 1.579646017699115, |
|
"grad_norm": 0.21837662160396576, |
|
"learning_rate": 5.049395307407329e-05, |
|
"loss": 0.0353, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 1.5973451327433628, |
|
"grad_norm": 0.2608608305454254, |
|
"learning_rate": 4.950604692592672e-05, |
|
"loss": 0.0445, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.6150442477876106, |
|
"grad_norm": 0.29214149713516235, |
|
"learning_rate": 4.851833360887201e-05, |
|
"loss": 0.0319, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 1.6327433628318584, |
|
"grad_norm": 0.3026266396045685, |
|
"learning_rate": 4.7531198709814854e-05, |
|
"loss": 0.0387, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 1.6504424778761062, |
|
"grad_norm": 0.1753360629081726, |
|
"learning_rate": 4.654502758985611e-05, |
|
"loss": 0.0178, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.668141592920354, |
|
"grad_norm": 0.29309213161468506, |
|
"learning_rate": 4.5560205233853266e-05, |
|
"loss": 0.0279, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.6858407079646018, |
|
"grad_norm": 0.1859235018491745, |
|
"learning_rate": 4.4577116100128735e-05, |
|
"loss": 0.0225, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.7035398230088497, |
|
"grad_norm": 0.21251869201660156, |
|
"learning_rate": 4.3596143970383664e-05, |
|
"loss": 0.028, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.7212389380530975, |
|
"grad_norm": 0.2775147259235382, |
|
"learning_rate": 4.2617671799875944e-05, |
|
"loss": 0.0323, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.7389380530973453, |
|
"grad_norm": 0.3155283033847809, |
|
"learning_rate": 4.1642081567920846e-05, |
|
"loss": 0.0194, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.7566371681415929, |
|
"grad_norm": 0.13303042948246002, |
|
"learning_rate": 4.066975412877255e-05, |
|
"loss": 0.0211, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.7743362831858407, |
|
"grad_norm": 0.19871264696121216, |
|
"learning_rate": 3.970106906294509e-05, |
|
"loss": 0.0302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7743362831858407, |
|
"eval_loss": 0.0379580594599247, |
|
"eval_runtime": 6.6861, |
|
"eval_samples_per_second": 14.209, |
|
"eval_steps_per_second": 3.59, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7920353982300885, |
|
"grad_norm": 0.20991753041744232, |
|
"learning_rate": 3.873640452903026e-05, |
|
"loss": 0.0253, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.8097345132743363, |
|
"grad_norm": 0.3313141167163849, |
|
"learning_rate": 3.777613711607087e-05, |
|
"loss": 0.0289, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.827433628318584, |
|
"grad_norm": 0.19206620752811432, |
|
"learning_rate": 3.682064169654663e-05, |
|
"loss": 0.0265, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.8451327433628317, |
|
"grad_norm": 0.4013370871543884, |
|
"learning_rate": 3.587029128003006e-05, |
|
"loss": 0.0308, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.8628318584070795, |
|
"grad_norm": 0.18024751543998718, |
|
"learning_rate": 3.492545686756986e-05, |
|
"loss": 0.0162, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.8805309734513274, |
|
"grad_norm": 0.2305198758840561, |
|
"learning_rate": 3.3986507306858125e-05, |
|
"loss": 0.0284, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.8982300884955752, |
|
"grad_norm": 0.19050440192222595, |
|
"learning_rate": 3.3053809148238426e-05, |
|
"loss": 0.0227, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.915929203539823, |
|
"grad_norm": 0.15006108582019806, |
|
"learning_rate": 3.212772650161056e-05, |
|
"loss": 0.0191, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.9336283185840708, |
|
"grad_norm": 0.2283119261264801, |
|
"learning_rate": 3.12086208942881e-05, |
|
"loss": 0.0227, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.9513274336283186, |
|
"grad_norm": 0.2260059416294098, |
|
"learning_rate": 3.0296851129864168e-05, |
|
"loss": 0.0253, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.9690265486725664, |
|
"grad_norm": 0.23939450085163116, |
|
"learning_rate": 2.9392773148140408e-05, |
|
"loss": 0.0265, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.9867256637168142, |
|
"grad_norm": 0.1815921664237976, |
|
"learning_rate": 2.8496739886173995e-05, |
|
"loss": 0.0216, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.0088495575221237, |
|
"grad_norm": 0.24775980412960052, |
|
"learning_rate": 2.7609101140496863e-05, |
|
"loss": 0.0232, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 2.0265486725663715, |
|
"grad_norm": 0.12413739413022995, |
|
"learning_rate": 2.6730203430560947e-05, |
|
"loss": 0.0206, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.0442477876106193, |
|
"grad_norm": 0.1339850276708603, |
|
"learning_rate": 2.5860389863462765e-05, |
|
"loss": 0.0196, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.061946902654867, |
|
"grad_norm": 0.13854354619979858, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.0159, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.079646017699115, |
|
"grad_norm": 0.1154838278889656, |
|
"learning_rate": 2.414936972211272e-05, |
|
"loss": 0.0142, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 2.0973451327433628, |
|
"grad_norm": 0.10285928845405579, |
|
"learning_rate": 2.3308831101760486e-05, |
|
"loss": 0.01, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.1150442477876106, |
|
"grad_norm": 0.1806727647781372, |
|
"learning_rate": 2.247871227128709e-05, |
|
"loss": 0.0187, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 2.1327433628318584, |
|
"grad_norm": 0.19010406732559204, |
|
"learning_rate": 2.1659337295323118e-05, |
|
"loss": 0.0235, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.150442477876106, |
|
"grad_norm": 0.1291082799434662, |
|
"learning_rate": 2.0851026044276406e-05, |
|
"loss": 0.0144, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 2.168141592920354, |
|
"grad_norm": 0.16923344135284424, |
|
"learning_rate": 2.005409406946e-05, |
|
"loss": 0.0138, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.185840707964602, |
|
"grad_norm": 0.14110802114009857, |
|
"learning_rate": 1.9268852479906147e-05, |
|
"loss": 0.0119, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 2.2035398230088497, |
|
"grad_norm": 0.10906057059764862, |
|
"learning_rate": 1.849560782091445e-05, |
|
"loss": 0.009, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.2212389380530975, |
|
"grad_norm": 0.1177993193268776, |
|
"learning_rate": 1.7734661954381754e-05, |
|
"loss": 0.014, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.2389380530973453, |
|
"grad_norm": 0.10654302686452866, |
|
"learning_rate": 1.6986311940960147e-05, |
|
"loss": 0.0101, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 2.256637168141593, |
|
"grad_norm": 0.16150303184986115, |
|
"learning_rate": 1.6250849924089484e-05, |
|
"loss": 0.0168, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.274336283185841, |
|
"grad_norm": 0.12198394536972046, |
|
"learning_rate": 1.552856301594942e-05, |
|
"loss": 0.0136, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 2.2920353982300883, |
|
"grad_norm": 0.16058827936649323, |
|
"learning_rate": 1.4819733185375534e-05, |
|
"loss": 0.0196, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 2.309734513274336, |
|
"grad_norm": 0.19370707869529724, |
|
"learning_rate": 1.4124637147783432e-05, |
|
"loss": 0.0205, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.327433628318584, |
|
"grad_norm": 0.0963333398103714, |
|
"learning_rate": 1.3443546257143624e-05, |
|
"loss": 0.011, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 2.3451327433628317, |
|
"grad_norm": 0.1633400171995163, |
|
"learning_rate": 1.277672640004936e-05, |
|
"loss": 0.0208, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.3628318584070795, |
|
"grad_norm": 0.1224859431385994, |
|
"learning_rate": 1.2124437891918993e-05, |
|
"loss": 0.015, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 2.3805309734513274, |
|
"grad_norm": 0.10458586364984512, |
|
"learning_rate": 1.1486935375373126e-05, |
|
"loss": 0.0101, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.398230088495575, |
|
"grad_norm": 0.15571683645248413, |
|
"learning_rate": 1.0864467720826343e-05, |
|
"loss": 0.0162, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.415929203539823, |
|
"grad_norm": 0.08478286117315292, |
|
"learning_rate": 1.0257277929332332e-05, |
|
"loss": 0.0072, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 2.433628318584071, |
|
"grad_norm": 0.10365074872970581, |
|
"learning_rate": 9.66560303772035e-06, |
|
"loss": 0.0095, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 2.4513274336283186, |
|
"grad_norm": 0.07361367344856262, |
|
"learning_rate": 9.08967402605988e-06, |
|
"loss": 0.0076, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.4690265486725664, |
|
"grad_norm": 0.13091503083705902, |
|
"learning_rate": 8.529715727489912e-06, |
|
"loss": 0.0082, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 2.4867256637168142, |
|
"grad_norm": 0.11134269833564758, |
|
"learning_rate": 7.985946740447791e-06, |
|
"loss": 0.0079, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.504424778761062, |
|
"grad_norm": 0.18341496586799622, |
|
"learning_rate": 7.458579343331995e-06, |
|
"loss": 0.0129, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.52212389380531, |
|
"grad_norm": 0.11337222903966904, |
|
"learning_rate": 6.947819411632223e-06, |
|
"loss": 0.0173, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.5398230088495577, |
|
"grad_norm": 0.20701636373996735, |
|
"learning_rate": 6.45386633755894e-06, |
|
"loss": 0.0175, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 2.557522123893805, |
|
"grad_norm": 0.20868322253227234, |
|
"learning_rate": 5.976912952204017e-06, |
|
"loss": 0.0147, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.5752212389380533, |
|
"grad_norm": 0.10272178053855896, |
|
"learning_rate": 5.51714545026264e-06, |
|
"loss": 0.0128, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 2.5929203539823007, |
|
"grad_norm": 0.14340701699256897, |
|
"learning_rate": 5.074743317346009e-06, |
|
"loss": 0.0136, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 2.6106194690265485, |
|
"grad_norm": 0.12963014841079712, |
|
"learning_rate": 4.649879259913137e-06, |
|
"loss": 0.0093, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.6283185840707963, |
|
"grad_norm": 0.12100816518068314, |
|
"learning_rate": 4.242719137849077e-06, |
|
"loss": 0.0107, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 2.646017699115044, |
|
"grad_norm": 0.09687142819166183, |
|
"learning_rate": 3.853421899715992e-06, |
|
"loss": 0.0082, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 2.663716814159292, |
|
"grad_norm": 0.09663711488246918, |
|
"learning_rate": 3.4821395207022766e-06, |
|
"loss": 0.0081, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.663716814159292, |
|
"eval_loss": 0.032022152096033096, |
|
"eval_runtime": 6.6983, |
|
"eval_samples_per_second": 14.183, |
|
"eval_steps_per_second": 3.583, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.6814159292035398, |
|
"grad_norm": 0.12883880734443665, |
|
"learning_rate": 3.1290169432939553e-06, |
|
"loss": 0.0114, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 2.6991150442477876, |
|
"grad_norm": 0.16089491546154022, |
|
"learning_rate": 2.794192020691544e-06, |
|
"loss": 0.0165, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 2.7168141592920354, |
|
"grad_norm": 0.25050851702690125, |
|
"learning_rate": 2.4777954629944477e-06, |
|
"loss": 0.0182, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.734513274336283, |
|
"grad_norm": 0.09175170958042145, |
|
"learning_rate": 2.179950786173879e-06, |
|
"loss": 0.0057, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 2.752212389380531, |
|
"grad_norm": 0.1262611448764801, |
|
"learning_rate": 1.9007742638543102e-06, |
|
"loss": 0.0119, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 2.769911504424779, |
|
"grad_norm": 0.13375544548034668, |
|
"learning_rate": 1.6403748819221466e-06, |
|
"loss": 0.0125, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.7876106194690267, |
|
"grad_norm": 0.09264933317899704, |
|
"learning_rate": 1.3988542959794627e-06, |
|
"loss": 0.0109, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 2.8053097345132745, |
|
"grad_norm": 0.10276864469051361, |
|
"learning_rate": 1.1763067916593262e-06, |
|
"loss": 0.0125, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 2.823008849557522, |
|
"grad_norm": 0.08319110423326492, |
|
"learning_rate": 9.728192478182574e-07, |
|
"loss": 0.0082, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.84070796460177, |
|
"grad_norm": 0.168908029794693, |
|
"learning_rate": 7.884711026201585e-07, |
|
"loss": 0.0144, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.8584070796460175, |
|
"grad_norm": 0.1352635771036148, |
|
"learning_rate": 6.233343225249933e-07, |
|
"loss": 0.0158, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 2.8761061946902657, |
|
"grad_norm": 0.09505495429039001, |
|
"learning_rate": 4.774733741942206e-07, |
|
"loss": 0.0099, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.893805309734513, |
|
"grad_norm": 0.0929616317152977, |
|
"learning_rate": 3.5094519932415417e-07, |
|
"loss": 0.0078, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 2.911504424778761, |
|
"grad_norm": 0.10294952243566513, |
|
"learning_rate": 2.437991924167937e-07, |
|
"loss": 0.012, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 2.9292035398230087, |
|
"grad_norm": 0.125913605093956, |
|
"learning_rate": 1.560771814970885e-07, |
|
"loss": 0.0088, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.9469026548672566, |
|
"grad_norm": 0.14280952513217926, |
|
"learning_rate": 8.781341178393244e-08, |
|
"loss": 0.011, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 2.9646017699115044, |
|
"grad_norm": 0.1293206512928009, |
|
"learning_rate": 3.9034532321408076e-08, |
|
"loss": 0.0099, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 2.982300884955752, |
|
"grad_norm": 0.1486680805683136, |
|
"learning_rate": 9.75958557545842e-09, |
|
"loss": 0.0107, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 3.004424778761062, |
|
"grad_norm": 0.1905655860900879, |
|
"learning_rate": 0.0, |
|
"loss": 0.0132, |
|
"step": 169 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 169, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.248537858501509e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|