{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 200,
  "global_step": 922,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021691973969631237,
      "grad_norm": 0.18814353593860253,
      "learning_rate": 1.0752688172043011e-06,
      "loss": 0.7804,
      "step": 1
    },
    {
      "epoch": 0.004338394793926247,
      "grad_norm": 0.14783898318898897,
      "learning_rate": 2.1505376344086023e-06,
      "loss": 0.6624,
      "step": 2
    },
    {
      "epoch": 0.006507592190889371,
      "grad_norm": 0.19503814642198758,
      "learning_rate": 3.225806451612903e-06,
      "loss": 0.7092,
      "step": 3
    },
    {
      "epoch": 0.008676789587852495,
      "grad_norm": 0.1618902610161204,
      "learning_rate": 4.3010752688172045e-06,
      "loss": 0.7077,
      "step": 4
    },
    {
      "epoch": 0.010845986984815618,
      "grad_norm": 0.12013865404820073,
      "learning_rate": 5.376344086021506e-06,
      "loss": 0.6043,
      "step": 5
    },
    {
      "epoch": 0.013015184381778741,
      "grad_norm": 0.1374084610184008,
      "learning_rate": 6.451612903225806e-06,
      "loss": 0.6217,
      "step": 6
    },
    {
      "epoch": 0.015184381778741865,
      "grad_norm": 0.14626420092970213,
      "learning_rate": 7.526881720430108e-06,
      "loss": 0.6813,
      "step": 7
    },
    {
      "epoch": 0.01735357917570499,
      "grad_norm": 0.13817440123812896,
      "learning_rate": 8.602150537634409e-06,
      "loss": 0.5804,
      "step": 8
    },
    {
      "epoch": 0.019522776572668113,
      "grad_norm": 0.15287513050585858,
      "learning_rate": 9.67741935483871e-06,
      "loss": 0.623,
      "step": 9
    },
    {
      "epoch": 0.021691973969631236,
      "grad_norm": 0.1598803439887024,
      "learning_rate": 1.0752688172043012e-05,
      "loss": 0.6743,
      "step": 10
    },
    {
      "epoch": 0.02386117136659436,
      "grad_norm": 0.19385413161643053,
      "learning_rate": 1.1827956989247313e-05,
      "loss": 0.7609,
      "step": 11
    },
    {
      "epoch": 0.026030368763557483,
      "grad_norm": 0.22754758020045657,
      "learning_rate": 1.2903225806451613e-05,
      "loss": 0.6324,
      "step": 12
    },
    {
      "epoch": 0.028199566160520606,
      "grad_norm": 0.17059715163003164,
      "learning_rate": 1.3978494623655914e-05,
      "loss": 0.6333,
      "step": 13
    },
    {
      "epoch": 0.03036876355748373,
      "grad_norm": 0.18873401138092946,
      "learning_rate": 1.5053763440860215e-05,
      "loss": 0.7387,
      "step": 14
    },
    {
      "epoch": 0.03253796095444685,
      "grad_norm": 0.18496670943100807,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 0.6586,
      "step": 15
    },
    {
      "epoch": 0.03470715835140998,
      "grad_norm": 0.17005766008579865,
      "learning_rate": 1.7204301075268818e-05,
      "loss": 0.6018,
      "step": 16
    },
    {
      "epoch": 0.0368763557483731,
      "grad_norm": 0.19850157253361694,
      "learning_rate": 1.827956989247312e-05,
      "loss": 0.7417,
      "step": 17
    },
    {
      "epoch": 0.039045553145336226,
      "grad_norm": 0.16224315084212368,
      "learning_rate": 1.935483870967742e-05,
      "loss": 0.4922,
      "step": 18
    },
    {
      "epoch": 0.04121475054229935,
      "grad_norm": 0.17598848375010048,
      "learning_rate": 2.0430107526881722e-05,
      "loss": 0.6445,
      "step": 19
    },
    {
      "epoch": 0.04338394793926247,
      "grad_norm": 0.18904672614562715,
      "learning_rate": 2.1505376344086024e-05,
      "loss": 0.6504,
      "step": 20
    },
    {
      "epoch": 0.0455531453362256,
      "grad_norm": 0.17265224349161137,
      "learning_rate": 2.258064516129032e-05,
      "loss": 0.6245,
      "step": 21
    },
    {
      "epoch": 0.04772234273318872,
      "grad_norm": 0.2017634897229929,
      "learning_rate": 2.3655913978494626e-05,
      "loss": 0.6795,
      "step": 22
    },
    {
      "epoch": 0.049891540130151846,
      "grad_norm": 0.18337107237337277,
      "learning_rate": 2.4731182795698928e-05,
      "loss": 0.5461,
      "step": 23
    },
    {
      "epoch": 0.052060737527114966,
      "grad_norm": 0.1819879312384368,
      "learning_rate": 2.5806451612903226e-05,
      "loss": 0.6217,
      "step": 24
    },
    {
      "epoch": 0.05422993492407809,
      "grad_norm": 0.1796615077383316,
      "learning_rate": 2.6881720430107527e-05,
      "loss": 0.6185,
      "step": 25
    },
    {
      "epoch": 0.05639913232104121,
      "grad_norm": 0.154004978022556,
      "learning_rate": 2.7956989247311828e-05,
      "loss": 0.5814,
      "step": 26
    },
    {
      "epoch": 0.05856832971800434,
      "grad_norm": 0.15691846371505647,
      "learning_rate": 2.9032258064516133e-05,
      "loss": 0.5873,
      "step": 27
    },
    {
      "epoch": 0.06073752711496746,
      "grad_norm": 0.13885516449257118,
      "learning_rate": 3.010752688172043e-05,
      "loss": 0.5345,
      "step": 28
    },
    {
      "epoch": 0.06290672451193059,
      "grad_norm": 0.15573868807503372,
      "learning_rate": 3.118279569892473e-05,
      "loss": 0.5894,
      "step": 29
    },
    {
      "epoch": 0.0650759219088937,
      "grad_norm": 0.14860751998326854,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 0.5983,
      "step": 30
    },
    {
      "epoch": 0.06724511930585683,
      "grad_norm": 0.12573553934841997,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.5542,
      "step": 31
    },
    {
      "epoch": 0.06941431670281996,
      "grad_norm": 0.12949334468283782,
      "learning_rate": 3.4408602150537636e-05,
      "loss": 0.5369,
      "step": 32
    },
    {
      "epoch": 0.07158351409978309,
      "grad_norm": 0.1541384187233925,
      "learning_rate": 3.548387096774194e-05,
      "loss": 0.6545,
      "step": 33
    },
    {
      "epoch": 0.0737527114967462,
      "grad_norm": 0.1342436339209465,
      "learning_rate": 3.655913978494624e-05,
      "loss": 0.5753,
      "step": 34
    },
    {
      "epoch": 0.07592190889370933,
      "grad_norm": 0.13687890281351986,
      "learning_rate": 3.763440860215054e-05,
      "loss": 0.5251,
      "step": 35
    },
    {
      "epoch": 0.07809110629067245,
      "grad_norm": 0.14224847495035575,
      "learning_rate": 3.870967741935484e-05,
      "loss": 0.5494,
      "step": 36
    },
    {
      "epoch": 0.08026030368763558,
      "grad_norm": 0.1368222255571023,
      "learning_rate": 3.978494623655914e-05,
      "loss": 0.5793,
      "step": 37
    },
    {
      "epoch": 0.0824295010845987,
      "grad_norm": 0.14032242415063437,
      "learning_rate": 4.0860215053763444e-05,
      "loss": 0.5738,
      "step": 38
    },
    {
      "epoch": 0.08459869848156182,
      "grad_norm": 0.1077744610907224,
      "learning_rate": 4.1935483870967746e-05,
      "loss": 0.4712,
      "step": 39
    },
    {
      "epoch": 0.08676789587852494,
      "grad_norm": 0.1269827092811396,
      "learning_rate": 4.301075268817205e-05,
      "loss": 0.5071,
      "step": 40
    },
    {
      "epoch": 0.08893709327548807,
      "grad_norm": 0.15098582104711297,
      "learning_rate": 4.408602150537635e-05,
      "loss": 0.5202,
      "step": 41
    },
    {
      "epoch": 0.0911062906724512,
      "grad_norm": 0.13316194280918697,
      "learning_rate": 4.516129032258064e-05,
      "loss": 0.4469,
      "step": 42
    },
    {
      "epoch": 0.09327548806941431,
      "grad_norm": 0.13385436180609817,
      "learning_rate": 4.6236559139784944e-05,
      "loss": 0.4936,
      "step": 43
    },
    {
      "epoch": 0.09544468546637744,
      "grad_norm": 0.1456999546987616,
      "learning_rate": 4.731182795698925e-05,
      "loss": 0.4559,
      "step": 44
    },
    {
      "epoch": 0.09761388286334056,
      "grad_norm": 0.13569281275379386,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 0.4314,
      "step": 45
    },
    {
      "epoch": 0.09978308026030369,
      "grad_norm": 0.15212176484932607,
      "learning_rate": 4.9462365591397855e-05,
      "loss": 0.4876,
      "step": 46
    },
    {
      "epoch": 0.1019522776572668,
      "grad_norm": 0.12839200001749063,
      "learning_rate": 5.053763440860215e-05,
      "loss": 0.4748,
      "step": 47
    },
    {
      "epoch": 0.10412147505422993,
      "grad_norm": 0.12862351761485963,
      "learning_rate": 5.161290322580645e-05,
      "loss": 0.4125,
      "step": 48
    },
    {
      "epoch": 0.10629067245119306,
      "grad_norm": 0.12676342192056295,
      "learning_rate": 5.268817204301075e-05,
      "loss": 0.425,
      "step": 49
    },
    {
      "epoch": 0.10845986984815618,
      "grad_norm": 0.12433212919621899,
      "learning_rate": 5.3763440860215054e-05,
      "loss": 0.4082,
      "step": 50
    },
    {
      "epoch": 0.11062906724511931,
      "grad_norm": 0.13762864881467574,
      "learning_rate": 5.4838709677419355e-05,
      "loss": 0.4342,
      "step": 51
    },
    {
      "epoch": 0.11279826464208242,
      "grad_norm": 0.16375581387185012,
      "learning_rate": 5.5913978494623656e-05,
      "loss": 0.5276,
      "step": 52
    },
    {
      "epoch": 0.11496746203904555,
      "grad_norm": 0.14903663671440864,
      "learning_rate": 5.6989247311827965e-05,
      "loss": 0.4966,
      "step": 53
    },
    {
      "epoch": 0.11713665943600868,
      "grad_norm": 0.13085616750795798,
      "learning_rate": 5.8064516129032266e-05,
      "loss": 0.4177,
      "step": 54
    },
    {
      "epoch": 0.1193058568329718,
      "grad_norm": 0.1399410088881321,
      "learning_rate": 5.913978494623657e-05,
      "loss": 0.4759,
      "step": 55
    },
    {
      "epoch": 0.12147505422993492,
      "grad_norm": 0.17552603285175208,
      "learning_rate": 6.021505376344086e-05,
      "loss": 0.3852,
      "step": 56
    },
    {
      "epoch": 0.12364425162689804,
      "grad_norm": 0.150563045551466,
      "learning_rate": 6.129032258064517e-05,
      "loss": 0.4456,
      "step": 57
    },
    {
      "epoch": 0.12581344902386118,
      "grad_norm": 0.15103853247096755,
      "learning_rate": 6.236559139784946e-05,
      "loss": 0.486,
      "step": 58
    },
    {
      "epoch": 0.1279826464208243,
      "grad_norm": 0.13897437214670214,
      "learning_rate": 6.344086021505376e-05,
      "loss": 0.393,
      "step": 59
    },
    {
      "epoch": 0.1301518438177874,
      "grad_norm": 0.13998124057548186,
      "learning_rate": 6.451612903225807e-05,
      "loss": 0.3703,
      "step": 60
    },
    {
      "epoch": 0.13232104121475055,
      "grad_norm": 0.14910024036690483,
      "learning_rate": 6.559139784946236e-05,
      "loss": 0.4163,
      "step": 61
    },
    {
      "epoch": 0.13449023861171366,
      "grad_norm": 0.1426571111554546,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.3021,
      "step": 62
    },
    {
      "epoch": 0.13665943600867678,
      "grad_norm": 0.2255435027702441,
      "learning_rate": 6.774193548387096e-05,
      "loss": 0.348,
      "step": 63
    },
    {
      "epoch": 0.13882863340563992,
      "grad_norm": 0.15326407709554848,
      "learning_rate": 6.881720430107527e-05,
      "loss": 0.4446,
      "step": 64
    },
    {
      "epoch": 0.14099783080260303,
      "grad_norm": 0.15096250596140368,
      "learning_rate": 6.989247311827958e-05,
      "loss": 0.4356,
      "step": 65
    },
    {
      "epoch": 0.14316702819956617,
      "grad_norm": 0.14931162214298302,
      "learning_rate": 7.096774193548388e-05,
      "loss": 0.3921,
      "step": 66
    },
    {
      "epoch": 0.14533622559652928,
      "grad_norm": 0.13357955043603964,
      "learning_rate": 7.204301075268818e-05,
      "loss": 0.285,
      "step": 67
    },
    {
      "epoch": 0.1475054229934924,
      "grad_norm": 0.14467881960033757,
      "learning_rate": 7.311827956989248e-05,
      "loss": 0.3828,
      "step": 68
    },
    {
      "epoch": 0.14967462039045554,
      "grad_norm": 0.13526505831717678,
      "learning_rate": 7.419354838709677e-05,
      "loss": 0.3523,
      "step": 69
    },
    {
      "epoch": 0.15184381778741865,
      "grad_norm": 0.17267391484274627,
      "learning_rate": 7.526881720430108e-05,
      "loss": 0.4506,
      "step": 70
    },
    {
      "epoch": 0.1540130151843818,
      "grad_norm": 0.15979585190145928,
      "learning_rate": 7.634408602150538e-05,
      "loss": 0.3862,
      "step": 71
    },
    {
      "epoch": 0.1561822125813449,
      "grad_norm": 0.15434731006679933,
      "learning_rate": 7.741935483870968e-05,
      "loss": 0.3694,
      "step": 72
    },
    {
      "epoch": 0.15835140997830802,
      "grad_norm": 0.14815314471149627,
      "learning_rate": 7.849462365591398e-05,
      "loss": 0.3644,
      "step": 73
    },
    {
      "epoch": 0.16052060737527116,
      "grad_norm": 0.15387792339587666,
      "learning_rate": 7.956989247311829e-05,
      "loss": 0.4095,
      "step": 74
    },
    {
      "epoch": 0.16268980477223427,
      "grad_norm": 0.15135351623798715,
      "learning_rate": 8.064516129032258e-05,
      "loss": 0.4128,
      "step": 75
    },
    {
      "epoch": 0.1648590021691974,
      "grad_norm": 0.13006929468252665,
      "learning_rate": 8.172043010752689e-05,
      "loss": 0.2786,
      "step": 76
    },
    {
      "epoch": 0.16702819956616052,
      "grad_norm": 0.17289441324627974,
      "learning_rate": 8.27956989247312e-05,
      "loss": 0.4148,
      "step": 77
    },
    {
      "epoch": 0.16919739696312364,
      "grad_norm": 0.15172742838256395,
      "learning_rate": 8.387096774193549e-05,
      "loss": 0.3758,
      "step": 78
    },
    {
      "epoch": 0.17136659436008678,
      "grad_norm": 0.16805860911767084,
      "learning_rate": 8.494623655913979e-05,
      "loss": 0.3755,
      "step": 79
    },
    {
      "epoch": 0.1735357917570499,
      "grad_norm": 0.13731854156086779,
      "learning_rate": 8.60215053763441e-05,
      "loss": 0.3031,
      "step": 80
    },
    {
      "epoch": 0.175704989154013,
      "grad_norm": 0.15898088745954764,
      "learning_rate": 8.709677419354839e-05,
      "loss": 0.3785,
      "step": 81
    },
    {
      "epoch": 0.17787418655097614,
      "grad_norm": 0.14869088820217116,
      "learning_rate": 8.81720430107527e-05,
      "loss": 0.3831,
      "step": 82
    },
    {
      "epoch": 0.18004338394793926,
      "grad_norm": 0.15285942249523343,
      "learning_rate": 8.924731182795699e-05,
      "loss": 0.3302,
      "step": 83
    },
    {
      "epoch": 0.1822125813449024,
      "grad_norm": 0.15925128432243588,
      "learning_rate": 9.032258064516129e-05,
      "loss": 0.389,
      "step": 84
    },
    {
      "epoch": 0.1843817787418655,
      "grad_norm": 0.1538607349870438,
      "learning_rate": 9.13978494623656e-05,
      "loss": 0.366,
      "step": 85
    },
    {
      "epoch": 0.18655097613882862,
      "grad_norm": 0.16016055121309444,
      "learning_rate": 9.247311827956989e-05,
      "loss": 0.3554,
      "step": 86
    },
    {
      "epoch": 0.18872017353579176,
      "grad_norm": 0.14283942143425307,
      "learning_rate": 9.35483870967742e-05,
      "loss": 0.3336,
      "step": 87
    },
    {
      "epoch": 0.19088937093275488,
      "grad_norm": 0.16354104148599777,
      "learning_rate": 9.46236559139785e-05,
      "loss": 0.4439,
      "step": 88
    },
    {
      "epoch": 0.19305856832971802,
      "grad_norm": 0.18296179182683667,
      "learning_rate": 9.56989247311828e-05,
      "loss": 0.4064,
      "step": 89
    },
    {
      "epoch": 0.19522776572668113,
      "grad_norm": 0.15460229153045282,
      "learning_rate": 9.677419354838711e-05,
      "loss": 0.3407,
      "step": 90
    },
    {
      "epoch": 0.19739696312364424,
      "grad_norm": 0.15215604875393682,
      "learning_rate": 9.78494623655914e-05,
      "loss": 0.3553,
      "step": 91
    },
    {
      "epoch": 0.19956616052060738,
      "grad_norm": 0.1597213335615803,
      "learning_rate": 9.892473118279571e-05,
      "loss": 0.3458,
      "step": 92
    },
    {
      "epoch": 0.2017353579175705,
      "grad_norm": 0.15328246596239517,
      "learning_rate": 0.0001,
      "loss": 0.342,
      "step": 93
    },
    {
      "epoch": 0.2039045553145336,
      "grad_norm": 0.15921412490863757,
      "learning_rate": 9.999964097046781e-05,
      "loss": 0.3843,
      "step": 94
    },
    {
      "epoch": 0.20607375271149675,
      "grad_norm": 0.15390306970937798,
      "learning_rate": 9.999856388702731e-05,
      "loss": 0.3442,
      "step": 95
    },
    {
      "epoch": 0.20824295010845986,
      "grad_norm": 0.15618794488353185,
      "learning_rate": 9.999676876514667e-05,
      "loss": 0.3396,
      "step": 96
    },
    {
      "epoch": 0.210412147505423,
      "grad_norm": 0.13701461086257688,
      "learning_rate": 9.999425563060602e-05,
      "loss": 0.2914,
      "step": 97
    },
    {
      "epoch": 0.21258134490238612,
      "grad_norm": 0.17727970752759084,
      "learning_rate": 9.999102451949688e-05,
      "loss": 0.3815,
      "step": 98
    },
    {
      "epoch": 0.21475054229934923,
      "grad_norm": 0.17606273323635682,
      "learning_rate": 9.998707547822186e-05,
      "loss": 0.377,
      "step": 99
    },
    {
      "epoch": 0.21691973969631237,
      "grad_norm": 0.16285779873289172,
      "learning_rate": 9.998240856349383e-05,
      "loss": 0.3221,
      "step": 100
    },
    {
      "epoch": 0.21908893709327548,
      "grad_norm": 0.17385851069798777,
      "learning_rate": 9.997702384233523e-05,
      "loss": 0.3539,
      "step": 101
    },
    {
      "epoch": 0.22125813449023862,
      "grad_norm": 0.15796293985116644,
      "learning_rate": 9.9970921392077e-05,
      "loss": 0.3407,
      "step": 102
    },
    {
      "epoch": 0.22342733188720174,
      "grad_norm": 0.16210590012135875,
      "learning_rate": 9.996410130035751e-05,
      "loss": 0.3433,
      "step": 103
    },
    {
      "epoch": 0.22559652928416485,
      "grad_norm": 0.1815484690495641,
      "learning_rate": 9.995656366512138e-05,
      "loss": 0.3576,
      "step": 104
    },
    {
      "epoch": 0.227765726681128,
      "grad_norm": 0.16370570821991282,
      "learning_rate": 9.994830859461793e-05,
      "loss": 0.3713,
      "step": 105
    },
    {
      "epoch": 0.2299349240780911,
      "grad_norm": 0.16719534192377292,
      "learning_rate": 9.993933620739974e-05,
      "loss": 0.3868,
      "step": 106
    },
    {
      "epoch": 0.23210412147505424,
      "grad_norm": 0.19714493676306685,
      "learning_rate": 9.992964663232086e-05,
      "loss": 0.3378,
      "step": 107
    },
    {
      "epoch": 0.23427331887201736,
      "grad_norm": 0.14799736868617666,
      "learning_rate": 9.991924000853505e-05,
      "loss": 0.3252,
      "step": 108
    },
    {
      "epoch": 0.23644251626898047,
      "grad_norm": 0.15459646771807045,
      "learning_rate": 9.990811648549374e-05,
      "loss": 0.349,
      "step": 109
    },
    {
      "epoch": 0.2386117136659436,
      "grad_norm": 0.15814773704274712,
      "learning_rate": 9.989627622294384e-05,
      "loss": 0.3459,
      "step": 110
    },
    {
      "epoch": 0.24078091106290672,
      "grad_norm": 0.1573452355191302,
      "learning_rate": 9.988371939092551e-05,
      "loss": 0.3436,
      "step": 111
    },
    {
      "epoch": 0.24295010845986983,
      "grad_norm": 0.17857552638412025,
      "learning_rate": 9.987044616976969e-05,
      "loss": 0.3515,
      "step": 112
    },
    {
      "epoch": 0.24511930585683298,
      "grad_norm": 0.17548675076361434,
      "learning_rate": 9.985645675009551e-05,
      "loss": 0.4118,
      "step": 113
    },
    {
      "epoch": 0.2472885032537961,
      "grad_norm": 0.16436582516347584,
      "learning_rate": 9.984175133280758e-05,
      "loss": 0.3623,
      "step": 114
    },
    {
      "epoch": 0.24945770065075923,
      "grad_norm": 0.14320940017640504,
      "learning_rate": 9.982633012909304e-05,
      "loss": 0.296,
      "step": 115
    },
    {
      "epoch": 0.25162689804772237,
      "grad_norm": 0.16287542910761935,
      "learning_rate": 9.981019336041861e-05,
      "loss": 0.296,
      "step": 116
    },
    {
      "epoch": 0.25379609544468545,
      "grad_norm": 0.16861388898681715,
      "learning_rate": 9.979334125852735e-05,
      "loss": 0.337,
      "step": 117
    },
    {
      "epoch": 0.2559652928416486,
      "grad_norm": 0.15898794759349474,
      "learning_rate": 9.977577406543535e-05,
      "loss": 0.3329,
      "step": 118
    },
    {
      "epoch": 0.25813449023861174,
      "grad_norm": 0.1734384500805255,
      "learning_rate": 9.975749203342823e-05,
      "loss": 0.3463,
      "step": 119
    },
    {
      "epoch": 0.2603036876355748,
      "grad_norm": 0.1687288542407359,
      "learning_rate": 9.97384954250576e-05,
      "loss": 0.3716,
      "step": 120
    },
    {
      "epoch": 0.26247288503253796,
      "grad_norm": 0.15571310481512463,
      "learning_rate": 9.971878451313719e-05,
      "loss": 0.3174,
      "step": 121
    },
    {
      "epoch": 0.2646420824295011,
      "grad_norm": 0.1519598125985657,
      "learning_rate": 9.969835958073897e-05,
      "loss": 0.2869,
      "step": 122
    },
    {
      "epoch": 0.2668112798264642,
      "grad_norm": 0.1631777318786355,
      "learning_rate": 9.967722092118909e-05,
      "loss": 0.3269,
      "step": 123
    },
    {
      "epoch": 0.26898047722342733,
      "grad_norm": 0.15802300841876601,
      "learning_rate": 9.965536883806368e-05,
      "loss": 0.3369,
      "step": 124
    },
    {
      "epoch": 0.27114967462039047,
      "grad_norm": 0.15777870559434587,
      "learning_rate": 9.963280364518448e-05,
      "loss": 0.3434,
      "step": 125
    },
    {
      "epoch": 0.27331887201735355,
      "grad_norm": 0.14460302054911775,
      "learning_rate": 9.96095256666143e-05,
      "loss": 0.2965,
      "step": 126
    },
    {
      "epoch": 0.2754880694143167,
      "grad_norm": 0.17074141051395292,
      "learning_rate": 9.958553523665242e-05,
      "loss": 0.3627,
      "step": 127
    },
    {
      "epoch": 0.27765726681127983,
      "grad_norm": 0.16289437443944546,
      "learning_rate": 9.956083269982973e-05,
      "loss": 0.3269,
      "step": 128
    },
    {
      "epoch": 0.279826464208243,
      "grad_norm": 0.19184991332619222,
      "learning_rate": 9.953541841090388e-05,
      "loss": 0.3729,
      "step": 129
    },
    {
      "epoch": 0.28199566160520606,
      "grad_norm": 0.14220941041693624,
      "learning_rate": 9.950929273485404e-05,
      "loss": 0.3038,
      "step": 130
    },
    {
      "epoch": 0.2841648590021692,
      "grad_norm": 0.15132399774722008,
      "learning_rate": 9.948245604687581e-05,
      "loss": 0.3162,
      "step": 131
    },
    {
      "epoch": 0.28633405639913234,
      "grad_norm": 0.1567696241360261,
      "learning_rate": 9.945490873237571e-05,
      "loss": 0.3041,
      "step": 132
    },
    {
      "epoch": 0.2885032537960954,
      "grad_norm": 0.14672088173624798,
      "learning_rate": 9.942665118696575e-05,
      "loss": 0.2985,
      "step": 133
    },
    {
      "epoch": 0.29067245119305857,
      "grad_norm": 0.1769806338719645,
      "learning_rate": 9.939768381645762e-05,
      "loss": 0.3648,
      "step": 134
    },
    {
      "epoch": 0.2928416485900217,
      "grad_norm": 0.1600455826423261,
      "learning_rate": 9.9368007036857e-05,
      "loss": 0.3428,
      "step": 135
    },
    {
      "epoch": 0.2950108459869848,
      "grad_norm": 0.15482109589656579,
      "learning_rate": 9.933762127435751e-05,
      "loss": 0.3211,
      "step": 136
    },
    {
      "epoch": 0.29718004338394793,
      "grad_norm": 0.1376974659275616,
      "learning_rate": 9.930652696533458e-05,
      "loss": 0.2655,
      "step": 137
    },
    {
      "epoch": 0.2993492407809111,
      "grad_norm": 0.1604514505800568,
      "learning_rate": 9.927472455633921e-05,
      "loss": 0.3424,
      "step": 138
    },
    {
      "epoch": 0.30151843817787416,
      "grad_norm": 0.14831623076631825,
      "learning_rate": 9.92422145040916e-05,
      "loss": 0.2929,
      "step": 139
    },
    {
      "epoch": 0.3036876355748373,
      "grad_norm": 0.14497199913662426,
      "learning_rate": 9.920899727547446e-05,
      "loss": 0.3063,
      "step": 140
    },
    {
      "epoch": 0.30585683297180044,
      "grad_norm": 0.1686161432311634,
      "learning_rate": 9.917507334752647e-05,
      "loss": 0.3695,
      "step": 141
    },
    {
      "epoch": 0.3080260303687636,
      "grad_norm": 0.1415843035366969,
      "learning_rate": 9.914044320743527e-05,
      "loss": 0.2989,
      "step": 142
    },
    {
      "epoch": 0.31019522776572667,
      "grad_norm": 0.1537154117560618,
      "learning_rate": 9.91051073525306e-05,
      "loss": 0.323,
      "step": 143
    },
    {
      "epoch": 0.3123644251626898,
      "grad_norm": 0.19421465105677843,
      "learning_rate": 9.90690662902771e-05,
      "loss": 0.3331,
      "step": 144
    },
    {
      "epoch": 0.31453362255965295,
      "grad_norm": 0.15822998528244542,
      "learning_rate": 9.903232053826696e-05,
      "loss": 0.2934,
      "step": 145
    },
    {
      "epoch": 0.31670281995661603,
      "grad_norm": 0.15358991548479717,
      "learning_rate": 9.89948706242126e-05,
      "loss": 0.3206,
      "step": 146
    },
    {
      "epoch": 0.3188720173535792,
      "grad_norm": 0.14959200755836038,
      "learning_rate": 9.895671708593903e-05,
      "loss": 0.3114,
      "step": 147
    },
    {
      "epoch": 0.3210412147505423,
      "grad_norm": 0.16473377943898643,
      "learning_rate": 9.891786047137615e-05,
      "loss": 0.3312,
      "step": 148
    },
    {
      "epoch": 0.3232104121475054,
      "grad_norm": 0.1648174790366256,
      "learning_rate": 9.887830133855079e-05,
      "loss": 0.3597,
      "step": 149
    },
    {
      "epoch": 0.32537960954446854,
      "grad_norm": 0.1887880278076816,
      "learning_rate": 9.883804025557888e-05,
      "loss": 0.3968,
      "step": 150
    },
    {
      "epoch": 0.3275488069414317,
      "grad_norm": 0.17273118800696421,
      "learning_rate": 9.879707780065712e-05,
      "loss": 0.3731,
      "step": 151
    },
    {
      "epoch": 0.3297180043383948,
      "grad_norm": 0.16253972479744438,
      "learning_rate": 9.875541456205473e-05,
      "loss": 0.3408,
      "step": 152
    },
    {
      "epoch": 0.3318872017353579,
      "grad_norm": 0.1583694488182022,
      "learning_rate": 9.871305113810505e-05,
      "loss": 0.3121,
      "step": 153
    },
    {
      "epoch": 0.33405639913232105,
      "grad_norm": 0.16161186391084162,
      "learning_rate": 9.86699881371969e-05,
      "loss": 0.3331,
      "step": 154
    },
    {
      "epoch": 0.3362255965292842,
      "grad_norm": 0.15558997399940616,
      "learning_rate": 9.862622617776582e-05,
      "loss": 0.3136,
      "step": 155
    },
    {
      "epoch": 0.3383947939262473,
      "grad_norm": 0.14717176320577952,
      "learning_rate": 9.858176588828526e-05,
      "loss": 0.3227,
      "step": 156
    },
    {
      "epoch": 0.3405639913232104,
      "grad_norm": 0.15604482540053785,
      "learning_rate": 9.85366079072575e-05,
      "loss": 0.3093,
      "step": 157
    },
    {
      "epoch": 0.34273318872017355,
      "grad_norm": 0.164872478623103,
      "learning_rate": 9.849075288320446e-05,
      "loss": 0.3231,
      "step": 158
    },
    {
      "epoch": 0.34490238611713664,
      "grad_norm": 0.18011689706998146,
      "learning_rate": 9.84442014746585e-05,
      "loss": 0.3571,
      "step": 159
    },
    {
      "epoch": 0.3470715835140998,
      "grad_norm": 0.2281829326423908,
      "learning_rate": 9.839695435015279e-05,
      "loss": 0.4504,
      "step": 160
    },
    {
      "epoch": 0.3492407809110629,
      "grad_norm": 0.17155618609511838,
      "learning_rate": 9.83490121882119e-05,
      "loss": 0.3318,
      "step": 161
    },
    {
      "epoch": 0.351409978308026,
      "grad_norm": 0.1451699212552566,
      "learning_rate": 9.830037567734187e-05,
      "loss": 0.27,
      "step": 162
    },
    {
      "epoch": 0.35357917570498915,
      "grad_norm": 0.16172715854603054,
      "learning_rate": 9.825104551602047e-05,
      "loss": 0.2993,
      "step": 163
    },
    {
      "epoch": 0.3557483731019523,
      "grad_norm": 0.16625581355250432,
      "learning_rate": 9.820102241268708e-05,
      "loss": 0.3447,
      "step": 164
    },
    {
      "epoch": 0.3579175704989154,
      "grad_norm": 0.17497945522123493,
      "learning_rate": 9.815030708573256e-05,
      "loss": 0.3604,
      "step": 165
    },
    {
      "epoch": 0.3600867678958785,
      "grad_norm": 0.16549978577941174,
      "learning_rate": 9.809890026348891e-05,
      "loss": 0.3325,
      "step": 166
    },
    {
      "epoch": 0.36225596529284165,
      "grad_norm": 0.15123204350737743,
      "learning_rate": 9.804680268421885e-05,
      "loss": 0.3047,
      "step": 167
    },
    {
      "epoch": 0.3644251626898048,
      "grad_norm": 0.1653117817038064,
      "learning_rate": 9.799401509610511e-05,
      "loss": 0.3336,
      "step": 168
    },
    {
      "epoch": 0.3665943600867679,
      "grad_norm": 0.15941088150160534,
      "learning_rate": 9.794053825723983e-05,
      "loss": 0.3415,
      "step": 169
    },
    {
      "epoch": 0.368763557483731,
      "grad_norm": 0.1569489292887316,
      "learning_rate": 9.788637293561363e-05,
      "loss": 0.3086,
      "step": 170
    },
    {
      "epoch": 0.37093275488069416,
      "grad_norm": 0.15521077438298633,
      "learning_rate": 9.783151990910446e-05,
      "loss": 0.3129,
      "step": 171
    },
    {
      "epoch": 0.37310195227765725,
      "grad_norm": 0.15853168180990132,
      "learning_rate": 9.777597996546661e-05,
      "loss": 0.3197,
      "step": 172
    },
    {
      "epoch": 0.3752711496746204,
      "grad_norm": 0.17021980724333097,
      "learning_rate": 9.771975390231927e-05,
      "loss": 0.3438,
      "step": 173
    },
    {
      "epoch": 0.3774403470715835,
      "grad_norm": 0.1467658403488705,
      "learning_rate": 9.766284252713511e-05,
      "loss": 0.2868,
      "step": 174
    },
    {
      "epoch": 0.3796095444685466,
      "grad_norm": 0.19325560102675848,
      "learning_rate": 9.760524665722874e-05,
      "loss": 0.4445,
      "step": 175
    },
    {
      "epoch": 0.38177874186550975,
      "grad_norm": 0.1676049985924069,
      "learning_rate": 9.754696711974486e-05,
      "loss": 0.3596,
      "step": 176
    },
    {
      "epoch": 0.3839479392624729,
      "grad_norm": 0.1517046172036335,
      "learning_rate": 9.748800475164648e-05,
      "loss": 0.2814,
      "step": 177
    },
    {
      "epoch": 0.38611713665943603,
      "grad_norm": 0.1499206779996939,
      "learning_rate": 9.742836039970287e-05,
      "loss": 0.3078,
      "step": 178
    },
    {
      "epoch": 0.3882863340563991,
      "grad_norm": 0.16835317016600657,
      "learning_rate": 9.736803492047736e-05,
      "loss": 0.346,
      "step": 179
    },
    {
      "epoch": 0.39045553145336226,
      "grad_norm": 0.1735161690202731,
      "learning_rate": 9.730702918031511e-05,
      "loss": 0.3339,
      "step": 180
    },
    {
      "epoch": 0.3926247288503254,
      "grad_norm": 0.178265268230418,
      "learning_rate": 9.724534405533061e-05,
      "loss": 0.3581,
      "step": 181
    },
    {
      "epoch": 0.3947939262472885,
      "grad_norm": 0.15928651246621914,
      "learning_rate": 9.718298043139513e-05,
      "loss": 0.3158,
      "step": 182
    },
    {
      "epoch": 0.3969631236442516,
      "grad_norm": 0.15680437561632446,
      "learning_rate": 9.711993920412395e-05,
      "loss": 0.3612,
      "step": 183
    },
    {
      "epoch": 0.39913232104121477,
      "grad_norm": 0.16147834287905172,
      "learning_rate": 9.70562212788636e-05,
      "loss": 0.3213,
      "step": 184
    },
    {
      "epoch": 0.40130151843817785,
      "grad_norm": 0.16810458922445323,
      "learning_rate": 9.699182757067875e-05,
      "loss": 0.3715,
      "step": 185
    },
    {
      "epoch": 0.403470715835141,
      "grad_norm": 0.16273652493527896,
      "learning_rate": 9.69267590043391e-05,
      "loss": 0.2901,
      "step": 186
    },
    {
      "epoch": 0.40563991323210413,
      "grad_norm": 0.17226018154021203,
      "learning_rate": 9.686101651430612e-05,
      "loss": 0.3774,
      "step": 187
    },
    {
      "epoch": 0.4078091106290672,
      "grad_norm": 0.18016919749286983,
      "learning_rate": 9.679460104471965e-05,
      "loss": 0.3602,
      "step": 188
    },
    {
      "epoch": 0.40997830802603036,
      "grad_norm": 0.15507719486138638,
      "learning_rate": 9.672751354938429e-05,
      "loss": 0.3129,
      "step": 189
    },
    {
      "epoch": 0.4121475054229935,
      "grad_norm": 0.1582451132361843,
      "learning_rate": 9.66597549917557e-05,
      "loss": 0.3183,
      "step": 190
    },
    {
      "epoch": 0.41431670281995664,
      "grad_norm": 0.14849683648502013,
      "learning_rate": 9.659132634492684e-05,
      "loss": 0.3103,
      "step": 191
    },
    {
      "epoch": 0.4164859002169197,
      "grad_norm": 0.24758668306536105,
      "learning_rate": 9.652222859161388e-05,
      "loss": 0.3157,
      "step": 192
    },
    {
      "epoch": 0.41865509761388287,
      "grad_norm": 0.17938378050442594,
      "learning_rate": 9.645246272414221e-05,
      "loss": 0.3715,
      "step": 193
    },
    {
      "epoch": 0.420824295010846,
      "grad_norm": 0.14292889306164755,
      "learning_rate": 9.63820297444321e-05,
      "loss": 0.281,
      "step": 194
    },
    {
      "epoch": 0.4229934924078091,
      "grad_norm": 0.15121757619907555,
      "learning_rate": 9.63109306639843e-05,
      "loss": 0.285,
      "step": 195
    },
    {
      "epoch": 0.42516268980477223,
      "grad_norm": 0.15479911159606385,
      "learning_rate": 9.623916650386564e-05,
      "loss": 0.333,
      "step": 196
    },
    {
      "epoch": 0.42733188720173537,
      "grad_norm": 0.19245481852848167,
      "learning_rate": 9.61667382946942e-05,
      "loss": 0.3803,
      "step": 197
    },
    {
      "epoch": 0.42950108459869846,
      "grad_norm": 0.16251333268925855,
      "learning_rate": 9.609364707662467e-05,
      "loss": 0.3326,
      "step": 198
    },
    {
      "epoch": 0.4316702819956616,
      "grad_norm": 0.15843432631003965,
      "learning_rate": 9.601989389933323e-05,
      "loss": 0.3151,
      "step": 199
    },
    {
      "epoch": 0.43383947939262474,
      "grad_norm": 0.1804329476464318,
      "learning_rate": 9.594547982200266e-05,
      "loss": 0.3436,
      "step": 200
    },
    {
      "epoch": 0.43383947939262474,
      "eval_loss": 0.33424264192581177,
      "eval_runtime": 39.7662,
      "eval_samples_per_second": 0.478,
      "eval_steps_per_second": 0.126,
      "step": 200
    },
    {
      "epoch": 0.4360086767895879,
      "grad_norm": 0.19645387562310979,
      "learning_rate": 9.5870405913307e-05,
      "loss": 0.3159,
      "step": 201
    },
    {
      "epoch": 0.43817787418655096,
      "grad_norm": 0.1644546677880722,
      "learning_rate": 9.579467325139627e-05,
      "loss": 0.3315,
      "step": 202
    },
    {
      "epoch": 0.4403470715835141,
      "grad_norm": 0.15812376696679412,
      "learning_rate": 9.571828292388096e-05,
      "loss": 0.3348,
      "step": 203
    },
    {
      "epoch": 0.44251626898047725,
      "grad_norm": 0.1675206237509223,
      "learning_rate": 9.56412360278164e-05,
      "loss": 0.3721,
      "step": 204
    },
    {
      "epoch": 0.44468546637744033,
      "grad_norm": 0.14308548515478076,
      "learning_rate": 9.556353366968705e-05,
      "loss": 0.2651,
      "step": 205
    },
    {
      "epoch": 0.44685466377440347,
      "grad_norm": 0.15386997755578125,
      "learning_rate": 9.548517696539054e-05,
      "loss": 0.2701,
      "step": 206
    },
    {
      "epoch": 0.4490238611713666,
      "grad_norm": 0.14640343506659434,
      "learning_rate": 9.540616704022173e-05,
      "loss": 0.2496,
      "step": 207
    },
    {
      "epoch": 0.4511930585683297,
      "grad_norm": 0.16521172350141197,
      "learning_rate": 9.532650502885646e-05,
      "loss": 0.3337,
      "step": 208
    },
    {
      "epoch": 0.45336225596529284,
      "grad_norm": 0.1540772430165373,
      "learning_rate": 9.524619207533532e-05,
      "loss": 0.3043,
      "step": 209
    },
    {
      "epoch": 0.455531453362256,
      "grad_norm": 0.16165589280753936,
      "learning_rate": 9.516522933304721e-05,
      "loss": 0.3276,
      "step": 210
    },
    {
      "epoch": 0.45770065075921906,
      "grad_norm": 0.1883035011589188,
      "learning_rate": 9.508361796471272e-05,
      "loss": 0.3584,
      "step": 211
    },
    {
      "epoch": 0.4598698481561822,
      "grad_norm": 0.14934674223988761,
      "learning_rate": 9.500135914236755e-05,
      "loss": 0.2946,
      "step": 212
    },
    {
      "epoch": 0.46203904555314534,
      "grad_norm": 0.16930578402110355,
      "learning_rate": 9.491845404734551e-05,
      "loss": 0.3423,
      "step": 213
    },
    {
      "epoch": 0.4642082429501085,
      "grad_norm": 0.16818902583571363,
      "learning_rate": 9.483490387026174e-05,
      "loss": 0.3245,
      "step": 214
    },
    {
      "epoch": 0.46637744034707157,
      "grad_norm": 0.1646282158556431,
      "learning_rate": 9.475070981099545e-05,
      "loss": 0.3377,
      "step": 215
    },
    {
      "epoch": 0.4685466377440347,
      "grad_norm": 0.15601143737978804,
      "learning_rate": 9.466587307867281e-05,
      "loss": 0.3181,
      "step": 216
    },
    {
      "epoch": 0.47071583514099785,
      "grad_norm": 0.16754840015499342,
      "learning_rate": 9.458039489164951e-05,
      "loss": 0.3682,
      "step": 217
    },
    {
      "epoch": 0.47288503253796094,
      "grad_norm": 0.1426551523167691,
      "learning_rate": 9.449427647749328e-05,
      "loss": 0.267,
      "step": 218
    },
    {
      "epoch": 0.4750542299349241,
      "grad_norm": 0.16599632915307785,
      "learning_rate": 9.440751907296628e-05,
      "loss": 0.3181,
      "step": 219
    },
    {
      "epoch": 0.4772234273318872,
      "grad_norm": 0.1620746064207198,
      "learning_rate": 9.432012392400733e-05,
      "loss": 0.3676,
      "step": 220
    },
    {
      "epoch": 0.4793926247288503,
      "grad_norm": 0.15120817805826722,
      "learning_rate": 9.423209228571398e-05,
      "loss": 0.3158,
      "step": 221
    },
    {
      "epoch": 0.48156182212581344,
      "grad_norm": 0.14803905174097284,
      "learning_rate": 9.414342542232462e-05,
      "loss": 0.2836,
      "step": 222
    },
    {
      "epoch": 0.4837310195227766,
      "grad_norm": 0.15220763579005372,
      "learning_rate": 9.405412460720006e-05,
      "loss": 0.2827,
      "step": 223
    },
    {
      "epoch": 0.48590021691973967,
      "grad_norm": 0.15404544864962286,
      "learning_rate": 9.396419112280555e-05,
      "loss": 0.3025,
      "step": 224
    },
    {
      "epoch": 0.4880694143167028,
      "grad_norm": 0.15073679537979376,
      "learning_rate": 9.387362626069216e-05,
      "loss": 0.3144,
      "step": 225
    },
    {
      "epoch": 0.49023861171366595,
      "grad_norm": 0.1610803970826281,
      "learning_rate": 9.378243132147825e-05,
      "loss": 0.2977,
      "step": 226
    },
    {
      "epoch": 0.4924078091106291,
      "grad_norm": 0.16316823817190756,
      "learning_rate": 9.369060761483095e-05,
      "loss": 0.3128,
      "step": 227
    },
    {
      "epoch": 0.4945770065075922,
      "grad_norm": 0.163300375716653,
      "learning_rate": 9.359815645944709e-05,
      "loss": 0.3438,
      "step": 228
    },
    {
      "epoch": 0.4967462039045553,
      "grad_norm": 0.14836269729887547,
      "learning_rate": 9.35050791830345e-05,
      "loss": 0.2525,
      "step": 229
    },
    {
      "epoch": 0.49891540130151846,
      "grad_norm": 0.14711791523393897,
      "learning_rate": 9.341137712229282e-05,
      "loss": 0.2713,
      "step": 230
    },
    {
      "epoch": 0.5010845986984815,
      "grad_norm": 0.1563819160365463,
      "learning_rate": 9.331705162289433e-05,
      "loss": 0.2919,
      "step": 231
    },
    {
      "epoch": 0.5032537960954447,
      "grad_norm": 0.1723872547155802,
      "learning_rate": 9.322210403946461e-05,
      "loss": 0.3121,
      "step": 232
    },
    {
      "epoch": 0.5054229934924078,
      "grad_norm": 0.17043959065030484,
      "learning_rate": 9.312653573556316e-05,
      "loss": 0.3561,
      "step": 233
    },
    {
      "epoch": 0.5075921908893709,
      "grad_norm": 0.14967506341505113,
      "learning_rate": 9.303034808366367e-05,
      "loss": 0.282,
      "step": 234
    },
    {
      "epoch": 0.5097613882863341,
      "grad_norm": 0.3206125419158534,
      "learning_rate": 9.293354246513448e-05,
      "loss": 0.3589,
      "step": 235
    },
    {
      "epoch": 0.5119305856832972,
      "grad_norm": 0.15389716021946065,
      "learning_rate": 9.283612027021862e-05,
      "loss": 0.3168,
      "step": 236
    },
    {
      "epoch": 0.5140997830802603,
      "grad_norm": 0.16629395409267134,
      "learning_rate": 9.273808289801388e-05,
      "loss": 0.2989,
      "step": 237
    },
    {
      "epoch": 0.5162689804772235,
      "grad_norm": 0.17350303956302596,
      "learning_rate": 9.263943175645275e-05,
      "loss": 0.3515,
      "step": 238
    },
    {
      "epoch": 0.5184381778741866,
      "grad_norm": 0.16804219683380595,
      "learning_rate": 9.254016826228215e-05,
      "loss": 0.3296,
      "step": 239
    },
    {
      "epoch": 0.5206073752711496,
      "grad_norm": 0.15382387009648443,
      "learning_rate": 9.244029384104311e-05,
      "loss": 0.2929,
      "step": 240
    },
    {
      "epoch": 0.5227765726681128,
      "grad_norm": 0.1817152430149277,
      "learning_rate": 9.233980992705031e-05,
      "loss": 0.409,
      "step": 241
    },
    {
      "epoch": 0.5249457700650759,
      "grad_norm": 0.15284178391247238,
      "learning_rate": 9.223871796337147e-05,
      "loss": 0.29,
      "step": 242
    },
    {
      "epoch": 0.527114967462039,
      "grad_norm": 0.17168700031201556,
      "learning_rate": 9.213701940180657e-05,
      "loss": 0.3596,
      "step": 243
    },
    {
      "epoch": 0.5292841648590022,
      "grad_norm": 0.16236461998196408,
      "learning_rate": 9.203471570286711e-05,
      "loss": 0.3531,
      "step": 244
    },
    {
      "epoch": 0.5314533622559653,
      "grad_norm": 0.17510095685795815,
      "learning_rate": 9.193180833575506e-05,
      "loss": 0.4172,
      "step": 245
    },
    {
      "epoch": 0.5336225596529284,
      "grad_norm": 0.1618295606935838,
      "learning_rate": 9.182829877834176e-05,
      "loss": 0.3086,
      "step": 246
    },
    {
      "epoch": 0.5357917570498916,
      "grad_norm": 0.16138627788900253,
      "learning_rate": 9.172418851714676e-05,
      "loss": 0.3409,
      "step": 247
    },
    {
      "epoch": 0.5379609544468547,
      "grad_norm": 0.15375456963212109,
      "learning_rate": 9.161947904731636e-05,
      "loss": 0.3306,
      "step": 248
    },
    {
      "epoch": 0.5401301518438177,
      "grad_norm": 0.14093510508892232,
      "learning_rate": 9.151417187260226e-05,
      "loss": 0.2723,
      "step": 249
    },
    {
      "epoch": 0.5422993492407809,
      "grad_norm": 0.1566994003310743,
      "learning_rate": 9.140826850533987e-05,
      "loss": 0.3047,
      "step": 250
    },
    {
      "epoch": 0.544468546637744,
      "grad_norm": 0.16263764510479803,
      "learning_rate": 9.130177046642667e-05,
      "loss": 0.3313,
      "step": 251
    },
    {
      "epoch": 0.5466377440347071,
      "grad_norm": 0.2635396490290928,
      "learning_rate": 9.119467928530027e-05,
      "loss": 0.3137,
      "step": 252
    },
    {
      "epoch": 0.5488069414316703,
      "grad_norm": 0.19432404691982347,
      "learning_rate": 9.108699649991659e-05,
      "loss": 0.3271,
      "step": 253
    },
    {
      "epoch": 0.5509761388286334,
      "grad_norm": 0.16321335163395914,
      "learning_rate": 9.097872365672757e-05,
      "loss": 0.3464,
      "step": 254
    },
    {
      "epoch": 0.5531453362255966,
      "grad_norm": 0.1674780863277491,
      "learning_rate": 9.086986231065917e-05,
      "loss": 0.3479,
      "step": 255
    },
    {
      "epoch": 0.5553145336225597,
      "grad_norm": 0.16720487351831387,
      "learning_rate": 9.076041402508893e-05,
      "loss": 0.3765,
      "step": 256
    },
    {
      "epoch": 0.5574837310195228,
      "grad_norm": 0.1768233975227357,
      "learning_rate": 9.06503803718235e-05,
      "loss": 0.3611,
      "step": 257
    },
    {
      "epoch": 0.559652928416486,
      "grad_norm": 0.1658738115128469,
      "learning_rate": 9.053976293107612e-05,
      "loss": 0.3425,
      "step": 258
    },
    {
      "epoch": 0.561822125813449,
      "grad_norm": 0.15075392628393477,
      "learning_rate": 9.042856329144393e-05,
      "loss": 0.2831,
      "step": 259
    },
    {
      "epoch": 0.5639913232104121,
      "grad_norm": 0.15462905264522137,
      "learning_rate": 9.031678304988509e-05,
      "loss": 0.3359,
      "step": 260
    },
    {
      "epoch": 0.5661605206073753,
      "grad_norm": 0.1486760035515965,
      "learning_rate": 9.020442381169593e-05,
      "loss": 0.3036,
      "step": 261
    },
    {
      "epoch": 0.5683297180043384,
      "grad_norm": 0.14672655464193096,
      "learning_rate": 9.009148719048785e-05,
      "loss": 0.2655,
      "step": 262
    },
    {
      "epoch": 0.5704989154013015,
      "grad_norm": 0.14525138583221342,
      "learning_rate": 8.99779748081641e-05,
      "loss": 0.2893,
      "step": 263
    },
    {
      "epoch": 0.5726681127982647,
      "grad_norm": 0.1649767278464621,
      "learning_rate": 8.986388829489663e-05,
      "loss": 0.3416,
      "step": 264
    },
    {
      "epoch": 0.5748373101952278,
      "grad_norm": 0.1486891920011246,
      "learning_rate": 8.97492292891025e-05,
      "loss": 0.3157,
      "step": 265
    },
    {
      "epoch": 0.5770065075921909,
      "grad_norm": 0.16844270718485851,
      "learning_rate": 8.96339994374205e-05,
      "loss": 0.3434,
      "step": 266
    },
    {
      "epoch": 0.579175704989154,
      "grad_norm": 0.15180653954202514,
      "learning_rate": 8.951820039468741e-05,
      "loss": 0.2775,
      "step": 267
    },
    {
      "epoch": 0.5813449023861171,
      "grad_norm": 0.15671071191539224,
      "learning_rate": 8.940183382391429e-05,
      "loss": 0.2713,
      "step": 268
    },
    {
      "epoch": 0.5835140997830802,
      "grad_norm": 0.16147849592493357,
      "learning_rate": 8.928490139626253e-05,
      "loss": 0.3173,
      "step": 269
    },
    {
      "epoch": 0.5856832971800434,
      "grad_norm": 0.19489581469202696,
      "learning_rate": 8.916740479101995e-05,
      "loss": 0.3289,
      "step": 270
    },
    {
      "epoch": 0.5878524945770065,
      "grad_norm": 0.14998764666371173,
      "learning_rate": 8.90493456955766e-05,
      "loss": 0.2677,
      "step": 271
    },
    {
      "epoch": 0.5900216919739696,
      "grad_norm": 0.15423637992388417,
      "learning_rate": 8.893072580540053e-05,
      "loss": 0.3233,
      "step": 272
    },
    {
      "epoch": 0.5921908893709328,
      "grad_norm": 0.1618307748258286,
      "learning_rate": 8.88115468240135e-05,
      "loss": 0.335,
      "step": 273
    },
    {
      "epoch": 0.5943600867678959,
      "grad_norm": 0.28889311411388907,
      "learning_rate": 8.869181046296647e-05,
      "loss": 0.3162,
      "step": 274
    },
    {
      "epoch": 0.596529284164859,
      "grad_norm": 0.16811888898141328,
      "learning_rate": 8.857151844181502e-05,
      "loss": 0.3321,
      "step": 275
    },
    {
      "epoch": 0.5986984815618221,
      "grad_norm": 0.15974545714844673,
      "learning_rate": 8.845067248809469e-05,
      "loss": 0.3424,
      "step": 276
    },
    {
      "epoch": 0.6008676789587852,
      "grad_norm": 0.14627135337253672,
      "learning_rate": 8.83292743372961e-05,
      "loss": 0.2914,
      "step": 277
    },
    {
      "epoch": 0.6030368763557483,
      "grad_norm": 0.15447685130795957,
      "learning_rate": 8.820732573284012e-05,
      "loss": 0.3175,
      "step": 278
    },
    {
      "epoch": 0.6052060737527115,
      "grad_norm": 0.17049697629599625,
      "learning_rate": 8.808482842605277e-05,
      "loss": 0.3328,
      "step": 279
    },
    {
      "epoch": 0.6073752711496746,
      "grad_norm": 0.15776013624499943,
      "learning_rate": 8.796178417614007e-05,
      "loss": 0.3109,
      "step": 280
    },
    {
      "epoch": 0.6095444685466378,
      "grad_norm": 0.16222342988686625,
      "learning_rate": 8.783819475016282e-05,
      "loss": 0.3748,
      "step": 281
    },
    {
      "epoch": 0.6117136659436009,
      "grad_norm": 0.2331822347475535,
      "learning_rate": 8.771406192301113e-05,
      "loss": 0.3138,
      "step": 282
    },
    {
      "epoch": 0.613882863340564,
      "grad_norm": 0.15710540429632355,
      "learning_rate": 8.758938747737909e-05,
      "loss": 0.3204,
      "step": 283
    },
    {
      "epoch": 0.6160520607375272,
      "grad_norm": 0.1696778017954279,
      "learning_rate": 8.746417320373896e-05,
      "loss": 0.3545,
      "step": 284
    },
    {
      "epoch": 0.6182212581344902,
      "grad_norm": 0.160133581870411,
      "learning_rate": 8.733842090031565e-05,
      "loss": 0.3079,
      "step": 285
    },
    {
      "epoch": 0.6203904555314533,
      "grad_norm": 0.15694259520801862,
      "learning_rate": 8.72121323730608e-05,
      "loss": 0.311,
      "step": 286
    },
    {
      "epoch": 0.6225596529284165,
      "grad_norm": 0.1585388671488109,
      "learning_rate": 8.708530943562683e-05,
      "loss": 0.3366,
      "step": 287
    },
    {
      "epoch": 0.6247288503253796,
      "grad_norm": 0.1637524110812479,
      "learning_rate": 8.695795390934094e-05,
      "loss": 0.3351,
      "step": 288
    },
    {
      "epoch": 0.6268980477223427,
      "grad_norm": 0.17338170936952552,
      "learning_rate": 8.683006762317891e-05,
      "loss": 0.3683,
      "step": 289
    },
    {
      "epoch": 0.6290672451193059,
      "grad_norm": 0.15279053222915287,
      "learning_rate": 8.670165241373891e-05,
      "loss": 0.2839,
      "step": 290
    },
    {
      "epoch": 0.631236442516269,
      "grad_norm": 0.15271124650902063,
      "learning_rate": 8.657271012521504e-05,
      "loss": 0.274,
      "step": 291
    },
    {
      "epoch": 0.6334056399132321,
      "grad_norm": 0.20119865632692643,
      "learning_rate": 8.644324260937085e-05,
      "loss": 0.2766,
      "step": 292
    },
    {
      "epoch": 0.6355748373101953,
      "grad_norm": 0.16358970886050292,
      "learning_rate": 8.631325172551284e-05,
      "loss": 0.3223,
      "step": 293
    },
    {
      "epoch": 0.6377440347071583,
      "grad_norm": 0.16024548206523562,
      "learning_rate": 8.618273934046364e-05,
      "loss": 0.3255,
      "step": 294
    },
    {
      "epoch": 0.6399132321041214,
      "grad_norm": 0.18933994539002422,
      "learning_rate": 8.60517073285353e-05,
      "loss": 0.4288,
      "step": 295
    },
    {
      "epoch": 0.6420824295010846,
      "grad_norm": 0.14871982406160117,
      "learning_rate": 8.592015757150225e-05,
      "loss": 0.3048,
      "step": 296
    },
    {
      "epoch": 0.6442516268980477,
      "grad_norm": 0.1634546407796359,
      "learning_rate": 8.578809195857445e-05,
      "loss": 0.3775,
      "step": 297
    },
    {
      "epoch": 0.6464208242950108,
      "grad_norm": 0.16293792940515708,
      "learning_rate": 8.565551238637006e-05,
      "loss": 0.3142,
      "step": 298
    },
    {
      "epoch": 0.648590021691974,
      "grad_norm": 0.14930487066180928,
      "learning_rate": 8.552242075888838e-05,
      "loss": 0.2613,
      "step": 299
    },
    {
      "epoch": 0.6507592190889371,
      "grad_norm": 0.14814023625501535,
      "learning_rate": 8.538881898748241e-05,
      "loss": 0.3157,
      "step": 300
    },
    {
      "epoch": 0.6529284164859002,
      "grad_norm": 0.15429106087854486,
      "learning_rate": 8.525470899083138e-05,
      "loss": 0.3023,
      "step": 301
    },
    {
      "epoch": 0.6550976138828634,
      "grad_norm": 0.1526889270326536,
      "learning_rate": 8.51200926949133e-05,
      "loss": 0.2622,
      "step": 302
    },
    {
      "epoch": 0.6572668112798264,
      "grad_norm": 0.16836180192848554,
      "learning_rate": 8.498497203297716e-05,
      "loss": 0.3181,
      "step": 303
    },
    {
      "epoch": 0.6594360086767896,
      "grad_norm": 0.1568597185268113,
      "learning_rate": 8.48493489455153e-05,
      "loss": 0.3164,
      "step": 304
    },
    {
      "epoch": 0.6616052060737527,
      "grad_norm": 0.1717660325284911,
      "learning_rate": 8.47132253802355e-05,
      "loss": 0.3144,
      "step": 305
    },
    {
      "epoch": 0.6637744034707158,
      "grad_norm": 0.15864917205877277,
      "learning_rate": 8.457660329203289e-05,
      "loss": 0.3057,
      "step": 306
    },
    {
      "epoch": 0.665943600867679,
      "grad_norm": 0.16569505505355422,
      "learning_rate": 8.443948464296211e-05,
      "loss": 0.3327,
      "step": 307
    },
    {
      "epoch": 0.6681127982646421,
      "grad_norm": 0.16057628499246557,
      "learning_rate": 8.430187140220889e-05,
      "loss": 0.291,
      "step": 308
    },
    {
      "epoch": 0.6702819956616052,
      "grad_norm": 0.14767031347275292,
      "learning_rate": 8.416376554606195e-05,
      "loss": 0.2795,
      "step": 309
    },
    {
      "epoch": 0.6724511930585684,
      "grad_norm": 0.15854515513637216,
      "learning_rate": 8.402516905788455e-05,
      "loss": 0.3009,
      "step": 310
    },
    {
      "epoch": 0.6746203904555315,
      "grad_norm": 0.14713035448630263,
      "learning_rate": 8.388608392808593e-05,
      "loss": 0.2793,
      "step": 311
    },
    {
      "epoch": 0.6767895878524945,
      "grad_norm": 0.16017456682784914,
      "learning_rate": 8.37465121540929e-05,
      "loss": 0.305,
      "step": 312
    },
    {
      "epoch": 0.6789587852494577,
      "grad_norm": 0.15120391692718194,
      "learning_rate": 8.360645574032098e-05,
      "loss": 0.2749,
      "step": 313
    },
    {
      "epoch": 0.6811279826464208,
      "grad_norm": 0.15560001063747786,
      "learning_rate": 8.346591669814572e-05,
      "loss": 0.3186,
      "step": 314
    },
    {
      "epoch": 0.6832971800433839,
      "grad_norm": 0.1613679084226523,
      "learning_rate": 8.332489704587381e-05,
      "loss": 0.3328,
      "step": 315
    },
    {
      "epoch": 0.6854663774403471,
      "grad_norm": 0.15605720716726337,
      "learning_rate": 8.318339880871402e-05,
      "loss": 0.3077,
      "step": 316
    },
    {
      "epoch": 0.6876355748373102,
      "grad_norm": 0.15488519210128188,
      "learning_rate": 8.304142401874818e-05,
      "loss": 0.3256,
      "step": 317
    },
    {
      "epoch": 0.6898047722342733,
      "grad_norm": 0.1403558799176201,
      "learning_rate": 8.2898974714902e-05,
      "loss": 0.2849,
      "step": 318
    },
    {
      "epoch": 0.6919739696312365,
      "grad_norm": 0.16073415458894003,
      "learning_rate": 8.275605294291576e-05,
      "loss": 0.3148,
      "step": 319
    },
    {
      "epoch": 0.6941431670281996,
      "grad_norm": 0.14531860683004322,
      "learning_rate": 8.261266075531493e-05,
      "loss": 0.2949,
      "step": 320
    },
    {
      "epoch": 0.6963123644251626,
      "grad_norm": 0.153620854119112,
      "learning_rate": 8.24688002113807e-05,
      "loss": 0.3024,
      "step": 321
    },
    {
      "epoch": 0.6984815618221258,
      "grad_norm": 0.1598435331827638,
      "learning_rate": 8.232447337712045e-05,
      "loss": 0.3116,
      "step": 322
    },
    {
      "epoch": 0.7006507592190889,
      "grad_norm": 0.16126012816422514,
      "learning_rate": 8.217968232523798e-05,
      "loss": 0.3302,
      "step": 323
    },
    {
      "epoch": 0.702819956616052,
      "grad_norm": 0.167819401197193,
      "learning_rate": 8.203442913510386e-05,
      "loss": 0.2961,
      "step": 324
    },
    {
      "epoch": 0.7049891540130152,
      "grad_norm": 0.16091517631557192,
      "learning_rate": 8.188871589272547e-05,
      "loss": 0.3058,
      "step": 325
    },
    {
      "epoch": 0.7071583514099783,
      "grad_norm": 0.15072907511506445,
      "learning_rate": 8.174254469071711e-05,
      "loss": 0.2768,
      "step": 326
    },
    {
      "epoch": 0.7093275488069414,
      "grad_norm": 0.15463419748008012,
      "learning_rate": 8.15959176282699e-05,
      "loss": 0.2698,
      "step": 327
    },
    {
      "epoch": 0.7114967462039046,
      "grad_norm": 0.15765765627941916,
      "learning_rate": 8.144883681112168e-05,
      "loss": 0.3327,
      "step": 328
    },
    {
      "epoch": 0.7136659436008677,
      "grad_norm": 0.1625944955327596,
      "learning_rate": 8.130130435152671e-05,
      "loss": 0.3115,
      "step": 329
    },
    {
      "epoch": 0.7158351409978309,
      "grad_norm": 0.14663689685956108,
      "learning_rate": 8.115332236822543e-05,
      "loss": 0.2813,
      "step": 330
    },
    {
      "epoch": 0.7180043383947939,
      "grad_norm": 0.14366366452382384,
      "learning_rate": 8.100489298641387e-05,
      "loss": 0.252,
      "step": 331
    },
    {
      "epoch": 0.720173535791757,
      "grad_norm": 0.14062872637814394,
      "learning_rate": 8.085601833771332e-05,
      "loss": 0.2426,
      "step": 332
    },
    {
      "epoch": 0.7223427331887202,
      "grad_norm": 0.151319434703271,
      "learning_rate": 8.070670056013963e-05,
      "loss": 0.2804,
      "step": 333
    },
    {
      "epoch": 0.7245119305856833,
      "grad_norm": 0.17339443782515734,
      "learning_rate": 8.055694179807241e-05,
      "loss": 0.3386,
      "step": 334
    },
    {
      "epoch": 0.7266811279826464,
      "grad_norm": 0.162057871765424,
      "learning_rate": 8.040674420222442e-05,
      "loss": 0.3059,
      "step": 335
    },
    {
      "epoch": 0.7288503253796096,
      "grad_norm": 0.17452086702897832,
      "learning_rate": 8.025610992961059e-05,
      "loss": 0.323,
      "step": 336
    },
    {
      "epoch": 0.7310195227765727,
      "grad_norm": 0.15218002802937822,
      "learning_rate": 8.010504114351699e-05,
      "loss": 0.2892,
      "step": 337
    },
    {
      "epoch": 0.7331887201735358,
      "grad_norm": 0.1786691433855208,
      "learning_rate": 7.995354001346984e-05,
      "loss": 0.2993,
      "step": 338
    },
    {
      "epoch": 0.735357917570499,
      "grad_norm": 0.15572465398388327,
      "learning_rate": 7.980160871520434e-05,
      "loss": 0.2751,
      "step": 339
    },
    {
      "epoch": 0.737527114967462,
      "grad_norm": 0.14944653365494096,
      "learning_rate": 7.964924943063341e-05,
      "loss": 0.295,
      "step": 340
    },
    {
      "epoch": 0.7396963123644251,
      "grad_norm": 0.15137015815284247,
      "learning_rate": 7.949646434781637e-05,
      "loss": 0.2928,
      "step": 341
    },
    {
      "epoch": 0.7418655097613883,
      "grad_norm": 0.13520870208050645,
      "learning_rate": 7.934325566092749e-05,
      "loss": 0.2392,
      "step": 342
    },
    {
      "epoch": 0.7440347071583514,
      "grad_norm": 0.17474668202456908,
      "learning_rate": 7.918962557022445e-05,
      "loss": 0.3343,
      "step": 343
    },
    {
      "epoch": 0.7462039045553145,
      "grad_norm": 0.15920430785387085,
      "learning_rate": 7.903557628201689e-05,
      "loss": 0.3108,
      "step": 344
    },
    {
      "epoch": 0.7483731019522777,
      "grad_norm": 0.14495723872575775,
      "learning_rate": 7.888111000863455e-05,
      "loss": 0.2723,
      "step": 345
    },
    {
      "epoch": 0.7505422993492408,
      "grad_norm": 0.16184390654404834,
      "learning_rate": 7.872622896839556e-05,
      "loss": 0.2729,
      "step": 346
    },
    {
      "epoch": 0.7527114967462039,
      "grad_norm": 0.16644660892068602,
      "learning_rate": 7.857093538557465e-05,
      "loss": 0.3267,
      "step": 347
    },
    {
      "epoch": 0.754880694143167,
      "grad_norm": 0.15541714907955761,
      "learning_rate": 7.841523149037109e-05,
      "loss": 0.3217,
      "step": 348
    },
    {
      "epoch": 0.7570498915401301,
      "grad_norm": 0.15934934206985665,
      "learning_rate": 7.825911951887677e-05,
      "loss": 0.2913,
      "step": 349
    },
    {
      "epoch": 0.7592190889370932,
      "grad_norm": 0.1578728957708445,
      "learning_rate": 7.810260171304399e-05,
      "loss": 0.3093,
      "step": 350
    },
    {
      "epoch": 0.7613882863340564,
      "grad_norm": 0.15490528067171122,
      "learning_rate": 7.794568032065337e-05,
      "loss": 0.3009,
      "step": 351
    },
    {
      "epoch": 0.7635574837310195,
      "grad_norm": 0.16699421067292403,
      "learning_rate": 7.778835759528145e-05,
      "loss": 0.3656,
      "step": 352
    },
    {
      "epoch": 0.7657266811279827,
      "grad_norm": 0.22044364766801572,
      "learning_rate": 7.763063579626839e-05,
      "loss": 0.334,
      "step": 353
    },
    {
      "epoch": 0.7678958785249458,
      "grad_norm": 0.19643699768744521,
      "learning_rate": 7.747251718868557e-05,
      "loss": 0.3231,
      "step": 354
    },
    {
      "epoch": 0.7700650759219089,
      "grad_norm": 0.20910555862615757,
      "learning_rate": 7.731400404330298e-05,
      "loss": 0.3161,
      "step": 355
    },
    {
      "epoch": 0.7722342733188721,
      "grad_norm": 0.16468231811761894,
      "learning_rate": 7.715509863655661e-05,
      "loss": 0.2979,
      "step": 356
    },
    {
      "epoch": 0.7744034707158352,
      "grad_norm": 0.1367744772298653,
      "learning_rate": 7.699580325051583e-05,
      "loss": 0.2166,
      "step": 357
    },
    {
      "epoch": 0.7765726681127982,
      "grad_norm": 0.16273122371163448,
      "learning_rate": 7.683612017285056e-05,
      "loss": 0.2847,
      "step": 358
    },
    {
      "epoch": 0.7787418655097614,
      "grad_norm": 0.15669925061953327,
      "learning_rate": 7.667605169679842e-05,
      "loss": 0.2777,
      "step": 359
    },
    {
      "epoch": 0.7809110629067245,
      "grad_norm": 0.17725284818408057,
      "learning_rate": 7.651560012113182e-05,
      "loss": 0.3337,
      "step": 360
    },
    {
      "epoch": 0.7830802603036876,
      "grad_norm": 0.17427480006189128,
      "learning_rate": 7.635476775012493e-05,
      "loss": 0.3665,
      "step": 361
    },
    {
      "epoch": 0.7852494577006508,
      "grad_norm": 0.17414268214659534,
      "learning_rate": 7.619355689352056e-05,
      "loss": 0.3309,
      "step": 362
    },
    {
      "epoch": 0.7874186550976139,
      "grad_norm": 0.17512481390885523,
      "learning_rate": 7.60319698664971e-05,
      "loss": 0.3328,
      "step": 363
    },
    {
      "epoch": 0.789587852494577,
      "grad_norm": 0.15289988099891824,
      "learning_rate": 7.587000898963508e-05,
      "loss": 0.2515,
      "step": 364
    },
    {
      "epoch": 0.7917570498915402,
      "grad_norm": 0.1624765535782596,
      "learning_rate": 7.570767658888405e-05,
      "loss": 0.2518,
      "step": 365
    },
    {
      "epoch": 0.7939262472885033,
      "grad_norm": 0.1617918820535666,
      "learning_rate": 7.554497499552902e-05,
      "loss": 0.3053,
      "step": 366
    },
    {
      "epoch": 0.7960954446854663,
      "grad_norm": 0.16383299523189962,
      "learning_rate": 7.538190654615711e-05,
|
"loss": 0.3033, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7982646420824295, |
|
"grad_norm": 0.1740801344083908, |
|
"learning_rate": 7.521847358262384e-05, |
|
"loss": 0.3324, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.8004338394793926, |
|
"grad_norm": 0.16040212743690085, |
|
"learning_rate": 7.505467845201965e-05, |
|
"loss": 0.3583, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.8026030368763557, |
|
"grad_norm": 0.1581087352900414, |
|
"learning_rate": 7.48905235066361e-05, |
|
"loss": 0.3184, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8047722342733189, |
|
"grad_norm": 0.15619092562938275, |
|
"learning_rate": 7.472601110393212e-05, |
|
"loss": 0.3248, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.806941431670282, |
|
"grad_norm": 0.1595073883740819, |
|
"learning_rate": 7.456114360650015e-05, |
|
"loss": 0.3324, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.8091106290672451, |
|
"grad_norm": 0.16937228457587902, |
|
"learning_rate": 7.439592338203221e-05, |
|
"loss": 0.272, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.8112798264642083, |
|
"grad_norm": 0.21955900610207899, |
|
"learning_rate": 7.423035280328589e-05, |
|
"loss": 0.2876, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.8134490238611713, |
|
"grad_norm": 0.1622542075335067, |
|
"learning_rate": 7.406443424805031e-05, |
|
"loss": 0.3292, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8156182212581344, |
|
"grad_norm": 0.1539483896012786, |
|
"learning_rate": 7.389817009911188e-05, |
|
"loss": 0.3103, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8177874186550976, |
|
"grad_norm": 0.15631416958623467, |
|
"learning_rate": 7.373156274422022e-05, |
|
"loss": 0.2877, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8199566160520607, |
|
"grad_norm": 0.16629989037855605, |
|
"learning_rate": 7.356461457605373e-05, |
|
"loss": 0.2955, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.8221258134490239, |
|
"grad_norm": 0.17821592166706385, |
|
"learning_rate": 7.339732799218535e-05, |
|
"loss": 0.3395, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.824295010845987, |
|
"grad_norm": 0.16495627039143948, |
|
"learning_rate": 7.322970539504802e-05, |
|
"loss": 0.2975, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8264642082429501, |
|
"grad_norm": 0.1658072464206595, |
|
"learning_rate": 7.306174919190025e-05, |
|
"loss": 0.3178, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.8286334056399133, |
|
"grad_norm": 0.15286362948439744, |
|
"learning_rate": 7.28934617947915e-05, |
|
"loss": 0.2405, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8308026030368764, |
|
"grad_norm": 0.25352133804652655, |
|
"learning_rate": 7.272484562052762e-05, |
|
"loss": 0.3318, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8329718004338394, |
|
"grad_norm": 0.16883132171707005, |
|
"learning_rate": 7.255590309063604e-05, |
|
"loss": 0.2697, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.8351409978308026, |
|
"grad_norm": 0.17506322037825633, |
|
"learning_rate": 7.238663663133108e-05, |
|
"loss": 0.2947, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8373101952277657, |
|
"grad_norm": 0.162905554800095, |
|
"learning_rate": 7.221704867347901e-05, |
|
"loss": 0.316, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8394793926247288, |
|
"grad_norm": 0.16905944662023994, |
|
"learning_rate": 7.204714165256325e-05, |
|
"loss": 0.3287, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.841648590021692, |
|
"grad_norm": 0.15092794187286074, |
|
"learning_rate": 7.187691800864936e-05, |
|
"loss": 0.2997, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.8438177874186551, |
|
"grad_norm": 0.1355944525377151, |
|
"learning_rate": 7.170638018634993e-05, |
|
"loss": 0.2278, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8459869848156182, |
|
"grad_norm": 0.17675290778005076, |
|
"learning_rate": 7.153553063478953e-05, |
|
"loss": 0.3531, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8481561822125814, |
|
"grad_norm": 0.1534412441507167, |
|
"learning_rate": 7.136437180756954e-05, |
|
"loss": 0.2781, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8503253796095445, |
|
"grad_norm": 0.16137646593619714, |
|
"learning_rate": 7.119290616273294e-05, |
|
"loss": 0.2834, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8524945770065075, |
|
"grad_norm": 0.15944482254384695, |
|
"learning_rate": 7.10211361627289e-05, |
|
"loss": 0.2979, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8546637744034707, |
|
"grad_norm": 0.1656541893638849, |
|
"learning_rate": 7.084906427437757e-05, |
|
"loss": 0.2898, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8568329718004338, |
|
"grad_norm": 0.17951549268207853, |
|
"learning_rate": 7.06766929688345e-05, |
|
"loss": 0.4042, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8590021691973969, |
|
"grad_norm": 0.1566199701101273, |
|
"learning_rate": 7.050402472155526e-05, |
|
"loss": 0.3079, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8611713665943601, |
|
"grad_norm": 0.15458950588314033, |
|
"learning_rate": 7.03310620122599e-05, |
|
"loss": 0.2778, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8633405639913232, |
|
"grad_norm": 0.17104556871315288, |
|
"learning_rate": 7.015780732489717e-05, |
|
"loss": 0.3175, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8655097613882863, |
|
"grad_norm": 0.19835635840780777, |
|
"learning_rate": 6.99842631476091e-05, |
|
"loss": 0.3446, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8676789587852495, |
|
"grad_norm": 0.17218889670168014, |
|
"learning_rate": 6.981043197269505e-05, |
|
"loss": 0.3048, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8676789587852495, |
|
"eval_loss": 0.31443339586257935, |
|
"eval_runtime": 39.7245, |
|
"eval_samples_per_second": 0.478, |
|
"eval_steps_per_second": 0.126, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8698481561822126, |
|
"grad_norm": 0.16106337443031235, |
|
"learning_rate": 6.963631629657606e-05, |
|
"loss": 0.282, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8720173535791758, |
|
"grad_norm": 0.1549710087992951, |
|
"learning_rate": 6.946191861975888e-05, |
|
"loss": 0.2923, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8741865509761388, |
|
"grad_norm": 0.17143963315653743, |
|
"learning_rate": 6.928724144680022e-05, |
|
"loss": 0.304, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8763557483731019, |
|
"grad_norm": 0.16661780813221244, |
|
"learning_rate": 6.911228728627059e-05, |
|
"loss": 0.3294, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8785249457700651, |
|
"grad_norm": 0.16403696305382504, |
|
"learning_rate": 6.893705865071842e-05, |
|
"loss": 0.3014, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8806941431670282, |
|
"grad_norm": 0.15319848299652775, |
|
"learning_rate": 6.876155805663389e-05, |
|
"loss": 0.2691, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8828633405639913, |
|
"grad_norm": 0.17118141480932222, |
|
"learning_rate": 6.858578802441288e-05, |
|
"loss": 0.3132, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8850325379609545, |
|
"grad_norm": 0.16178546049978892, |
|
"learning_rate": 6.840975107832067e-05, |
|
"loss": 0.2564, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8872017353579176, |
|
"grad_norm": 0.16291335526409656, |
|
"learning_rate": 6.823344974645576e-05, |
|
"loss": 0.2873, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8893709327548807, |
|
"grad_norm": 0.1609692593321932, |
|
"learning_rate": 6.805688656071354e-05, |
|
"loss": 0.305, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8915401301518439, |
|
"grad_norm": 0.14901262397966952, |
|
"learning_rate": 6.788006405674992e-05, |
|
"loss": 0.2673, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8937093275488069, |
|
"grad_norm": 0.16659840958516206, |
|
"learning_rate": 6.770298477394495e-05, |
|
"loss": 0.3277, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.89587852494577, |
|
"grad_norm": 0.1713410771401561, |
|
"learning_rate": 6.75256512553663e-05, |
|
"loss": 0.3714, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8980477223427332, |
|
"grad_norm": 0.1818967831157803, |
|
"learning_rate": 6.734806604773277e-05, |
|
"loss": 0.3811, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9002169197396963, |
|
"grad_norm": 0.21925123588651396, |
|
"learning_rate": 6.717023170137774e-05, |
|
"loss": 0.3056, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9023861171366594, |
|
"grad_norm": 0.15314422603554104, |
|
"learning_rate": 6.69921507702125e-05, |
|
"loss": 0.2837, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9045553145336226, |
|
"grad_norm": 0.16258221254945943, |
|
"learning_rate": 6.681382581168956e-05, |
|
"loss": 0.3085, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.9067245119305857, |
|
"grad_norm": 0.16032298987917648, |
|
"learning_rate": 6.663525938676603e-05, |
|
"loss": 0.3223, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9088937093275488, |
|
"grad_norm": 0.16085131851237103, |
|
"learning_rate": 6.645645405986665e-05, |
|
"loss": 0.303, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.911062906724512, |
|
"grad_norm": 0.1650727655299475, |
|
"learning_rate": 6.627741239884716e-05, |
|
"loss": 0.3276, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.913232104121475, |
|
"grad_norm": 0.15360660494909031, |
|
"learning_rate": 6.609813697495731e-05, |
|
"loss": 0.2751, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9154013015184381, |
|
"grad_norm": 0.15157055567722505, |
|
"learning_rate": 6.591863036280398e-05, |
|
"loss": 0.3085, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9175704989154013, |
|
"grad_norm": 0.15803099826725212, |
|
"learning_rate": 6.573889514031415e-05, |
|
"loss": 0.2939, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9197396963123644, |
|
"grad_norm": 0.1585787128334511, |
|
"learning_rate": 6.555893388869793e-05, |
|
"loss": 0.2503, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9219088937093276, |
|
"grad_norm": 0.17931795418506688, |
|
"learning_rate": 6.537874919241149e-05, |
|
"loss": 0.3886, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9240780911062907, |
|
"grad_norm": 0.15343352858299092, |
|
"learning_rate": 6.519834363911992e-05, |
|
"loss": 0.2793, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9262472885032538, |
|
"grad_norm": 0.15328728201248645, |
|
"learning_rate": 6.501771981966007e-05, |
|
"loss": 0.2644, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.928416485900217, |
|
"grad_norm": 0.16064411420877225, |
|
"learning_rate": 6.483688032800337e-05, |
|
"loss": 0.3188, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.93058568329718, |
|
"grad_norm": 0.1679425352918729, |
|
"learning_rate": 6.465582776121852e-05, |
|
"loss": 0.3602, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9327548806941431, |
|
"grad_norm": 0.16379595982132292, |
|
"learning_rate": 6.447456471943427e-05, |
|
"loss": 0.2769, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9349240780911063, |
|
"grad_norm": 0.15943476584744723, |
|
"learning_rate": 6.429309380580202e-05, |
|
"loss": 0.2702, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9370932754880694, |
|
"grad_norm": 0.15321548417679473, |
|
"learning_rate": 6.411141762645846e-05, |
|
"loss": 0.2463, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9392624728850325, |
|
"grad_norm": 0.15564677916963437, |
|
"learning_rate": 6.392953879048813e-05, |
|
"loss": 0.2885, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9414316702819957, |
|
"grad_norm": 0.1631290118518175, |
|
"learning_rate": 6.374745990988598e-05, |
|
"loss": 0.2748, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9436008676789588, |
|
"grad_norm": 0.14619225672799796, |
|
"learning_rate": 6.356518359951982e-05, |
|
"loss": 0.249, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9457700650759219, |
|
"grad_norm": 0.1571277122359372, |
|
"learning_rate": 6.338271247709278e-05, |
|
"loss": 0.2533, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.9479392624728851, |
|
"grad_norm": 0.1550916491663443, |
|
"learning_rate": 6.320004916310573e-05, |
|
"loss": 0.2677, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9501084598698482, |
|
"grad_norm": 0.16661022388778246, |
|
"learning_rate": 6.301719628081965e-05, |
|
"loss": 0.2878, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9522776572668112, |
|
"grad_norm": 0.2040573789571519, |
|
"learning_rate": 6.283415645621791e-05, |
|
"loss": 0.2703, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.9544468546637744, |
|
"grad_norm": 0.16469631306397306, |
|
"learning_rate": 6.265093231796864e-05, |
|
"loss": 0.3037, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9566160520607375, |
|
"grad_norm": 0.15770760260143657, |
|
"learning_rate": 6.246752649738686e-05, |
|
"loss": 0.2657, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9587852494577006, |
|
"grad_norm": 0.15983199407271448, |
|
"learning_rate": 6.228394162839686e-05, |
|
"loss": 0.2764, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9609544468546638, |
|
"grad_norm": 0.16489136986239505, |
|
"learning_rate": 6.210018034749421e-05, |
|
"loss": 0.2955, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9631236442516269, |
|
"grad_norm": 0.22721431990817345, |
|
"learning_rate": 6.191624529370796e-05, |
|
"loss": 0.3395, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.96529284164859, |
|
"grad_norm": 0.157681168570052, |
|
"learning_rate": 6.173213910856277e-05, |
|
"loss": 0.3018, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9674620390455532, |
|
"grad_norm": 0.1576772973443425, |
|
"learning_rate": 6.154786443604098e-05, |
|
"loss": 0.3013, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9696312364425163, |
|
"grad_norm": 0.1579242415527196, |
|
"learning_rate": 6.13634239225445e-05, |
|
"loss": 0.294, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.9718004338394793, |
|
"grad_norm": 0.20593023409205996, |
|
"learning_rate": 6.117882021685704e-05, |
|
"loss": 0.2883, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9739696312364425, |
|
"grad_norm": 0.16203552803868196, |
|
"learning_rate": 6.099405597010585e-05, |
|
"loss": 0.2957, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9761388286334056, |
|
"grad_norm": 0.1614999286165849, |
|
"learning_rate": 6.0809133835723774e-05, |
|
"loss": 0.2885, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9783080260303688, |
|
"grad_norm": 0.16900731974324282, |
|
"learning_rate": 6.0624056469411125e-05, |
|
"loss": 0.301, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9804772234273319, |
|
"grad_norm": 0.17129193051244268, |
|
"learning_rate": 6.043882652909752e-05, |
|
"loss": 0.2701, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.982646420824295, |
|
"grad_norm": 0.19758216454445776, |
|
"learning_rate": 6.025344667490369e-05, |
|
"loss": 0.3732, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9848156182212582, |
|
"grad_norm": 0.15788414206460427, |
|
"learning_rate": 6.006791956910334e-05, |
|
"loss": 0.286, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9869848156182213, |
|
"grad_norm": 0.1686049946630881, |
|
"learning_rate": 5.9882247876084865e-05, |
|
"loss": 0.3423, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9891540130151844, |
|
"grad_norm": 0.1623316957404726, |
|
"learning_rate": 5.969643426231309e-05, |
|
"loss": 0.2972, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9913232104121475, |
|
"grad_norm": 0.16633964946287502, |
|
"learning_rate": 5.951048139629105e-05, |
|
"loss": 0.3207, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9934924078091106, |
|
"grad_norm": 0.15578806657191704, |
|
"learning_rate": 5.932439194852153e-05, |
|
"loss": 0.2821, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.9956616052060737, |
|
"grad_norm": 0.141680355386348, |
|
"learning_rate": 5.9138168591468845e-05, |
|
"loss": 0.211, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9978308026030369, |
|
"grad_norm": 0.1626311176710574, |
|
"learning_rate": 5.8951813999520375e-05, |
|
"loss": 0.2776, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.2168432501312755, |
|
"learning_rate": 5.876533084894821e-05, |
|
"loss": 0.3189, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.002169197396963, |
|
"grad_norm": 0.15195564980233445, |
|
"learning_rate": 5.8578721817870666e-05, |
|
"loss": 0.2746, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.0043383947939262, |
|
"grad_norm": 0.1491438360530865, |
|
"learning_rate": 5.839198958621388e-05, |
|
"loss": 0.2053, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.0065075921908895, |
|
"grad_norm": 0.15367262427759873, |
|
"learning_rate": 5.820513683567328e-05, |
|
"loss": 0.2371, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.0086767895878526, |
|
"grad_norm": 0.1511433155050122, |
|
"learning_rate": 5.801816624967509e-05, |
|
"loss": 0.2213, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.0108459869848156, |
|
"grad_norm": 0.1663892898536712, |
|
"learning_rate": 5.783108051333779e-05, |
|
"loss": 0.2576, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.0130151843817787, |
|
"grad_norm": 0.17685832695456444, |
|
"learning_rate": 5.764388231343356e-05, |
|
"loss": 0.3009, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.0151843817787418, |
|
"grad_norm": 0.21787472552900225, |
|
"learning_rate": 5.745657433834968e-05, |
|
"loss": 0.2636, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.017353579175705, |
|
"grad_norm": 0.19123373552730527, |
|
"learning_rate": 5.726915927804995e-05, |
|
"loss": 0.2743, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.0195227765726682, |
|
"grad_norm": 0.17768041272481136, |
|
"learning_rate": 5.7081639824036e-05, |
|
"loss": 0.2623, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0216919739696313, |
|
"grad_norm": 0.1697916576481495, |
|
"learning_rate": 5.6894018669308735e-05, |
|
"loss": 0.2272, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.0238611713665944, |
|
"grad_norm": 0.17650211499609383, |
|
"learning_rate": 5.670629850832956e-05, |
|
"loss": 0.2761, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.0260303687635575, |
|
"grad_norm": 0.18214913995299273, |
|
"learning_rate": 5.6518482036981725e-05, |
|
"loss": 0.2459, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.0281995661605206, |
|
"grad_norm": 0.17835571006990567, |
|
"learning_rate": 5.633057195253164e-05, |
|
"loss": 0.2468, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.0303687635574836, |
|
"grad_norm": 0.2069539317038426, |
|
"learning_rate": 5.614257095359009e-05, |
|
"loss": 0.3213, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.032537960954447, |
|
"grad_norm": 0.19136911570719517, |
|
"learning_rate": 5.5954481740073505e-05, |
|
"loss": 0.2823, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.03470715835141, |
|
"grad_norm": 0.19262401192871317, |
|
"learning_rate": 5.5766307013165156e-05, |
|
"loss": 0.2595, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.0368763557483731, |
|
"grad_norm": 0.1860152304131639, |
|
"learning_rate": 5.557804947527645e-05, |
|
"loss": 0.2801, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.0390455531453362, |
|
"grad_norm": 0.16924664525627553, |
|
"learning_rate": 5.5389711830007984e-05, |
|
"loss": 0.2308, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.0412147505422993, |
|
"grad_norm": 0.1816528885351813, |
|
"learning_rate": 5.5201296782110845e-05, |
|
"loss": 0.265, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0433839479392624, |
|
"grad_norm": 0.16993844286408233, |
|
"learning_rate": 5.501280703744769e-05, |
|
"loss": 0.2538, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.0455531453362257, |
|
"grad_norm": 0.1949073398717587, |
|
"learning_rate": 5.48242453029539e-05, |
|
"loss": 0.3056, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.0477223427331888, |
|
"grad_norm": 0.16891171117764792, |
|
"learning_rate": 5.463561428659875e-05, |
|
"loss": 0.2197, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.0498915401301518, |
|
"grad_norm": 0.18215169314661425, |
|
"learning_rate": 5.444691669734643e-05, |
|
"loss": 0.2447, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.052060737527115, |
|
"grad_norm": 0.182026157244086, |
|
"learning_rate": 5.425815524511726e-05, |
|
"loss": 0.2978, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.054229934924078, |
|
"grad_norm": 0.17881791802607705, |
|
"learning_rate": 5.406933264074866e-05, |
|
"loss": 0.2469, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.056399132321041, |
|
"grad_norm": 0.18807032896417783, |
|
"learning_rate": 5.3880451595956294e-05, |
|
"loss": 0.297, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.0585683297180044, |
|
"grad_norm": 0.20875267489125912, |
|
"learning_rate": 5.369151482329506e-05, |
|
"loss": 0.282, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.0607375271149675, |
|
"grad_norm": 0.19549344502300633, |
|
"learning_rate": 5.350252503612024e-05, |
|
"loss": 0.3833, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.0629067245119306, |
|
"grad_norm": 0.17937531323085554, |
|
"learning_rate": 5.331348494854841e-05, |
|
"loss": 0.2622, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0650759219088937, |
|
"grad_norm": 0.1870147791370534, |
|
"learning_rate": 5.3124397275418524e-05, |
|
"loss": 0.2322, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.0672451193058567, |
|
"grad_norm": 0.1951898037972042, |
|
"learning_rate": 5.2935264732252965e-05, |
|
"loss": 0.2357, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.06941431670282, |
|
"grad_norm": 0.18975302549273446, |
|
"learning_rate": 5.274609003521846e-05, |
|
"loss": 0.2419, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.0715835140997831, |
|
"grad_norm": 0.20439478227045751, |
|
"learning_rate": 5.255687590108711e-05, |
|
"loss": 0.3275, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.0737527114967462, |
|
"grad_norm": 0.18609479985163507, |
|
"learning_rate": 5.236762504719742e-05, |
|
"loss": 0.2754, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.0759219088937093, |
|
"grad_norm": 0.1830303728930595, |
|
"learning_rate": 5.217834019141521e-05, |
|
"loss": 0.2595, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.0780911062906724, |
|
"grad_norm": 0.20315831733475495, |
|
"learning_rate": 5.1989024052094605e-05, |
|
"loss": 0.3317, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.0802603036876355, |
|
"grad_norm": 0.18316883083338042, |
|
"learning_rate": 5.1799679348039e-05, |
|
"loss": 0.2458, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.0824295010845988, |
|
"grad_norm": 0.2041065318820468, |
|
"learning_rate": 5.1610308798462016e-05, |
|
"loss": 0.3169, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.0845986984815619, |
|
"grad_norm": 0.1944264750547026, |
|
"learning_rate": 5.142091512294844e-05, |
|
"loss": 0.2903, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.086767895878525, |
|
"grad_norm": 0.18918880781763017, |
|
"learning_rate": 5.123150104141521e-05, |
|
"loss": 0.2638, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.088937093275488, |
|
"grad_norm": 0.18983712841034528, |
|
"learning_rate": 5.104206927407225e-05, |
|
"loss": 0.2871, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.0911062906724511, |
|
"grad_norm": 0.20226737440215853, |
|
"learning_rate": 5.085262254138353e-05, |
|
"loss": 0.3285, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.0932754880694142, |
|
"grad_norm": 0.19118754054883752, |
|
"learning_rate": 5.0663163564027935e-05, |
|
"loss": 0.2594, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.0954446854663775, |
|
"grad_norm": 0.2138619141242844, |
|
"learning_rate": 5.047369506286017e-05, |
|
"loss": 0.2832, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.0976138828633406, |
|
"grad_norm": 0.3269054916680425, |
|
"learning_rate": 5.028421975887173e-05, |
|
"loss": 0.2988, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.0997830802603037, |
|
"grad_norm": 0.19040645241202558, |
|
"learning_rate": 5.00947403731518e-05, |
|
"loss": 0.2712, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.1019522776572668, |
|
"grad_norm": 0.1967133503261764, |
|
"learning_rate": 4.99052596268482e-05, |
|
"loss": 0.2815, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.1041214750542299, |
|
"grad_norm": 0.1903074378491903, |
|
"learning_rate": 4.9715780241128286e-05, |
|
"loss": 0.2737, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.106290672451193, |
|
"grad_norm": 0.18119699874786585, |
|
"learning_rate": 4.952630493713984e-05, |
|
"loss": 0.269, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.1084598698481563, |
|
"grad_norm": 0.221788539906692, |
|
"learning_rate": 4.9336836435972076e-05, |
|
"loss": 0.2707, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.1106290672451193, |
|
"grad_norm": 0.1928601828912626, |
|
"learning_rate": 4.914737745861646e-05, |
|
"loss": 0.2226, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.1127982646420824, |
|
"grad_norm": 0.21645221349069002, |
|
"learning_rate": 4.895793072592776e-05, |
|
"loss": 0.2286, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.1149674620390455, |
|
"grad_norm": 0.1970144418295259, |
|
"learning_rate": 4.8768498958584795e-05, |
|
"loss": 0.2825, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.1171366594360086, |
|
"grad_norm": 0.1957160462190446, |
|
"learning_rate": 4.8579084877051565e-05, |
|
"loss": 0.2391, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.119305856832972, |
|
"grad_norm": 0.21402378244369188, |
|
"learning_rate": 4.838969120153798e-05, |
|
"loss": 0.2555, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.121475054229935, |
|
"grad_norm": 0.21877951289590283, |
|
"learning_rate": 4.820032065196101e-05, |
|
"loss": 0.334, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.123644251626898, |
|
"grad_norm": 0.21929824905146836, |
|
"learning_rate": 4.801097594790539e-05, |
|
"loss": 0.2973, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.1258134490238612, |
|
"grad_norm": 0.20619197296358086, |
|
"learning_rate": 4.78216598085848e-05, |
|
"loss": 0.3018, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.1279826464208242, |
|
"grad_norm": 0.19555135005105484, |
|
"learning_rate": 4.763237495280258e-05, |
|
"loss": 0.2462, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.1301518438177873, |
|
"grad_norm": 0.21114282630013761, |
|
"learning_rate": 4.74431240989129e-05, |
|
"loss": 0.327, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.1323210412147506, |
|
"grad_norm": 0.20234364960770707, |
|
"learning_rate": 4.725390996478155e-05, |
|
"loss": 0.266, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.1344902386117137, |
|
"grad_norm": 0.19961583213636722, |
|
"learning_rate": 4.706473526774705e-05, |
|
"loss": 0.255, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.1366594360086768, |
|
"grad_norm": 0.19946499474596735, |
|
"learning_rate": 4.6875602724581474e-05, |
|
"loss": 0.2567, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.13882863340564, |
|
"grad_norm": 0.190031071960285, |
|
"learning_rate": 4.668651505145161e-05, |
|
"loss": 0.2536, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.140997830802603, |
|
"grad_norm": 0.19912223902629478, |
|
"learning_rate": 4.649747496387976e-05, |
|
"loss": 0.2814, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.1431670281995663, |
|
"grad_norm": 0.2029251712939136, |
|
"learning_rate": 4.630848517670495e-05, |
|
"loss": 0.2357, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.1453362255965294, |
|
"grad_norm": 0.1906141032119321, |
|
"learning_rate": 4.611954840404371e-05, |
|
"loss": 0.2814, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.1475054229934925, |
|
"grad_norm": 0.188787160150689, |
|
"learning_rate": 4.593066735925135e-05, |
|
"loss": 0.2696, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.1496746203904555, |
|
"grad_norm": 0.2343991376514014, |
|
"learning_rate": 4.574184475488274e-05, |
|
"loss": 0.2429, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.1518438177874186, |
|
"grad_norm": 0.19283832286439154, |
|
"learning_rate": 4.5553083302653576e-05, |
|
"loss": 0.2433, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.1540130151843817, |
|
"grad_norm": 0.21911781620446846, |
|
"learning_rate": 4.5364385713401256e-05, |
|
"loss": 0.2821, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.1561822125813448, |
|
"grad_norm": 0.22969903578133888, |
|
"learning_rate": 4.517575469704611e-05, |
|
"loss": 0.2776, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.158351409978308, |
|
"grad_norm": 0.20410973608962502, |
|
"learning_rate": 4.498719296255231e-05, |
|
"loss": 0.2549, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.1605206073752712, |
|
"grad_norm": 0.2146495328698227, |
|
"learning_rate": 4.4798703217889166e-05, |
|
"loss": 0.2809, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.1626898047722343, |
|
"grad_norm": 0.20839631573723869, |
|
"learning_rate": 4.461028816999203e-05, |
|
"loss": 0.2974, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.1648590021691974, |
|
"grad_norm": 0.19237241347004658, |
|
"learning_rate": 4.442195052472357e-05, |
|
"loss": 0.2636, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.1670281995661604, |
|
"grad_norm": 0.18275212969570778, |
|
"learning_rate": 4.423369298683485e-05, |
|
"loss": 0.2346, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.1691973969631237, |
|
"grad_norm": 0.21224147212685224, |
|
"learning_rate": 4.404551825992651e-05, |
|
"loss": 0.2942, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.1713665943600868, |
|
"grad_norm": 0.1947203315252727, |
|
"learning_rate": 4.385742904640993e-05, |
|
"loss": 0.239, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.17353579175705, |
|
"grad_norm": 0.2014217555241961, |
|
"learning_rate": 4.366942804746837e-05, |
|
"loss": 0.282, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.175704989154013, |
|
"grad_norm": 0.20005402924569843, |
|
"learning_rate": 4.3481517963018294e-05, |
|
"loss": 0.2567, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.177874186550976, |
|
"grad_norm": 0.19683374608842036, |
|
"learning_rate": 4.329370149167046e-05, |
|
"loss": 0.2422, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.1800433839479392, |
|
"grad_norm": 0.18295647045924832, |
|
"learning_rate": 4.310598133069128e-05, |
|
"loss": 0.2608, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.1822125813449025, |
|
"grad_norm": 0.25320255904534206, |
|
"learning_rate": 4.291836017596401e-05, |
|
"loss": 0.2522, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.1843817787418656, |
|
"grad_norm": 0.25012095918625965, |
|
"learning_rate": 4.273084072195008e-05, |
|
"loss": 0.2392, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.1865509761388287, |
|
"grad_norm": 0.2042418325652416, |
|
"learning_rate": 4.2543425661650325e-05, |
|
"loss": 0.2632, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.1887201735357917, |
|
"grad_norm": 0.20971549718156754, |
|
"learning_rate": 4.2356117686566464e-05, |
|
"loss": 0.3056, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.1908893709327548, |
|
"grad_norm": 0.22494235538454008, |
|
"learning_rate": 4.2168919486662225e-05, |
|
"loss": 0.1999, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.1930585683297181, |
|
"grad_norm": 0.19594451325509984, |
|
"learning_rate": 4.1981833750324934e-05, |
|
"loss": 0.2237, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1952277657266812, |
|
"grad_norm": 0.20048311843021904, |
|
"learning_rate": 4.179486316432674e-05, |
|
"loss": 0.2457, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.1973969631236443, |
|
"grad_norm": 0.2133231404323406, |
|
"learning_rate": 4.1608010413786145e-05, |
|
"loss": 0.2428, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.1995661605206074, |
|
"grad_norm": 0.22391429220016193, |
|
"learning_rate": 4.1421278182129345e-05, |
|
"loss": 0.2835, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.2017353579175705, |
|
"grad_norm": 0.20513974766270474, |
|
"learning_rate": 4.1234669151051814e-05, |
|
"loss": 0.2837, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.2039045553145336, |
|
"grad_norm": 0.21910779105815384, |
|
"learning_rate": 4.104818600047963e-05, |
|
"loss": 0.2552, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.2060737527114966, |
|
"grad_norm": 0.20347018099490702, |
|
"learning_rate": 4.0861831408531174e-05, |
|
"loss": 0.2825, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.20824295010846, |
|
"grad_norm": 0.20019781967770364, |
|
"learning_rate": 4.067560805147848e-05, |
|
"loss": 0.2646, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.210412147505423, |
|
"grad_norm": 0.19985925625011156, |
|
"learning_rate": 4.048951860370897e-05, |
|
"loss": 0.2339, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.2125813449023861, |
|
"grad_norm": 0.20804567399279866, |
|
"learning_rate": 4.030356573768691e-05, |
|
"loss": 0.2685, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.2147505422993492, |
|
"grad_norm": 0.210586914513503, |
|
"learning_rate": 4.0117752123915166e-05, |
|
"loss": 0.2949, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.2169197396963123, |
|
"grad_norm": 0.19395560738783052, |
|
"learning_rate": 3.9932080430896674e-05, |
|
"loss": 0.2581, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.2190889370932756, |
|
"grad_norm": 0.19335081571244175, |
|
"learning_rate": 3.974655332509632e-05, |
|
"loss": 0.2398, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.2212581344902387, |
|
"grad_norm": 0.21139167236935025, |
|
"learning_rate": 3.956117347090249e-05, |
|
"loss": 0.294, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.2234273318872018, |
|
"grad_norm": 0.2136565272145805, |
|
"learning_rate": 3.937594353058888e-05, |
|
"loss": 0.249, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.2255965292841648, |
|
"grad_norm": 0.20983577033903353, |
|
"learning_rate": 3.9190866164276224e-05, |
|
"loss": 0.277, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.227765726681128, |
|
"grad_norm": 0.22523284294717572, |
|
"learning_rate": 3.900594402989416e-05, |
|
"loss": 0.2965, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.229934924078091, |
|
"grad_norm": 0.19361498352831719, |
|
"learning_rate": 3.8821179783142976e-05, |
|
"loss": 0.2468, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.2321041214750543, |
|
"grad_norm": 0.20349907006204035, |
|
"learning_rate": 3.863657607745551e-05, |
|
"loss": 0.2707, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.2342733188720174, |
|
"grad_norm": 0.21437394856709535, |
|
"learning_rate": 3.8452135563959035e-05, |
|
"loss": 0.2482, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.2364425162689805, |
|
"grad_norm": 0.4418854079923846, |
|
"learning_rate": 3.8267860891437224e-05, |
|
"loss": 0.2526, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2386117136659436, |
|
"grad_norm": 0.20107219218854192, |
|
"learning_rate": 3.8083754706292044e-05, |
|
"loss": 0.2302, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.2407809110629067, |
|
"grad_norm": 0.22317690892260889, |
|
"learning_rate": 3.7899819652505805e-05, |
|
"loss": 0.3018, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.2429501084598698, |
|
"grad_norm": 0.24225532136972353, |
|
"learning_rate": 3.771605837160315e-05, |
|
"loss": 0.2945, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.245119305856833, |
|
"grad_norm": 0.21311877910331786, |
|
"learning_rate": 3.753247350261314e-05, |
|
"loss": 0.268, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.2472885032537961, |
|
"grad_norm": 0.21276496906364534, |
|
"learning_rate": 3.734906768203137e-05, |
|
"loss": 0.2796, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.2494577006507592, |
|
"grad_norm": 0.21666315750271123, |
|
"learning_rate": 3.7165843543782094e-05, |
|
"loss": 0.3048, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.2516268980477223, |
|
"grad_norm": 0.2117830975027958, |
|
"learning_rate": 3.698280371918035e-05, |
|
"loss": 0.3134, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.2537960954446854, |
|
"grad_norm": 0.21204614791084325, |
|
"learning_rate": 3.679995083689427e-05, |
|
"loss": 0.2583, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.2559652928416485, |
|
"grad_norm": 0.20266543030388112, |
|
"learning_rate": 3.6617287522907215e-05, |
|
"loss": 0.2187, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.2581344902386118, |
|
"grad_norm": 0.19799515726039077, |
|
"learning_rate": 3.643481640048019e-05, |
|
"loss": 0.2486, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.2603036876355749, |
|
"grad_norm": 0.22965267318434665, |
|
"learning_rate": 3.6252540090114014e-05, |
|
"loss": 0.3141, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.262472885032538, |
|
"grad_norm": 0.20300927451116837, |
|
"learning_rate": 3.607046120951187e-05, |
|
"loss": 0.2481, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.264642082429501, |
|
"grad_norm": 0.19473972219878735, |
|
"learning_rate": 3.588858237354154e-05, |
|
"loss": 0.2425, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.2668112798264641, |
|
"grad_norm": 0.19460999115969896, |
|
"learning_rate": 3.5706906194197995e-05, |
|
"loss": 0.2695, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.2689804772234274, |
|
"grad_norm": 0.2088031103817509, |
|
"learning_rate": 3.552543528056573e-05, |
|
"loss": 0.2894, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.2711496746203905, |
|
"grad_norm": 0.20225624597683012, |
|
"learning_rate": 3.534417223878149e-05, |
|
"loss": 0.2532, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.2733188720173536, |
|
"grad_norm": 0.1925296097866438, |
|
"learning_rate": 3.516311967199664e-05, |
|
"loss": 0.2544, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.2754880694143167, |
|
"grad_norm": 0.19510001512503033, |
|
"learning_rate": 3.498228018033994e-05, |
|
"loss": 0.262, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.2776572668112798, |
|
"grad_norm": 0.2049108757868876, |
|
"learning_rate": 3.4801656360880083e-05, |
|
"loss": 0.2562, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.2798264642082429, |
|
"grad_norm": 0.2266927606829423, |
|
"learning_rate": 3.4621250807588524e-05, |
|
"loss": 0.3551, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.281995661605206, |
|
"grad_norm": 0.21569872794030454, |
|
"learning_rate": 3.444106611130209e-05, |
|
"loss": 0.2774, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.2841648590021693, |
|
"grad_norm": 0.21023989849968328, |
|
"learning_rate": 3.4261104859685865e-05, |
|
"loss": 0.2622, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.2863340563991323, |
|
"grad_norm": 0.19106084312223887, |
|
"learning_rate": 3.408136963719605e-05, |
|
"loss": 0.2252, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.2885032537960954, |
|
"grad_norm": 0.19995458008751701, |
|
"learning_rate": 3.39018630250427e-05, |
|
"loss": 0.2372, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.2906724511930585, |
|
"grad_norm": 0.22200947357348208, |
|
"learning_rate": 3.3722587601152855e-05, |
|
"loss": 0.2321, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.2928416485900218, |
|
"grad_norm": 0.1950829833237249, |
|
"learning_rate": 3.354354594013337e-05, |
|
"loss": 0.2241, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.295010845986985, |
|
"grad_norm": 0.20036238676413232, |
|
"learning_rate": 3.336474061323399e-05, |
|
"loss": 0.2055, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.297180043383948, |
|
"grad_norm": 0.2107825415915727, |
|
"learning_rate": 3.318617418831044e-05, |
|
"loss": 0.256, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.299349240780911, |
|
"grad_norm": 0.21989801828531605, |
|
"learning_rate": 3.3007849229787516e-05, |
|
"loss": 0.2764, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.3015184381778742, |
|
"grad_norm": 0.21680377382599092, |
|
"learning_rate": 3.282976829862227e-05, |
|
"loss": 0.2566, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3015184381778742, |
|
"eval_loss": 0.3054097592830658, |
|
"eval_runtime": 39.7394, |
|
"eval_samples_per_second": 0.478, |
|
"eval_steps_per_second": 0.126, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.3036876355748372, |
|
"grad_norm": 0.20308783149132958, |
|
"learning_rate": 3.2651933952267245e-05, |
|
"loss": 0.2112, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.3058568329718003, |
|
"grad_norm": 0.21690298502452043, |
|
"learning_rate": 3.247434874463372e-05, |
|
"loss": 0.2649, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.3080260303687636, |
|
"grad_norm": 0.2055181727108075, |
|
"learning_rate": 3.2297015226055076e-05, |
|
"loss": 0.2824, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.3101952277657267, |
|
"grad_norm": 0.2320338793221242, |
|
"learning_rate": 3.211993594325009e-05, |
|
"loss": 0.2456, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.3123644251626898, |
|
"grad_norm": 0.20493923957441781, |
|
"learning_rate": 3.194311343928649e-05, |
|
"loss": 0.2366, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.314533622559653, |
|
"grad_norm": 0.2030392074075536, |
|
"learning_rate": 3.176655025354425e-05, |
|
"loss": 0.2508, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.316702819956616, |
|
"grad_norm": 0.2110656534399716, |
|
"learning_rate": 3.159024892167935e-05, |
|
"loss": 0.2502, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.3188720173535793, |
|
"grad_norm": 0.25854440170290666, |
|
"learning_rate": 3.141421197558713e-05, |
|
"loss": 0.3055, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.3210412147505424, |
|
"grad_norm": 0.19420699816129983, |
|
"learning_rate": 3.123844194336613e-05, |
|
"loss": 0.2271, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.3232104121475055, |
|
"grad_norm": 0.21496315608117952, |
|
"learning_rate": 3.1062941349281594e-05, |
|
"loss": 0.2673, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3253796095444685, |
|
"grad_norm": 0.20106583560494895, |
|
"learning_rate": 3.0887712713729435e-05, |
|
"loss": 0.2365, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.3275488069414316, |
|
"grad_norm": 0.21120123263668, |
|
"learning_rate": 3.071275855319979e-05, |
|
"loss": 0.2554, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.3297180043383947, |
|
"grad_norm": 0.21665345900294192, |
|
"learning_rate": 3.053808138024113e-05, |
|
"loss": 0.2701, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.3318872017353578, |
|
"grad_norm": 0.21025986355262039, |
|
"learning_rate": 3.036368370342396e-05, |
|
"loss": 0.243, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.334056399132321, |
|
"grad_norm": 0.2060375691889601, |
|
"learning_rate": 3.018956802730497e-05, |
|
"loss": 0.275, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.3362255965292842, |
|
"grad_norm": 0.22136533659676028, |
|
"learning_rate": 3.0015736852390918e-05, |
|
"loss": 0.257, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.3383947939262473, |
|
"grad_norm": 0.21449871481305618, |
|
"learning_rate": 2.984219267510285e-05, |
|
"loss": 0.2704, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.3405639913232104, |
|
"grad_norm": 0.24014701061684474, |
|
"learning_rate": 2.966893798774012e-05, |
|
"loss": 0.2891, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.3427331887201737, |
|
"grad_norm": 0.2244269288798775, |
|
"learning_rate": 2.9495975278444743e-05, |
|
"loss": 0.2935, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.3449023861171367, |
|
"grad_norm": 0.21096761572427544, |
|
"learning_rate": 2.9323307031165503e-05, |
|
"loss": 0.2739, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.3470715835140998, |
|
"grad_norm": 0.20262736058046335, |
|
"learning_rate": 2.9150935725622434e-05, |
|
"loss": 0.2499, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.349240780911063, |
|
"grad_norm": 0.19820653616274597, |
|
"learning_rate": 2.8978863837271096e-05, |
|
"loss": 0.2393, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.351409978308026, |
|
"grad_norm": 0.23042991309507332, |
|
"learning_rate": 2.8807093837267062e-05, |
|
"loss": 0.302, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.353579175704989, |
|
"grad_norm": 0.1909580869184299, |
|
"learning_rate": 2.8635628192430457e-05, |
|
"loss": 0.2492, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.3557483731019522, |
|
"grad_norm": 0.1954261314272082, |
|
"learning_rate": 2.8464469365210476e-05, |
|
"loss": 0.2369, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.3579175704989155, |
|
"grad_norm": 0.21435896485747777, |
|
"learning_rate": 2.829361981365008e-05, |
|
"loss": 0.2825, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.3600867678958786, |
|
"grad_norm": 0.20366176793808735, |
|
"learning_rate": 2.812308199135064e-05, |
|
"loss": 0.2455, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.3622559652928417, |
|
"grad_norm": 0.220096792471076, |
|
"learning_rate": 2.795285834743674e-05, |
|
"loss": 0.2281, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.3644251626898047, |
|
"grad_norm": 0.23939270387667466, |
|
"learning_rate": 2.7782951326521e-05, |
|
"loss": 0.3235, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.3665943600867678, |
|
"grad_norm": 0.2501946465822018, |
|
"learning_rate": 2.7613363368668933e-05, |
|
"loss": 0.3561, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.3687635574837311, |
|
"grad_norm": 0.19776494315538312, |
|
"learning_rate": 2.744409690936396e-05, |
|
"loss": 0.235, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.3709327548806942, |
|
"grad_norm": 0.22972123156189964, |
|
"learning_rate": 2.7275154379472383e-05, |
|
"loss": 0.2781, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.3731019522776573, |
|
"grad_norm": 0.22520549969079728, |
|
"learning_rate": 2.7106538205208503e-05, |
|
"loss": 0.2952, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.3752711496746204, |
|
"grad_norm": 0.2056228650880094, |
|
"learning_rate": 2.6938250808099765e-05, |
|
"loss": 0.2337, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.3774403470715835, |
|
"grad_norm": 0.2286918375083098, |
|
"learning_rate": 2.677029460495199e-05, |
|
"loss": 0.2896, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.3796095444685466, |
|
"grad_norm": 0.19803374847789326, |
|
"learning_rate": 2.6602672007814657e-05, |
|
"loss": 0.2445, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.3817787418655096, |
|
"grad_norm": 0.21204934763741792, |
|
"learning_rate": 2.6435385423946268e-05, |
|
"loss": 0.2565, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.383947939262473, |
|
"grad_norm": 0.2623356517904308, |
|
"learning_rate": 2.6268437255779795e-05, |
|
"loss": 0.2574, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.386117136659436, |
|
"grad_norm": 0.22828545407934486, |
|
"learning_rate": 2.610182990088813e-05, |
|
"loss": 0.3083, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.3882863340563991, |
|
"grad_norm": 0.21316635288288857, |
|
"learning_rate": 2.5935565751949708e-05, |
|
"loss": 0.2884, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3904555314533622, |
|
"grad_norm": 0.18200487727356005, |
|
"learning_rate": 2.5769647196714115e-05, |
|
"loss": 0.1971, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.3926247288503255, |
|
"grad_norm": 0.20394759124623543, |
|
"learning_rate": 2.5604076617967797e-05, |
|
"loss": 0.2531, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.3947939262472886, |
|
"grad_norm": 0.2211940525129031, |
|
"learning_rate": 2.5438856393499856e-05, |
|
"loss": 0.2642, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.3969631236442517, |
|
"grad_norm": 0.21677701478466002, |
|
"learning_rate": 2.527398889606789e-05, |
|
"loss": 0.2471, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.3991323210412148, |
|
"grad_norm": 0.19805770191187302, |
|
"learning_rate": 2.5109476493363903e-05, |
|
"loss": 0.2275, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.4013015184381779, |
|
"grad_norm": 0.370645367139852, |
|
"learning_rate": 2.494532154798036e-05, |
|
"loss": 0.2652, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.403470715835141, |
|
"grad_norm": 0.4096140214680738, |
|
"learning_rate": 2.4781526417376167e-05, |
|
"loss": 0.2214, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.405639913232104, |
|
"grad_norm": 0.20027276095982202, |
|
"learning_rate": 2.4618093453842904e-05, |
|
"loss": 0.2283, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.407809110629067, |
|
"grad_norm": 0.2042256862448447, |
|
"learning_rate": 2.4455025004470983e-05, |
|
"loss": 0.2317, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.4099783080260304, |
|
"grad_norm": 0.2170785919045779, |
|
"learning_rate": 2.4292323411115963e-05, |
|
"loss": 0.2582, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.4121475054229935, |
|
"grad_norm": 0.5111593541353036, |
|
"learning_rate": 2.412999101036493e-05, |
|
"loss": 0.3233, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.4143167028199566, |
|
"grad_norm": 0.22766256947955918, |
|
"learning_rate": 2.3968030133502916e-05, |
|
"loss": 0.2548, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.4164859002169197, |
|
"grad_norm": 0.21004376919072829, |
|
"learning_rate": 2.380644310647944e-05, |
|
"loss": 0.2463, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.418655097613883, |
|
"grad_norm": 0.22454612498741602, |
|
"learning_rate": 2.364523224987508e-05, |
|
"loss": 0.2186, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.420824295010846, |
|
"grad_norm": 0.21635926190963323, |
|
"learning_rate": 2.3484399878868185e-05, |
|
"loss": 0.2574, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.4229934924078091, |
|
"grad_norm": 0.20472709985073356, |
|
"learning_rate": 2.3323948303201586e-05, |
|
"loss": 0.2453, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.4251626898047722, |
|
"grad_norm": 0.2204401348193557, |
|
"learning_rate": 2.3163879827149448e-05, |
|
"loss": 0.2507, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.4273318872017353, |
|
"grad_norm": 0.24175915571290157, |
|
"learning_rate": 2.300419674948418e-05, |
|
"loss": 0.2907, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.4295010845986984, |
|
"grad_norm": 0.22445756380382137, |
|
"learning_rate": 2.2844901363443404e-05, |
|
"loss": 0.2791, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.4316702819956615, |
|
"grad_norm": 0.22386370995786847, |
|
"learning_rate": 2.2685995956697038e-05, |
|
"loss": 0.2779, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4338394793926248, |
|
"grad_norm": 0.23249928031701667, |
|
"learning_rate": 2.2527482811314437e-05, |
|
"loss": 0.311, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.4360086767895879, |
|
"grad_norm": 0.2342495773652486, |
|
"learning_rate": 2.2369364203731618e-05, |
|
"loss": 0.2679, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.438177874186551, |
|
"grad_norm": 0.20675103233277622, |
|
"learning_rate": 2.221164240471857e-05, |
|
"loss": 0.2313, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.440347071583514, |
|
"grad_norm": 0.20528823101739593, |
|
"learning_rate": 2.205431967934664e-05, |
|
"loss": 0.2492, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.4425162689804774, |
|
"grad_norm": 0.2128126904380516, |
|
"learning_rate": 2.1897398286956012e-05, |
|
"loss": 0.2777, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.4446854663774404, |
|
"grad_norm": 0.21518099561214468, |
|
"learning_rate": 2.1740880481123238e-05, |
|
"loss": 0.2518, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.4468546637744035, |
|
"grad_norm": 0.2186083154676726, |
|
"learning_rate": 2.1584768509628922e-05, |
|
"loss": 0.2453, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.4490238611713666, |
|
"grad_norm": 0.25175528122891083, |
|
"learning_rate": 2.142906461442537e-05, |
|
"loss": 0.3037, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.4511930585683297, |
|
"grad_norm": 0.22763386297900118, |
|
"learning_rate": 2.1273771031604456e-05, |
|
"loss": 0.2644, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.4533622559652928, |
|
"grad_norm": 0.2295090408465574, |
|
"learning_rate": 2.1118889991365476e-05, |
|
"loss": 0.2686, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.4555314533622559, |
|
"grad_norm": 0.20891584135932628, |
|
"learning_rate": 2.096442371798313e-05, |
|
"loss": 0.2204, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.457700650759219, |
|
"grad_norm": 0.2254247187316148, |
|
"learning_rate": 2.0810374429775565e-05, |
|
"loss": 0.2913, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.4598698481561823, |
|
"grad_norm": 0.24216942402374153, |
|
"learning_rate": 2.0656744339072542e-05, |
|
"loss": 0.3349, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.4620390455531453, |
|
"grad_norm": 0.22222297204476354, |
|
"learning_rate": 2.0503535652183643e-05, |
|
"loss": 0.2793, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.4642082429501084, |
|
"grad_norm": 0.20870048389264126, |
|
"learning_rate": 2.035075056936659e-05, |
|
"loss": 0.2655, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.4663774403470715, |
|
"grad_norm": 0.22841755003782033, |
|
"learning_rate": 2.0198391284795664e-05, |
|
"loss": 0.3155, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.4685466377440348, |
|
"grad_norm": 0.22269122758511214, |
|
"learning_rate": 2.004645998653017e-05, |
|
"loss": 0.2491, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.470715835140998, |
|
"grad_norm": 0.22332389169722103, |
|
"learning_rate": 1.9894958856483026e-05, |
|
"loss": 0.2648, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.472885032537961, |
|
"grad_norm": 0.214833703844543, |
|
"learning_rate": 1.974389007038942e-05, |
|
"loss": 0.2609, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.475054229934924, |
|
"grad_norm": 0.20682197278555356, |
|
"learning_rate": 1.9593255797775577e-05, |
|
"loss": 0.2328, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.4772234273318872, |
|
"grad_norm": 0.23230767426539825, |
|
"learning_rate": 1.9443058201927588e-05, |
|
"loss": 0.2347, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.4793926247288502, |
|
"grad_norm": 0.2261449038214272, |
|
"learning_rate": 1.9293299439860396e-05, |
|
"loss": 0.2758, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.4815618221258133, |
|
"grad_norm": 0.3168804778663666, |
|
"learning_rate": 1.9143981662286665e-05, |
|
"loss": 0.273, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.4837310195227766, |
|
"grad_norm": 0.22898408446614574, |
|
"learning_rate": 1.8995107013586137e-05, |
|
"loss": 0.2713, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.4859002169197397, |
|
"grad_norm": 0.22912219172967715, |
|
"learning_rate": 1.8846677631774575e-05, |
|
"loss": 0.2937, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.4880694143167028, |
|
"grad_norm": 0.20900331783691264, |
|
"learning_rate": 1.869869564847329e-05, |
|
"loss": 0.2269, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.490238611713666, |
|
"grad_norm": 0.21015606821149282, |
|
"learning_rate": 1.8551163188878313e-05, |
|
"loss": 0.2299, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.4924078091106292, |
|
"grad_norm": 0.2250692099687625, |
|
"learning_rate": 1.840408237173011e-05, |
|
"loss": 0.2892, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.4945770065075923, |
|
"grad_norm": 0.2145016212764728, |
|
"learning_rate": 1.8257455309282882e-05, |
|
"loss": 0.2476, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.4967462039045554, |
|
"grad_norm": 0.2236334148717297, |
|
"learning_rate": 1.811128410727454e-05, |
|
"loss": 0.2259, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4989154013015185, |
|
"grad_norm": 0.22580326511659854, |
|
"learning_rate": 1.7965570864896138e-05, |
|
"loss": 0.2611, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.5010845986984815, |
|
"grad_norm": 0.23566517211163768, |
|
"learning_rate": 1.7820317674762034e-05, |
|
"loss": 0.3256, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.5032537960954446, |
|
"grad_norm": 0.2387014338194217, |
|
"learning_rate": 1.767552662287955e-05, |
|
"loss": 0.2169, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.5054229934924077, |
|
"grad_norm": 0.2131558457240097, |
|
"learning_rate": 1.7531199788619305e-05, |
|
"loss": 0.2166, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.5075921908893708, |
|
"grad_norm": 0.21663448852517472, |
|
"learning_rate": 1.738733924468507e-05, |
|
"loss": 0.2473, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.509761388286334, |
|
"grad_norm": 0.2302024457559936, |
|
"learning_rate": 1.7243947057084252e-05, |
|
"loss": 0.2579, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.5119305856832972, |
|
"grad_norm": 0.21162459671820377, |
|
"learning_rate": 1.7101025285097988e-05, |
|
"loss": 0.2367, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.5140997830802603, |
|
"grad_norm": 0.22899234818180603, |
|
"learning_rate": 1.695857598125183e-05, |
|
"loss": 0.2487, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.5162689804772236, |
|
"grad_norm": 0.21180166251652038, |
|
"learning_rate": 1.681660119128598e-05, |
|
"loss": 0.2454, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.5184381778741867, |
|
"grad_norm": 0.21900430622153325, |
|
"learning_rate": 1.6675102954126204e-05, |
|
"loss": 0.2505, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.5206073752711498, |
|
"grad_norm": 0.22388111292395452, |
|
"learning_rate": 1.6534083301854287e-05, |
|
"loss": 0.256, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.5227765726681128, |
|
"grad_norm": 0.19613947069940627, |
|
"learning_rate": 1.639354425967904e-05, |
|
"loss": 0.2029, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.524945770065076, |
|
"grad_norm": 0.20905100864213258, |
|
"learning_rate": 1.6253487845907122e-05, |
|
"loss": 0.2275, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.527114967462039, |
|
"grad_norm": 0.21880038965460166, |
|
"learning_rate": 1.6113916071914082e-05, |
|
"loss": 0.2617, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.529284164859002, |
|
"grad_norm": 0.21208440823468386, |
|
"learning_rate": 1.5974830942115472e-05, |
|
"loss": 0.242, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.5314533622559652, |
|
"grad_norm": 0.23456256487858015, |
|
"learning_rate": 1.5836234453938054e-05, |
|
"loss": 0.2549, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.5336225596529283, |
|
"grad_norm": 0.2396806217154987, |
|
"learning_rate": 1.5698128597791122e-05, |
|
"loss": 0.3393, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.5357917570498916, |
|
"grad_norm": 0.2031182271876605, |
|
"learning_rate": 1.5560515357037898e-05, |
|
"loss": 0.2152, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.5379609544468547, |
|
"grad_norm": 0.22000222658603283, |
|
"learning_rate": 1.542339670796712e-05, |
|
"loss": 0.2648, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.5401301518438177, |
|
"grad_norm": 0.265809393717544, |
|
"learning_rate": 1.528677461976451e-05, |
|
"loss": 0.192, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.542299349240781, |
|
"grad_norm": 0.20877625917509104, |
|
"learning_rate": 1.5150651054484705e-05, |
|
"loss": 0.2257, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.5444685466377441, |
|
"grad_norm": 0.21683968747146132, |
|
"learning_rate": 1.5015027967022838e-05, |
|
"loss": 0.2702, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.5466377440347072, |
|
"grad_norm": 0.20896474897637557, |
|
"learning_rate": 1.4879907305086721e-05, |
|
"loss": 0.2459, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.5488069414316703, |
|
"grad_norm": 0.2615773501556193, |
|
"learning_rate": 1.4745291009168616e-05, |
|
"loss": 0.2782, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.5509761388286334, |
|
"grad_norm": 0.22282625503762615, |
|
"learning_rate": 1.461118101251761e-05, |
|
"loss": 0.2885, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.5531453362255965, |
|
"grad_norm": 0.21801422944358295, |
|
"learning_rate": 1.4477579241111616e-05, |
|
"loss": 0.2406, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.5553145336225596, |
|
"grad_norm": 0.22073898585184953, |
|
"learning_rate": 1.4344487613629958e-05, |
|
"loss": 0.2656, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.5574837310195226, |
|
"grad_norm": 0.2187048550685909, |
|
"learning_rate": 1.4211908041425565e-05, |
|
"loss": 0.2613, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.559652928416486, |
|
"grad_norm": 0.2101381320422253, |
|
"learning_rate": 1.4079842428497764e-05, |
|
"loss": 0.2333, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.561822125813449, |
|
"grad_norm": 0.20391298465277313, |
|
"learning_rate": 1.3948292671464708e-05, |
|
"loss": 0.2343, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.5639913232104121, |
|
"grad_norm": 0.22749006672655192, |
|
"learning_rate": 1.3817260659536368e-05, |
|
"loss": 0.2563, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.5661605206073754, |
|
"grad_norm": 0.22567622662999204, |
|
"learning_rate": 1.368674827448716e-05, |
|
"loss": 0.3026, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.5683297180043385, |
|
"grad_norm": 0.22202307546172664, |
|
"learning_rate": 1.355675739062916e-05, |
|
"loss": 0.2365, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.5704989154013016, |
|
"grad_norm": 0.20324438958594626, |
|
"learning_rate": 1.3427289874784965e-05, |
|
"loss": 0.2113, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.5726681127982647, |
|
"grad_norm": 0.21573774200754933, |
|
"learning_rate": 1.3298347586261101e-05, |
|
"loss": 0.2256, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.5748373101952278, |
|
"grad_norm": 0.2107630411490319, |
|
"learning_rate": 1.3169932376821087e-05, |
|
"loss": 0.2243, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.5770065075921909, |
|
"grad_norm": 0.2250209366455123, |
|
"learning_rate": 1.3042046090659082e-05, |
|
"loss": 0.2311, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.579175704989154, |
|
"grad_norm": 0.21327462482312162, |
|
"learning_rate": 1.2914690564373172e-05, |
|
"loss": 0.2025, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.581344902386117, |
|
"grad_norm": 0.21048416380580592, |
|
"learning_rate": 1.278786762693921e-05, |
|
"loss": 0.2505, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.58351409978308, |
|
"grad_norm": 0.23805779361424165, |
|
"learning_rate": 1.2661579099684345e-05, |
|
"loss": 0.2993, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.5856832971800434, |
|
"grad_norm": 0.2236199058892429, |
|
"learning_rate": 1.2535826796261058e-05, |
|
"loss": 0.2559, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.5878524945770065, |
|
"grad_norm": 0.2267692199206878, |
|
"learning_rate": 1.2410612522620923e-05, |
|
"loss": 0.2631, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.5900216919739696, |
|
"grad_norm": 0.22527317437513994, |
|
"learning_rate": 1.2285938076988879e-05, |
|
"loss": 0.2381, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.592190889370933, |
|
"grad_norm": 0.20643186489302753, |
|
"learning_rate": 1.2161805249837189e-05, |
|
"loss": 0.2261, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.594360086767896, |
|
"grad_norm": 0.2363410474128828, |
|
"learning_rate": 1.2038215823859944e-05, |
|
"loss": 0.2848, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.596529284164859, |
|
"grad_norm": 0.22064343187390295, |
|
"learning_rate": 1.1915171573947231e-05, |
|
"loss": 0.2114, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.5986984815618221, |
|
"grad_norm": 0.2241110750390057, |
|
"learning_rate": 1.179267426715988e-05, |
|
"loss": 0.273, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.6008676789587852, |
|
"grad_norm": 0.24065816206033924, |
|
"learning_rate": 1.1670725662703907e-05, |
|
"loss": 0.2667, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.6030368763557483, |
|
"grad_norm": 0.216540199511313, |
|
"learning_rate": 1.1549327511905322e-05, |
|
"loss": 0.2603, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.6052060737527114, |
|
"grad_norm": 0.22867224484504703, |
|
"learning_rate": 1.1428481558184984e-05, |
|
"loss": 0.2835, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.6073752711496745, |
|
"grad_norm": 0.21664360434007227, |
|
"learning_rate": 1.1308189537033532e-05, |
|
"loss": 0.2799, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.6095444685466378, |
|
"grad_norm": 0.21600464380836015, |
|
"learning_rate": 1.1188453175986502e-05, |
|
"loss": 0.2555, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.6117136659436009, |
|
"grad_norm": 0.22003089603782433, |
|
"learning_rate": 1.1069274194599477e-05, |
|
"loss": 0.2764, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.613882863340564, |
|
"grad_norm": 0.21966503118494438, |
|
"learning_rate": 1.0950654304423408e-05, |
|
"loss": 0.243, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.6160520607375273, |
|
"grad_norm": 0.2134291281940422, |
|
"learning_rate": 1.0832595208980052e-05, |
|
"loss": 0.2235, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.6182212581344904, |
|
"grad_norm": 0.22637670128987095, |
|
"learning_rate": 1.0715098603737473e-05, |
|
"loss": 0.2597, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.6203904555314534, |
|
"grad_norm": 0.21648611339436485, |
|
"learning_rate": 1.0598166176085722e-05, |
|
"loss": 0.2542, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.6225596529284165, |
|
"grad_norm": 0.23629777995443355, |
|
"learning_rate": 1.0481799605312598e-05, |
|
"loss": 0.3228, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.6247288503253796, |
|
"grad_norm": 0.21725498937866275, |
|
"learning_rate": 1.0366000562579509e-05, |
|
"loss": 0.2344, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.6268980477223427, |
|
"grad_norm": 0.21952448011527256, |
|
"learning_rate": 1.0250770710897512e-05, |
|
"loss": 0.3131, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.6290672451193058, |
|
"grad_norm": 0.2011634686461227, |
|
"learning_rate": 1.0136111705103384e-05, |
|
"loss": 0.2069, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.6312364425162689, |
|
"grad_norm": 0.22463127569065536, |
|
"learning_rate": 1.0022025191835905e-05, |
|
"loss": 0.2281, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.633405639913232, |
|
"grad_norm": 0.2062264163167854, |
|
"learning_rate": 9.90851280951216e-06, |
|
"loss": 0.2179, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.6355748373101953, |
|
"grad_norm": 0.22691208712294778, |
|
"learning_rate": 9.795576188304068e-06, |
|
"loss": 0.3075, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.6377440347071583, |
|
"grad_norm": 0.21132869155040268, |
|
"learning_rate": 9.68321695011491e-06, |
|
"loss": 0.1903, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.6399132321041214, |
|
"grad_norm": 0.21551342495874198, |
|
"learning_rate": 9.571436708556076e-06, |
|
"loss": 0.221, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.6420824295010847, |
|
"grad_norm": 0.2174975697740617, |
|
"learning_rate": 9.460237068923883e-06, |
|
"loss": 0.2587, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.6442516268980478, |
|
"grad_norm": 0.2160546965912458, |
|
"learning_rate": 9.34961962817652e-06, |
|
"loss": 0.2303, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.646420824295011, |
|
"grad_norm": 0.2376163843745843, |
|
"learning_rate": 9.239585974911074e-06, |
|
"loss": 0.3243, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.648590021691974, |
|
"grad_norm": 0.21539719738458052, |
|
"learning_rate": 9.130137689340839e-06, |
|
"loss": 0.2571, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.650759219088937, |
|
"grad_norm": 0.2410719861687306, |
|
"learning_rate": 9.021276343272434e-06, |
|
"loss": 0.2633, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.6529284164859002, |
|
"grad_norm": 0.2079469970240713, |
|
"learning_rate": 8.913003500083438e-06, |
|
"loss": 0.2379, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.6550976138828633, |
|
"grad_norm": 0.22772263664943265, |
|
"learning_rate": 8.805320714699728e-06, |
|
"loss": 0.2395, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.6572668112798263, |
|
"grad_norm": 0.20307660213818468, |
|
"learning_rate": 8.698229533573338e-06, |
|
"loss": 0.1864, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.6594360086767896, |
|
"grad_norm": 0.2148519135414743, |
|
"learning_rate": 8.591731494660132e-06, |
|
"loss": 0.2053, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.6616052060737527, |
|
"grad_norm": 0.229814247011586, |
|
"learning_rate": 8.485828127397749e-06, |
|
"loss": 0.2875, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.6637744034707158, |
|
"grad_norm": 0.21643960802023465, |
|
"learning_rate": 8.380520952683646e-06, |
|
"loss": 0.2599, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.6659436008676791, |
|
"grad_norm": 0.22244934877456704, |
|
"learning_rate": 8.275811482853245e-06, |
|
"loss": 0.2598, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.6681127982646422, |
|
"grad_norm": 0.23343908563620214, |
|
"learning_rate": 8.17170122165824e-06, |
|
"loss": 0.3098, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.6702819956616053, |
|
"grad_norm": 0.19803997345115992, |
|
"learning_rate": 8.068191664244945e-06, |
|
"loss": 0.2029, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.6724511930585684, |
|
"grad_norm": 0.2190516176521641, |
|
"learning_rate": 7.965284297132896e-06, |
|
"loss": 0.241, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.6746203904555315, |
|
"grad_norm": 0.21290567846006564, |
|
"learning_rate": 7.862980598193442e-06, |
|
"loss": 0.2063, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.6767895878524945, |
|
"grad_norm": 0.23699837862628473, |
|
"learning_rate": 7.761282036628548e-06, |
|
"loss": 0.3082, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.6789587852494576, |
|
"grad_norm": 0.22947643986082064, |
|
"learning_rate": 7.660190072949692e-06, |
|
"loss": 0.2628, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.6811279826464207, |
|
"grad_norm": 0.23210641035367943, |
|
"learning_rate": 7.559706158956898e-06, |
|
"loss": 0.2715, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.6832971800433838, |
|
"grad_norm": 0.2294766385634235, |
|
"learning_rate": 7.459831737717859e-06, |
|
"loss": 0.2749, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.685466377440347, |
|
"grad_norm": 0.20107115872856784, |
|
"learning_rate": 7.360568243547261e-06, |
|
"loss": 0.219, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.6876355748373102, |
|
"grad_norm": 0.20113176199889593, |
|
"learning_rate": 7.261917101986127e-06, |
|
"loss": 0.2074, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.6898047722342733, |
|
"grad_norm": 0.23609135019264865, |
|
"learning_rate": 7.163879729781392e-06, |
|
"loss": 0.2667, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.6919739696312366, |
|
"grad_norm": 0.2228940425973063, |
|
"learning_rate": 7.066457534865528e-06, |
|
"loss": 0.2643, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.6941431670281997, |
|
"grad_norm": 0.23796760069402623, |
|
"learning_rate": 6.969651916336334e-06, |
|
"loss": 0.2249, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.6963123644251628, |
|
"grad_norm": 0.2327272256132086, |
|
"learning_rate": 6.8734642644368576e-06, |
|
"loss": 0.2543, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.6984815618221258, |
|
"grad_norm": 0.2472213046502843, |
|
"learning_rate": 6.7778959605353906e-06, |
|
"loss": 0.2585, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.700650759219089, |
|
"grad_norm": 0.22682131480226772, |
|
"learning_rate": 6.682948377105686e-06, |
|
"loss": 0.226, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.702819956616052, |
|
"grad_norm": 0.23469843862434467, |
|
"learning_rate": 6.588622877707196e-06, |
|
"loss": 0.2933, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.704989154013015, |
|
"grad_norm": 0.2244645457364394, |
|
"learning_rate": 6.49492081696551e-06, |
|
"loss": 0.2965, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.7071583514099782, |
|
"grad_norm": 0.20869135290493288, |
|
"learning_rate": 6.401843540552921e-06, |
|
"loss": 0.2295, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.7093275488069413, |
|
"grad_norm": 0.21494691194019463, |
|
"learning_rate": 6.309392385169066e-06, |
|
"loss": 0.2418, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.7114967462039046, |
|
"grad_norm": 0.2521558215114822, |
|
"learning_rate": 6.217568678521746e-06, |
|
"loss": 0.3596, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.7136659436008677, |
|
"grad_norm": 0.21627894705194553, |
|
"learning_rate": 6.126373739307856e-06, |
|
"loss": 0.2358, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.715835140997831, |
|
"grad_norm": 0.2520399170899315, |
|
"learning_rate": 6.035808877194454e-06, |
|
"loss": 0.3594, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.718004338394794, |
|
"grad_norm": 0.22790741465610556, |
|
"learning_rate": 5.945875392799944e-06, |
|
"loss": 0.3138, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.7201735357917571, |
|
"grad_norm": 0.21149040475910053, |
|
"learning_rate": 5.8565745776754e-06, |
|
"loss": 0.2251, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.7223427331887202, |
|
"grad_norm": 0.23452143004512307, |
|
"learning_rate": 5.7679077142860135e-06, |
|
"loss": 0.2652, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.7245119305856833, |
|
"grad_norm": 0.22627084576919643, |
|
"learning_rate": 5.679876075992685e-06, |
|
"loss": 0.2856, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.7266811279826464, |
|
"grad_norm": 0.23065836475523283, |
|
"learning_rate": 5.592480927033733e-06, |
|
"loss": 0.2407, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.7288503253796095, |
|
"grad_norm": 0.21666121524543802, |
|
"learning_rate": 5.505723522506734e-06, |
|
"loss": 0.2533, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.7310195227765726, |
|
"grad_norm": 0.21851910440097064, |
|
"learning_rate": 5.419605108350501e-06, |
|
"loss": 0.1987, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.7331887201735356, |
|
"grad_norm": 0.22502982751463446, |
|
"learning_rate": 5.334126921327193e-06, |
|
"loss": 0.2522, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.735357917570499, |
|
"grad_norm": 0.24282939171015397, |
|
"learning_rate": 5.249290189004552e-06, |
|
"loss": 0.3036, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.735357917570499, |
|
"eval_loss": 0.2993714511394501, |
|
"eval_runtime": 39.7253, |
|
"eval_samples_per_second": 0.478, |
|
"eval_steps_per_second": 0.126, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.737527114967462, |
|
"grad_norm": 0.22684517743489496, |
|
"learning_rate": 5.165096129738267e-06, |
|
"loss": 0.2412, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.7396963123644251, |
|
"grad_norm": 0.21496799405491135, |
|
"learning_rate": 5.081545952654493e-06, |
|
"loss": 0.2373, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.7418655097613884, |
|
"grad_norm": 0.22310608285043565, |
|
"learning_rate": 4.998640857632464e-06, |
|
"loss": 0.2496, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.7440347071583515, |
|
"grad_norm": 0.2320457312319671, |
|
"learning_rate": 4.916382035287276e-06, |
|
"loss": 0.2582, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.7462039045553146, |
|
"grad_norm": 0.22615551698433095, |
|
"learning_rate": 4.8347706669527985e-06, |
|
"loss": 0.2548, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.7483731019522777, |
|
"grad_norm": 0.23006147126943094, |
|
"learning_rate": 4.7538079246646825e-06, |
|
"loss": 0.2776, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.7505422993492408, |
|
"grad_norm": 0.22519218682482992, |
|
"learning_rate": 4.673494971143538e-06, |
|
"loss": 0.2457, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.7527114967462039, |
|
"grad_norm": 0.21484715264028661, |
|
"learning_rate": 4.5938329597782824e-06, |
|
"loss": 0.2163, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.754880694143167, |
|
"grad_norm": 0.22724812121756507, |
|
"learning_rate": 4.514823034609455e-06, |
|
"loss": 0.2568, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.75704989154013, |
|
"grad_norm": 0.2192304364191693, |
|
"learning_rate": 4.436466330312966e-06, |
|
"loss": 0.235, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.7592190889370931, |
|
"grad_norm": 0.1833973092649833, |
|
"learning_rate": 4.358763972183599e-06, |
|
"loss": 0.166, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.7613882863340564, |
|
"grad_norm": 0.22170941113081136, |
|
"learning_rate": 4.281717076119057e-06, |
|
"loss": 0.2543, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.7635574837310195, |
|
"grad_norm": 0.2181063063924005, |
|
"learning_rate": 4.205326748603744e-06, |
|
"loss": 0.2231, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.7657266811279828, |
|
"grad_norm": 0.23650416189819906, |
|
"learning_rate": 4.129594086693012e-06, |
|
"loss": 0.2647, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.767895878524946, |
|
"grad_norm": 0.23715186447220826, |
|
"learning_rate": 4.0545201779973564e-06, |
|
"loss": 0.2395, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.770065075921909, |
|
"grad_norm": 0.25382512329158124, |
|
"learning_rate": 3.980106100666781e-06, |
|
"loss": 0.2909, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.772234273318872, |
|
"grad_norm": 0.21476601477039556, |
|
"learning_rate": 3.906352923375345e-06, |
|
"loss": 0.2326, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.7744034707158352, |
|
"grad_norm": 0.31142043160518457, |
|
"learning_rate": 3.833261705305796e-06, |
|
"loss": 0.2506, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.7765726681127982, |
|
"grad_norm": 0.23754127992716897, |
|
"learning_rate": 3.7608334961343695e-06, |
|
"loss": 0.2753, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.7787418655097613, |
|
"grad_norm": 0.2606633745028959, |
|
"learning_rate": 3.6890693360157105e-06, |
|
"loss": 0.2735, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.7809110629067244, |
|
"grad_norm": 0.2308719975444426, |
|
"learning_rate": 3.6179702555679264e-06, |
|
"loss": 0.2391, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.7830802603036875, |
|
"grad_norm": 0.20821914212005022, |
|
"learning_rate": 3.5475372758577984e-06, |
|
"loss": 0.2041, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.7852494577006508, |
|
"grad_norm": 0.27553949669527766, |
|
"learning_rate": 3.4777714083861268e-06, |
|
"loss": 0.2641, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.7874186550976139, |
|
"grad_norm": 0.2298326829683291, |
|
"learning_rate": 3.4086736550731747e-06, |
|
"loss": 0.2694, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.789587852494577, |
|
"grad_norm": 0.23725507049973954, |
|
"learning_rate": 3.340245008244308e-06, |
|
"loss": 0.2672, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.7917570498915403, |
|
"grad_norm": 0.23615628058747723, |
|
"learning_rate": 3.272486450615725e-06, |
|
"loss": 0.298, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.7939262472885034, |
|
"grad_norm": 0.24396047621014613, |
|
"learning_rate": 3.2053989552803586e-06, |
|
"loss": 0.3171, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.7960954446854664, |
|
"grad_norm": 0.22614687587834664, |
|
"learning_rate": 3.1389834856938916e-06, |
|
"loss": 0.2379, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.7982646420824295, |
|
"grad_norm": 0.22756878563712518, |
|
"learning_rate": 3.0732409956609197e-06, |
|
"loss": 0.2636, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.8004338394793926, |
|
"grad_norm": 0.19317698277364906, |
|
"learning_rate": 3.0081724293212653e-06, |
|
"loss": 0.1733, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.8026030368763557, |
|
"grad_norm": 0.2290227505004182, |
|
"learning_rate": 2.943778721136403e-06, |
|
"loss": 0.281, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.8047722342733188, |
|
"grad_norm": 0.21862342101596383, |
|
"learning_rate": 2.8800607958760497e-06, |
|
"loss": 0.2335, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.8069414316702819, |
|
"grad_norm": 0.2951658359969376, |
|
"learning_rate": 2.8170195686048837e-06, |
|
"loss": 0.2809, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.809110629067245, |
|
"grad_norm": 0.24062215600970813, |
|
"learning_rate": 2.7546559446693876e-06, |
|
"loss": 0.2618, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.8112798264642083, |
|
"grad_norm": 0.222892598517785, |
|
"learning_rate": 2.692970819684898e-06, |
|
"loss": 0.2255, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.8134490238611713, |
|
"grad_norm": 0.22574094208163784, |
|
"learning_rate": 2.6319650795226414e-06, |
|
"loss": 0.2844, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.8156182212581344, |
|
"grad_norm": 0.2143823574732022, |
|
"learning_rate": 2.571639600297143e-06, |
|
"loss": 0.2179, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.8177874186550977, |
|
"grad_norm": 0.20810428440270837, |
|
"learning_rate": 2.5119952483535214e-06, |
|
"loss": 0.2273, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.8199566160520608, |
|
"grad_norm": 0.2219262917061002, |
|
"learning_rate": 2.4530328802551516e-06, |
|
"loss": 0.2469, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.822125813449024, |
|
"grad_norm": 0.22814813313597437, |
|
"learning_rate": 2.3947533427712666e-06, |
|
"loss": 0.2717, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.824295010845987, |
|
"grad_norm": 0.20474219388181666, |
|
"learning_rate": 2.3371574728648928e-06, |
|
"loss": 0.2206, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.82646420824295, |
|
"grad_norm": 0.2393953186831936, |
|
"learning_rate": 2.2802460976807384e-06, |
|
"loss": 0.2773, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.8286334056399132, |
|
"grad_norm": 0.20242210841493188, |
|
"learning_rate": 2.2240200345333972e-06, |
|
"loss": 0.241, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.8308026030368763, |
|
"grad_norm": 0.2479672969940867, |
|
"learning_rate": 2.1684800908955362e-06, |
|
"loss": 0.304, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.8329718004338393, |
|
"grad_norm": 0.2924875615305496, |
|
"learning_rate": 2.1136270643863807e-06, |
|
"loss": 0.1846, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.8351409978308026, |
|
"grad_norm": 0.23730285407509868, |
|
"learning_rate": 2.0594617427601603e-06, |
|
"loss": 0.2475, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.8373101952277657, |
|
"grad_norm": 0.22663266708243113, |
|
"learning_rate": 2.0059849038949084e-06, |
|
"loss": 0.2662, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.8394793926247288, |
|
"grad_norm": 0.24163203609153988, |
|
"learning_rate": 1.953197315781169e-06, |
|
"loss": 0.2949, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.8416485900216921, |
|
"grad_norm": 0.22227338821415707, |
|
"learning_rate": 1.9010997365110971e-06, |
|
"loss": 0.2483, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.8438177874186552, |
|
"grad_norm": 0.20465862377896551, |
|
"learning_rate": 1.8496929142674424e-06, |
|
"loss": 0.2406, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8459869848156183, |
|
"grad_norm": 0.2364370583373017, |
|
"learning_rate": 1.7989775873129278e-06, |
|
"loss": 0.2902, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.8481561822125814, |
|
"grad_norm": 0.26218568934111874, |
|
"learning_rate": 1.7489544839795314e-06, |
|
"loss": 0.2492, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.8503253796095445, |
|
"grad_norm": 0.21511824786196265, |
|
"learning_rate": 1.6996243226581388e-06, |
|
"loss": 0.2368, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.8524945770065075, |
|
"grad_norm": 0.2198436062463957, |
|
"learning_rate": 1.6509878117881073e-06, |
|
"loss": 0.2208, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.8546637744034706, |
|
"grad_norm": 0.21644370095781967, |
|
"learning_rate": 1.6030456498472124e-06, |
|
"loss": 0.212, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.8568329718004337, |
|
"grad_norm": 0.22133422953240703, |
|
"learning_rate": 1.5557985253415119e-06, |
|
"loss": 0.2553, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.8590021691973968, |
|
"grad_norm": 0.22714457687645023, |
|
"learning_rate": 1.509247116795548e-06, |
|
"loss": 0.2462, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.86117136659436, |
|
"grad_norm": 0.222946120475819, |
|
"learning_rate": 1.463392092742516e-06, |
|
"loss": 0.2306, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.8633405639913232, |
|
"grad_norm": 0.23418079900923416, |
|
"learning_rate": 1.4182341117147501e-06, |
|
"loss": 0.2696, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.8655097613882863, |
|
"grad_norm": 0.24002442145224728, |
|
"learning_rate": 1.373773822234181e-06, |
|
"loss": 0.3045, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.8676789587852496, |
|
"grad_norm": 0.23645720857751817, |
|
"learning_rate": 1.3300118628031044e-06, |
|
"loss": 0.2677, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.8698481561822127, |
|
"grad_norm": 0.20964939053220596, |
|
"learning_rate": 1.2869488618949488e-06, |
|
"loss": 0.2387, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.8720173535791758, |
|
"grad_norm": 0.23651470700477456, |
|
"learning_rate": 1.2445854379452726e-06, |
|
"loss": 0.2706, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.8741865509761388, |
|
"grad_norm": 0.22140918135138807, |
|
"learning_rate": 1.2029221993428873e-06, |
|
"loss": 0.2607, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.876355748373102, |
|
"grad_norm": 0.2348213137855099, |
|
"learning_rate": 1.1619597444211206e-06, |
|
"loss": 0.2672, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.878524945770065, |
|
"grad_norm": 0.21784895708257698, |
|
"learning_rate": 1.121698661449211e-06, |
|
"loss": 0.2285, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.880694143167028, |
|
"grad_norm": 0.22922875389410033, |
|
"learning_rate": 1.0821395286238656e-06, |
|
"loss": 0.2343, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.8828633405639912, |
|
"grad_norm": 0.23166059465595806, |
|
"learning_rate": 1.0432829140609723e-06, |
|
"loss": 0.2714, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.8850325379609545, |
|
"grad_norm": 0.2360632284131841, |
|
"learning_rate": 1.0051293757874002e-06, |
|
"loss": 0.2716, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.8872017353579176, |
|
"grad_norm": 0.2314940438713054, |
|
"learning_rate": 9.67679461733051e-07, |
|
"loss": 0.2835, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.8893709327548807, |
|
"grad_norm": 0.21435494553958442, |
|
"learning_rate": 9.309337097229087e-07, |
|
"loss": 0.2131, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.891540130151844, |
|
"grad_norm": 0.2210305803057693, |
|
"learning_rate": 8.94892647469403e-07, |
|
"loss": 0.2575, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.893709327548807, |
|
"grad_norm": 0.24869986480220152, |
|
"learning_rate": 8.595567925647363e-07, |
|
"loss": 0.2738, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.8958785249457701, |
|
"grad_norm": 0.2305734211734528, |
|
"learning_rate": 8.249266524735455e-07, |
|
"loss": 0.2733, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.8980477223427332, |
|
"grad_norm": 0.2536697393369725, |
|
"learning_rate": 7.910027245255413e-07, |
|
"loss": 0.3261, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.9002169197396963, |
|
"grad_norm": 0.23661550044126228, |
|
"learning_rate": 7.577854959084085e-07, |
|
"loss": 0.3014, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.9023861171366594, |
|
"grad_norm": 0.22995644681345337, |
|
"learning_rate": 7.252754436607834e-07, |
|
"loss": 0.2957, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.9045553145336225, |
|
"grad_norm": 0.22988735331559226, |
|
"learning_rate": 6.934730346654316e-07, |
|
"loss": 0.2418, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.9067245119305856, |
|
"grad_norm": 0.22730994636228313, |
|
"learning_rate": 6.623787256424984e-07, |
|
"loss": 0.2746, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.9088937093275486, |
|
"grad_norm": 0.21461778114516813, |
|
"learning_rate": 6.319929631430077e-07, |
|
"loss": 0.2225, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.911062906724512, |
|
"grad_norm": 0.23234603239697021, |
|
"learning_rate": 6.023161835423896e-07, |
|
"loss": 0.2969, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.913232104121475, |
|
"grad_norm": 0.21550966631710464, |
|
"learning_rate": 5.733488130342635e-07, |
|
"loss": 0.2503, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.9154013015184381, |
|
"grad_norm": 0.2231153295751743, |
|
"learning_rate": 5.45091267624287e-07, |
|
"loss": 0.249, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.9175704989154014, |
|
"grad_norm": 0.2293111442665084, |
|
"learning_rate": 5.175439531241999e-07, |
|
"loss": 0.29, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.9197396963123645, |
|
"grad_norm": 0.2378040770485283, |
|
"learning_rate": 4.907072651459621e-07, |
|
"loss": 0.2891, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.9219088937093276, |
|
"grad_norm": 0.21536664945034864, |
|
"learning_rate": 4.645815890961358e-07, |
|
"loss": 0.22, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.9240780911062907, |
|
"grad_norm": 0.21799292627016253, |
|
"learning_rate": 4.391673001702734e-07, |
|
"loss": 0.2168, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.9262472885032538, |
|
"grad_norm": 0.2272046818418607, |
|
"learning_rate": 4.14464763347594e-07, |
|
"loss": 0.2555, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.9284164859002169, |
|
"grad_norm": 0.26748625735516185, |
|
"learning_rate": 3.904743333857097e-07, |
|
"loss": 0.2186, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.93058568329718, |
|
"grad_norm": 0.23511293905776262, |
|
"learning_rate": 3.671963548155244e-07, |
|
"loss": 0.2622, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.932754880694143, |
|
"grad_norm": 0.2132059321481103, |
|
"learning_rate": 3.446311619363207e-07, |
|
"loss": 0.2162, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.9349240780911063, |
|
"grad_norm": 0.21597717686523027, |
|
"learning_rate": 3.227790788109197e-07, |
|
"loss": 0.2307, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.9370932754880694, |
|
"grad_norm": 0.2978176072682262, |
|
"learning_rate": 3.0164041926104e-07, |
|
"loss": 0.2577, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.9392624728850325, |
|
"grad_norm": 0.23250354980532184, |
|
"learning_rate": 2.8121548686281805e-07, |
|
"loss": 0.2714, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.9414316702819958, |
|
"grad_norm": 0.23296800107183185, |
|
"learning_rate": 2.6150457494240057e-07, |
|
"loss": 0.2414, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.943600867678959, |
|
"grad_norm": 0.2136515878728428, |
|
"learning_rate": 2.4250796657177e-07, |
|
"loss": 0.2173, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.945770065075922, |
|
"grad_norm": 0.20347321610165708, |
|
"learning_rate": 2.2422593456466468e-07, |
|
"loss": 0.2152, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.947939262472885, |
|
"grad_norm": 0.23997716902494964, |
|
"learning_rate": 2.0665874147265395e-07, |
|
"loss": 0.2305, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.9501084598698482, |
|
"grad_norm": 0.22269717072616485, |
|
"learning_rate": 1.8980663958139133e-07, |
|
"loss": 0.2665, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.9522776572668112, |
|
"grad_norm": 0.21024780049164776, |
|
"learning_rate": 1.736698709069673e-07, |
|
"loss": 0.1967, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9544468546637743, |
|
"grad_norm": 0.2267046711412435, |
|
"learning_rate": 1.5824866719243436e-07, |
|
"loss": 0.2445, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.9566160520607374, |
|
"grad_norm": 0.22809702055070344, |
|
"learning_rate": 1.4354324990449863e-07, |
|
"loss": 0.2569, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.9587852494577005, |
|
"grad_norm": 0.23382391297633118, |
|
"learning_rate": 1.295538302303223e-07, |
|
"loss": 0.2876, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.9609544468546638, |
|
"grad_norm": 0.20472909033825798, |
|
"learning_rate": 1.1628060907449834e-07, |
|
"loss": 0.1924, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.9631236442516269, |
|
"grad_norm": 0.2096943890587732, |
|
"learning_rate": 1.0372377705616942e-07, |
|
"loss": 0.233, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.96529284164859, |
|
"grad_norm": 0.211371475954759, |
|
"learning_rate": 9.188351450626353e-08, |
|
"loss": 0.2166, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.9674620390455533, |
|
"grad_norm": 0.21471581374101012, |
|
"learning_rate": 8.075999146494595e-08, |
|
"loss": 0.2282, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.9696312364425164, |
|
"grad_norm": 0.2660219035351609, |
|
"learning_rate": 7.035336767914346e-08, |
|
"loss": 0.2735, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.9718004338394794, |
|
"grad_norm": 0.22465441666056135, |
|
"learning_rate": 6.066379260026845e-08, |
|
"loss": 0.2765, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.9739696312364425, |
|
"grad_norm": 0.22816673191661355, |
|
"learning_rate": 5.169140538207051e-08, |
|
"loss": 0.2685, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.9761388286334056, |
|
"grad_norm": 0.24062260875061986, |
|
"learning_rate": 4.343633487861598e-08, |
|
"loss": 0.2479, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.9783080260303687, |
|
"grad_norm": 0.23204079070364966, |
|
"learning_rate": 3.589869964248371e-08, |
|
"loss": 0.2544, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.9804772234273318, |
|
"grad_norm": 0.2339821898793586, |
|
"learning_rate": 2.907860792301098e-08, |
|
"loss": 0.3132, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.9826464208242949, |
|
"grad_norm": 0.21123283163235765, |
|
"learning_rate": 2.297615766477801e-08, |
|
"loss": 0.2454, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.9848156182212582, |
|
"grad_norm": 0.23481457672784686, |
|
"learning_rate": 1.7591436506170235e-08, |
|
"loss": 0.2803, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.9869848156182213, |
|
"grad_norm": 0.22666968506581125, |
|
"learning_rate": 1.2924521778151511e-08, |
|
"loss": 0.262, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.9891540130151844, |
|
"grad_norm": 0.22371033015850025, |
|
"learning_rate": 8.975480503126133e-09, |
|
"loss": 0.2253, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.9913232104121477, |
|
"grad_norm": 0.24195113149305236, |
|
"learning_rate": 5.74436939398959e-09, |
|
"loss": 0.2679, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.9934924078091107, |
|
"grad_norm": 0.22275331811796895, |
|
"learning_rate": 3.2312348533236614e-09, |
|
"loss": 0.3069, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.9956616052060738, |
|
"grad_norm": 0.2209663606112055, |
|
"learning_rate": 1.4361129727025191e-09, |
|
"loss": 0.2256, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.997830802603037, |
|
"grad_norm": 0.2356916076109314, |
|
"learning_rate": 3.590295321986847e-10, |
|
"loss": 0.2807, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.22113655920342676, |
|
"learning_rate": 0.0, |
|
"loss": 0.2328, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 922, |
|
"total_flos": 4515096717950976.0, |
|
"train_loss": 0.30544997376759503, |
|
"train_runtime": 16133.364, |
|
"train_samples_per_second": 0.228, |
|
"train_steps_per_second": 0.057 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 922, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4515096717950976.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|