Samuel J. Huskey committed
Commit 0f68ee2
Parent(s): 68d7b25

add: model files
- .gitignore +1 -0
- config.json +0 -0
- distilbert-finetuned-emission.csv +2 -0
- label_mapping.json +1 -0
- logs/events.out.tfevents.1734972208.93d1435969e8.4440.0 +3 -0
- logs/events.out.tfevents.1734972249.93d1435969e8.4440.1 +3 -0
- logs/events.out.tfevents.1734972335.93d1435969e8.4440.2 +3 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +63 -0
- trainer_state.json +2377 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+*.DS_Store
config.json
ADDED
The diff for this file is too large to render. See raw diff.
distilbert-finetuned-emission.csv
ADDED
@@ -0,0 +1,2 @@
+timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
+2024-12-23T17:37:16,codecarbon,a2b8975b-512b-4158-b41f-2a00d1d6fb39,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,877.531339527,0.026065839149093644,2.970359914797282e-05,42.5,71.5115170414632,31.30389261245728,0.010351733306140969,0.03961337474623,0.0076235855749420215,0.057588693627313,United States,USA,iowa,,,Linux-6.1.85+-x86_64-with-glibc2.35,3.10.12,2.8.2,12,Intel(R) Xeon(R) CPU @ 2.20GHz,1,1 x NVIDIA A100-SXM4-40GB,-95.8517,41.2591,83.47704696655273,machine,N,1.0
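The two added lines are a CodeCarbon emissions record: per the row above, the run lasted about 878 seconds on one NVIDIA A100, consumed roughly 0.058 kWh, and emitted an estimated 0.026 kg CO2eq. As a rough illustration, a minimal sketch of how such a file is produced, assuming the codecarbon package (the training loop itself is not part of this commit):

```python
# Minimal sketch: producing a row like the one above with CodeCarbon.
# train() is a placeholder for the fine-tuning loop, which is not shown here.
from codecarbon import EmissionsTracker

def train():
    pass  # stand-in for the actual training run

tracker = EmissionsTracker(
    project_name="codecarbon",
    output_file="distilbert-finetuned-emission.csv",
)
tracker.start()
try:
    train()
finally:
    kg_co2 = tracker.stop()  # appends a row with duration, energy, emissions
    print(f"estimated emissions: {kg_co2} kg CO2eq")
```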
label_mapping.json
ADDED
@@ -0,0 +1 @@
{"A5349": 0, "A4246": 1, "A4448": 2, "A6040": 3, "A5015": 4, "A5397": 5, "A4767": 6, "A3060": 7, "A3156": 8, "A3711": 9, "A4034": 10, "A6088": 11, "A3690": 12, "A5701": 13, "A2491": 14, "A5671": 15, "A4190": 16, "A4664": 17, "A4733": 18, "A6081": 19, "A4696": 20, "A4510": 21, "A3041": 22, "A5598": 23, "A3146": 24, "A3831": 25, "A4372": 26, "A5429": 27, "A5418": 28, "A5422": 29, "A5425": 30, "A3346": 31, "A5432": 32, "A5420": 33, "A5428": 34, "A4199": 35, "A4180": 36, "A4207": 37, "A4220": 38, "A4178": 39, "A4210": 40, "A3528": 41, "A4243": 42, "A3469": 43, "A4196": 44, "A4188": 45, "A5416": 46, "A4197": 47, "A5424": 48, "A3825": 49, "A4752": 50, "A4480": 51, "A5431": 52, "A5415": 53, "A5332": 54, "A6146": 55, "A6188": 56, "A6212": 57, "A6223": 58, "A5973": 59, "A3966": 60, "A4632": 61, "A3836": 62, "A4391": 63, "A5473": 64, "A3829": 65, "A5984": 66, "A6000": 67, "A5551": 68, "A5419": 69, "A5414": 70, "A4200": 71, "A4643": 72, "A5609": 73, "A3919": 74, "A2941": 75, "A3880": 76, "A4398": 77, "A4259": 78, "A3153": 79, "A5066": 80, "A5423": 81, "A4493": 82, "A4204": 83, "A3309": 84, "A4327": 85, "A3087": 86, "A4514": 87, "A3117": 88, "A3089": 89, "A3093": 90, "A2883": 91, "A3016": 92, "A4538": 93, "A4487": 94, "A4739": 95, "A3721": 96, "A2927": 97, "A3693": 98, "A3983": 99, "A3540": 100, "A4724": 101, "A4288": 102, "A3031": 103, "A5505": 104, "A4484": 105, "A5430": 106, "A5034": 107, "A4519": 108, "A5417": 109, "A4673": 110, "A3442": 111, "A5669": 112, "A5014": 113, "A5421": 114, "A5443": 115, "A4466": 116, "A5470": 117, "A4337": 118, "A4476": 119, "A5497": 120, "A4038": 121, "A3434": 122, "A5504": 123, "A3042": 124, "A3675": 125, "A4109": 126, "A2920": 127, "A4098": 128, "A4548": 129, "A3214": 130, "A6001": 131, "A5195": 132, "A6295": 133, "A4914": 134, "A5447": 135, "A4352": 136, "A5444": 137, "A5451": 138, "A5435": 139, "A5439": 140, "A4486": 141, "A5101": 142, "A3397": 143, "A4646": 144, "A5442": 145, "A4577": 146, "A4458": 147, "A4551": 148, "A5986": 149, "A4410": 150, "A5678": 151, "A2755": 152, "A5558": 153, "A5445": 154, "A4385": 155, "A6108": 156, "A5478": 157, "A4159": 158, "A5044": 159, "A5446": 160, "A4280": 161, "A6291": 162, "A4755": 163, "A4087": 164, "A4375": 165, "A3702": 166, "A3447": 167, "A4310": 168, "A3431": 169, "A3408": 170, "A4485": 171, "A4494": 172, "A4132": 173, "A5499": 174, "A3840": 175, "A5450": 176, "A4973": 177, "A3010": 178, "A5010": 179, "A5474": 180, "A4295": 181, "A4464": 182, "A4116": 183, "A5590": 184, "A3356": 185, "A4084": 186, "A3394": 187, "A5709": 188, "A4497": 189, "A3120": 190, "A4120": 191, "A4078": 192, "A2940": 193, "A3241": 194, "A3001": 195, "A4172": 196, "A4151": 197, "A5434": 198, "A4473": 199, "A4267": 200, "A3245": 201, "A5599": 202, "A4139": 203, "A5438": 204, "A3465": 205, "A4234": 206, "A3179": 207, "A3056": 208, "A5449": 209, "A3626": 210, "A3667": 211, "A3277": 212, "A6294": 213, "A4329": 214, "A4775": 215, "A4413": 216, "A3291": 217, "A3343": 218, "A3218": 219, "A3196": 220, "A6233": 221, "A3229": 222, "A6118": 223, "A5441": 224, "A2992": 225, "A3323": 226, "A3370": 227, "A6154": 228, "A4166": 229, "A4156": 230, "A2967": 231, "A5433": 232, "A6175": 233, "A5437": 234, "A2989": 235, "A6079": 236, "A5440": 237, "A3002": 238, "A4336": 239, "A4051": 240, "A4467": 241, "A3791": 242, "A5347": 243, "A5448": 244, "A5055": 245, "A4901": 246, "A6129": 247, "A5458": 248, "A5452": 249, "A5597": 250, "A5436": 251, "A5058": 252, "A3100": 253, "A5103": 254, "A5462": 255, "A4183": 256, "A6109": 257, "A3194": 258, "A3987": 259, "A5571": 260, "A5457": 
261, "A4170": 262, "A3325": 263, "A4452": 264, "A3949": 265, "A5454": 266, "A4126": 267, "A5466": 268, "A3904": 269, "A4691": 270, "A4346": 271, "A4388": 272, "A4229": 273, "A6196": 274, "A6123": 275, "A4441": 276, "A4239": 277, "A4101": 278, "A6176": 279, "A6135": 280, "A5604": 281, "A2978": 282, "A4613": 283, "A2966": 284, "A2965": 285, "A4017": 286, "A2979": 287, "A3837": 288, "A4237": 289, "A5461": 290, "A5465": 291, "A4110": 292, "A4142": 293, "A4580": 294, "A4912": 295, "A5469": 296, "A4099": 297, "A4011": 298, "A3843": 299, "A3335": 300, "A3677": 301, "A4088": 302, "A5047": 303, "A3630": 304, "A4511": 305, "A4529": 306, "A4669": 307, "A4228": 308, "A4209": 309, "A5453": 310, "A5460": 311, "A5456": 312, "A5464": 313, "A5468": 314, "A3005": 315, "A4505": 316, "A6034": 317, "A5509": 318, "A3129": 319, "A6242": 320, "A4185": 321, "A3188": 322, "A3571": 323, "A6282": 324, "A4020": 325, "A3375": 326, "A3965": 327, "A3399": 328, "A3319": 329, "A2911": 330, "A5472": 331, "A5586": 332, "A3105": 333, "A2975": 334, "A3024": 335, "A5608": 336, "A5724": 337, "A3372": 338, "A2977": 339, "A4027": 340, "A5728": 341, "A4015": 342, "A4163": 343, "A5455": 344, "A5697": 345, "A3915": 346, "A3879": 347, "A5459": 348, "A3685": 349, "A4155": 350, "A3417": 351, "A5685": 352, "A3522": 353, "A4543": 354, "A4795": 355, "A4523": 356, "A4533": 357, "A5463": 358, "A4502": 359, "A5467": 360, "A3854": 361, "A4757": 362, "A4661": 363, "A3604": 364, "A2868": 365, "A4997": 366, "A5471": 367, "A3315": 368, "A3161": 369, "A5477": 370, "A4700": 371, "A4137": 372, "A5531": 373, "A3801": 374, "A4123": 375, "A5225": 376, "A4133": 377, "A3548": 378, "A2984": 379, "A4471": 380, "A5476": 381, "A4021": 382, "A5479": 383, "A5481": 384, "A4373": 385, "A4729": 386, "A6048": 387, "A4002": 388, "A3512": 389, "A4029": 390, "A5361": 391, "A5355": 392, "A5367": 393, "A6054": 394, "A4039": 395, "A4012": 396, "A4508": 397, "A5136": 398, "A6148": 399, "A4641": 400, "A4105": 401, "A3943": 402, "A4742": 403, "A3090": 404, "A4540": 405, "A4670": 406, "A4677": 407, "A3566": 408, "A4581": 409, "A3433": 410, "A5500": 411, "A4520": 412, "A4778": 413, "A4982": 414, "A5648": 415, "A5507": 416, "A3568": 417, "A4687": 418, "A3381": 419, "A5570": 420, "A4658": 421, "A3393": 422, "A4667": 423, "A5351": 424, "A5366": 425, "A4530": 426, "A5385": 427, "A5370": 428, "A3684": 429, "A3637": 430, "A3674": 431, "A2964": 432, "A3625": 433, "A3255": 434, "A3492": 435, "A6256": 436, "A3422": 437, "A6068": 438, "A5954": 439, "A6105": 440, "A4129": 441, "A3310": 442, "A6114": 443, "A4718": 444, "A4539": 445, "A5568": 446, "A5364": 447, "A3881": 448, "A5971": 449, "A5356": 450, "A4622": 451, "A5362": 452, "A4650": 453, "A4944": 454, "A5684": 455, "A5501": 456, "A5495": 457, "A5373": 458, "A4054": 459, "A4322": 460, "A5353": 461, "A4583": 462, "A3048": 463, "A4936": 464, "A2891": 465, "A4609": 466, "A3665": 467, "A2963": 468, "A3141": 469, "A5314": 470, "A5358": 471, "A4114": 472, "A5550": 473, "A4082": 474, "A4447": 475, "A3063": 476, "A3131": 477, "A5346": 478, "A5352": 479, "A4408": 480, "A4128": 481, "A3262": 482, "A5365": 483, "A3756": 484, "A4711": 485, "A4387": 486, "A3768": 487, "A4629": 488, "A5689": 489, "A5363": 490, "A3352": 491, "A3557": 492, "A6177": 493, "A3009": 494, "A3254": 495, "A4019": 496, "A4314": 497, "A2897": 498, "A3142": 499, "A4647": 500, "A5614": 501, "A4703": 502, "A5498": 503, "A5506": 504, "A4384": 505, "A4108": 506, "A4124": 507, "A3774": 508, "A5714": 509, "A5128": 510, "A3892": 511, "A3359": 512, "A3003": 513, "A2973": 514, 
"A4118": 515, "A5595": 516, "A5336": 517, "A2982": 518, "A4734": 519, "A4193": 520, "A4102": 521, "A4097": 522, "A4386": 523, "A5666": 524, "A4111": 525, "A4202": 526, "A3971": 527, "A4257": 528, "A3806": 529, "A4321": 530, "A2962": 531, "A2950": 532, "A5726": 533, "A3301": 534, "A3757": 535, "A3349": 536, "A4080": 537, "A6117": 538, "A5350": 539, "A4146": 540, "A6140": 541, "A4680": 542, "A3186": 543, "A4517": 544, "A6178": 545, "A4134": 546, "A3565": 547, "A4247": 548, "A4745": 549, "A5357": 550, "A3258": 551, "A3298": 552, "A4215": 553, "A4058": 554, "A5000": 555, "A6179": 556, "A3803": 557, "A4546": 558, "A4268": 559, "A2961": 560, "A5607": 561, "A4077": 562, "A3049": 563, "A5360": 564, "A5993": 565, "A2934": 566, "A5354": 567, "A4112": 568, "A5496": 569, "A5348": 570, "A4005": 571, "A4708": 572, "A3792": 573, "A3780": 574, "A4736": 575, "A3921": 576, "A3206": 577, "A6180": 578, "A6181": 579, "A6182": 580, "A4311": 581, "A6041": 582, "A3039": 583, "A5486": 584, "A2968": 585, "A4343": 586, "A5394": 587, "A3537": 588, "A5392": 589, "A5377": 590, "A5485": 591, "A5494": 592, "A5490": 593, "A4370": 594, "A4431": 595, "A4217": 596, "A5491": 597, "A3802": 598, "A3340": 599, "A3257": 600, "A3928": 601, "A6183": 602, "A4063": 603, "A6110": 604, "A3523": 605, "A3514": 606, "A6134": 607, "A6065": 608, "A2969": 609, "A5380": 610, "A5493": 611, "A4177": 612, "A4089": 613, "A5484": 614, "A4255": 615, "A4095": 616, "A6005": 617, "A5396": 618, "A5487": 619, "A5488": 620, "A4400": 621, "A5399": 622, "A3305": 623, "A3996": 624, "A4075": 625, "A3190": 626, "A4103": 627, "A4227": 628, "A4674": 629, "A4353": 630, "A5172": 631, "A4158": 632, "A3151": 633, "A4763": 634, "A3900": 635, "A3279": 636, "A5180": 637, "A4049": 638, "A4263": 639, "A6133": 640, "A4059": 641, "A5611": 642, "A5581": 643, "A6029": 644, "A3071": 645, "A4094": 646, "A3545": 647, "A4642": 648, "A3789": 649, "A4379": 650, "A4208": 651, "A4526": 652, "A4348": 653, "A5690": 654, "A4053": 655, "A4074": 656, "A4071": 657, "A4079": 658, "A4666": 659, "A6185": 660, "A3275": 661, "A3515": 662, "A5718": 663, "A3260": 664, "A3036": 665, "A5045": 666, "A2935": 667, "A4056": 668, "A3014": 669, "A5675": 670, "A6009": 671, "A3203": 672, "A5381": 673, "A5398": 674, "A5400": 675, "A5386": 676, "A5391": 677, "A3023": 678, "A2983": 679, "A5713": 680, "A5376": 681, "A5631": 682, "A3083": 683, "A3032": 684, "A3282": 685, "A4695": 686, "A4092": 687, "A3719": 688, "A3292": 689, "A5936": 690, "A3764": 691, "A5939": 692, "A3351": 693, "A3287": 694, "A3712": 695, "A5652": 696, "A5407": 697, "A6026": 698, "A4704": 699, "A5554": 700, "A3294": 701, "A3912": 702, "A5475": 703, "A3758": 704, "A6186": 705, "A3278": 706, "A4064": 707, "A4248": 708, "A5411": 709, "A3213": 710, "A3770": 711, "A5387": 712, "A4090": 713, "A5980": 714, "A4096": 715, "A6038": 716, "A6049": 717, "A3776": 718, "A3853": 719, "A6059": 720, "A4143": 721, "A5389": 722, "A6189": 723, "A5404": 724, "A5374": 725, "A5406": 726, "A4076": 727, "A2959": 728, "A3327": 729, "A4656": 730, "A3641": 731, "A3098": 732, "A2888": 733, "A4683": 734, "A3563": 735, "A5656": 736, "A4304": 737, "A4730": 738, "A4414": 739, "A3585": 740, "A5600": 741, "A3402": 742, "A4050": 743, "A6023": 744, "A5410": 745, "A3891": 746, "A4715": 747, "A3011": 748, "A4975": 749, "A5401": 750, "A4536": 751, "A5584": 752, "A3387": 753, "A6043": 754, "A5613": 755, "A6116": 756, "A3312": 757, "A4621": 758, "A3589": 759, "A4904": 760, "A4615": 761, "A3624": 762, "A3593": 763, "A2915": 764, "A2910": 765, "A4671": 766, "A4633": 767, "A4638": 
768, "A3616": 769, "A3581": 770, "A3256": 771, "A3191": 772, "A3554": 773, "A4665": 774, "A4312": 775, "A3715": 776, "A5294": 777, "A3577": 778, "A3634": 779, "A4612": 780, "A4644": 781, "A3582": 782, "A5405": 783, "A5576": 784, "A3629": 785, "A4256": 786, "A5409": 787, "A4619": 788, "A5260": 789, "A4801": 790, "A4557": 791, "A5537": 792, "A4799": 793, "A5998": 794, "A3102": 795, "A3019": 796, "A4544": 797, "A2993": 798, "A3058": 799, "A3623": 800, "A3193": 801, "A4652": 802, "A910": 803, "A5693": 804, "A4662": 805, "A3601": 806, "A4604": 807, "A4468": 808, "A4225": 809, "A4701": 810, "A4061": 811, "A3731": 812, "A4235": 813, "A3029": 814, "A5627": 815, "A4285": 816, "A5559": 817, "A6230": 818, "A5231": 819, "A3017": 820, "A5413": 821, "A3030": 822, "A6052": 823, "A3662": 824, "A4265": 825, "A4066": 826, "A3706": 827, "A3033": 828, "A4988": 829, "A4501": 830, "A4506": 831, "A3745": 832, "A3038": 833, "A3097": 834, "A6044": 835, "A5393": 836, "A3012": 837, "A4107": 838, "A3560": 839, "A5596": 840, "A2960": 841, "A4068": 842, "A6070": 843, "A4524": 844, "A5556": 845, "A4168": 846, "A5651": 847, "A3503": 848, "A3224": 849, "A5601": 850, "A6247": 851, "A3236": 852, "A6111": 853, "A3436": 854, "A6283": 855, "A3139": 856, "A4462": 857, "A6209": 858, "A4008": 859, "A5001": 860, "A3006": 861, "A4765": 862, "A5602": 863, "A3261": 864, "A5958": 865, "A5949": 866, "A3462": 867, "A4764": 868, "A5643": 869, "A4719": 870, "A4751": 871, "A5953": 872, "A5990": 873, "A4149": 874, "A3773": 875, "A5987": 876, "A3727": 877, "A4726": 878, "A2991": 879, "A4254": 880, "A3383": 881, "A6008": 882, "A3244": 883, "A3330": 884, "A3040": 885, "A4057": 886, "A3657": 887, "A5174": 888, "A3788": 889, "A5594": 890, "A2948": 891, "A5508": 892, "A5412": 893, "A4636": 894, "A3116": 895, "A3599": 896, "A4085": 897, "A5717": 898, "A4067": 899, "A5403": 900, "A5237": 901, "A3524": 902, "A4678": 903, "A5578": 904, "A3072": 905, "A3078": 906, "A4479": 907, "A4009": 908, "A3027": 909, "A3361": 910, "A3250": 911, "A5328": 912, "A5298": 913, "A5251": 914, "A4675": 915, "A4515": 916, "A4753": 917, "A5390": 918, "A4130": 919, "A5375": 920, "A3125": 921, "A5994": 922, "A5638": 923, "A4368": 924, "A5119": 925, "A4018": 926, "A6004": 927, "A3999": 928, "A4472": 929, "A6083": 930, "A6030": 931, "A3180": 932, "A6124": 933, "A3249": 934, "A6098": 935, "A3561": 936, "A3253": 937, "A3428": 938, "A5293": 939, "A6190": 940, "A3018": 941, "A4026": 942, "A5555": 943, "A5427": 944, "A4534": 945, "A4131": 946, "A3022": 947, "A4445": 948, "A5681": 949, "A5275": 950, "A4587": 951, "A3133": 952, "A3552": 953, "A5270": 954, "A6077": 955, "A3377": 956, "A5992": 957, "A5967": 958, "A3696": 959, "A4167": 960, "A4138": 961, "A4744": 962, "A4104": 963, "A4113": 964, "A4273": 965, "A6153": 966, "A3932": 967, "A3945": 968, "A3368": 969, "A3903": 970, "A5246": 971, "A4367": 972, "A4036": 973, "A4240": 974, "A5129": 975, "A4668": 976, "A4688": 977, "A2972": 978, "A3647": 979, "A3575": 980, "A4645": 981, "A5316": 982, "A5307": 983, "A5254": 984, "A5977": 985, "A6051": 986, "A5278": 987, "A3850": 988, "A5983": 989, "A6020": 990, "A5672": 991, "A3158": 992, "A6021": 993, "A4503": 994, "A5261": 995, "A4682": 996, "A4491": 997, "A4541": 998, "A3379": 999, "A4559": 1000, "A3813": 1001, "A3555": 1002, "A3505": 1003, "A6191": 1004, "A4521": 1005, "A3176": 1006, "A3946": 1007, "A3937": 1008, "A3907": 1009, "A4081": 1010, "A4685": 1011, "A5267": 1012, "A2896": 1013, "A5650": 1014, "A3680": 1015, "A3526": 1016, "A3603": 1017, "A3652": 1018, "A3075": 1019, "A3653": 1020, 
"A5563": 1021, "A5694": 1022, "A3723": 1023, "A3663": 1024, "A3638": 1025, "A3627": 1026, "A3123": 1027, "A3148": 1028, "A3539": 1029, "A3572": 1030, "A4006": 1031, "A5719": 1032, "A4316": 1033, "A5263": 1034, "A5625": 1035, "A3015": 1036, "A4287": 1037, "A4531": 1038, "A5273": 1039, "A4024": 1040, "A5633": 1041, "A5056": 1042, "A3818": 1043, "A5941": 1044, "A5285": 1045, "A4083": 1046, "A5248": 1047, "A5968": 1048, "A5281": 1049, "A5255": 1050, "A5271": 1051, "A5291": 1052, "A5277": 1053, "A5076": 1054, "A5588": 1055, "A5402": 1056, "A6084": 1057, "A3671": 1058, "A5569": 1059, "A3726": 1060, "A4611": 1061, "A4463": 1062, "A6269": 1063, "A4690": 1064, "A5279": 1065, "A5104": 1066, "A2931": 1067, "A4141": 1068, "A3632": 1069, "A5247": 1070, "A4007": 1071, "A5005": 1072, "A4684": 1073, "A3183": 1074, "A5268": 1075, "A4598": 1076, "A4203": 1077, "A4213": 1078, "A3535": 1079, "A6024": 1080, "A3619": 1081, "A3592": 1082, "A4796": 1083, "A4117": 1084, "A3648": 1085, "A3658": 1086, "A4653": 1087, "A4459": 1088, "A5700": 1089, "A5276": 1090, "A4025": 1091, "A4016": 1092, "A4046": 1093, "A3099": 1094, "A5553": 1095, "A3111": 1096, "A5310": 1097, "A5549": 1098, "A5262": 1099, "A5948": 1100, "A3952": 1101, "A4073": 1102, "A4044": 1103, "A5013": 1104, "A4567": 1105, "A3610": 1106, "A5989": 1107, "A4013": 1108, "A4022": 1109, "A4935": 1110, "A3034": 1111, "A3961": 1112, "A3013": 1113, "A5543": 1114, "A2937": 1115, "A3021": 1116, "A4003": 1117, "A5319": 1118, "A5712": 1119, "A4127": 1120, "A4509": 1121, "A6192": 1122, "A4030": 1123, "A4187": 1124, "A4602": 1125, "A3578": 1126, "A2885": 1127, "A4041": 1128, "A6193": 1129, "A5610": 1130, "A4062": 1131, "A3385": 1132, "A3374": 1133, "A4518": 1134, "A4537": 1135, "A4884": 1136, "A4241": 1137, "A3324": 1138, "A6072": 1139, "A5116": 1140, "A5344": 1141, "A2994": 1142, "A4443": 1143, "A5325": 1144, "A5020": 1145, "A3544": 1146, "A6162": 1147, "A3223": 1148, "A4093": 1149, "A3861": 1150, "A3951": 1151, "A6156": 1152, "A4001": 1153, "A4028": 1154, "A5334": 1155, "A3059": 1156, "A5289": 1157, "A4048": 1158, "A5617": 1159, "A4100": 1160, "A6045": 1161, "A4040": 1162, "A3423": 1163, "A3752": 1164, "A4206": 1165, "A4216": 1166, "A6036": 1167, "A3926": 1168, "A4712": 1169, "A3902": 1170, "A3865": 1171, "A3851": 1172, "A5199": 1173, "A3403": 1174, "A3821": 1175, "A3807": 1176, "A5978": 1177, "A3786": 1178, "A4405": 1179, "A3118": 1180, "A5304": 1181, "A4620": 1182, "A4474": 1183, "A4679": 1184, "A3150": 1185, "A4991": 1186, "A3316": 1187, "A5308": 1188, "A3288": 1189, "A3998": 1190, "A5317": 1191, "A5388": 1192, "A4242": 1193, "A4762": 1194, "A4456": 1195, "A4743": 1196, "A3386": 1197, "A5950": 1198, "A3673": 1199, "A5323": 1200, "A3114": 1201, "A6197": 1202, "A5301": 1203, "A4694": 1204, "A2924": 1205, "A5335": 1206, "A3373": 1207, "A4945": 1208, "A5311": 1209, "A5286": 1210, "A5320": 1211, "A4569": 1212, "A5329": 1213, "A3322": 1214, "A4047": 1215, "A3318": 1216, "A6145": 1217, "A3531": 1218, "A5002": 1219, "A4174": 1220, "A4781": 1221, "A6198": 1222, "A3541": 1223, "A6199": 1224, "A3772": 1225, "A4483": 1226, "A5345": 1227, "A3583": 1228, "A6085": 1229, "A6035": 1230, "A4014": 1231, "A4298": 1232, "A5721": 1233, "A4091": 1234, "A2907": 1235, "A2909": 1236, "A4637": 1237, "A3897": 1238, "A4740": 1239, "A5426": 1240, "A3553": 1241, "A5326": 1242, "A3567": 1243, "A4035": 1244, "A3212": 1245, "A4045": 1246, "A3569": 1247, "A3857": 1248, "A5395": 1249, "A3967": 1250, "A3094": 1251, "A5089": 1252, "A3238": 1253, "A3976": 1254, "A3045": 1255, "A3948": 1256, "A3899": 1257, 
"A6143": 1258, "A6172": 1259, "A6094": 1260, "A6159": 1261, "A5305": 1262, "A4266": 1263, "A5315": 1264, "A5337": 1265, "A5593": 1266, "A3732": 1267, "A4465": 1268, "A3973": 1269, "A5489": 1270, "A4401": 1271, "A4359": 1272, "A6080": 1273, "A5290": 1274, "A5324": 1275, "A5288": 1276, "A5333": 1277, "A5371": 1278, "A5312": 1279, "A5302": 1280, "A4440": 1281, "A4478": 1282, "A5287": 1283, "A5306": 1284, "A5330": 1285, "A5299": 1286, "A5579": 1287, "A4070": 1288, "A6215": 1289, "A4060": 1290, "A4710": 1291, "A3220": 1292, "A3471": 1293, "A4748": 1294, "A4065": 1295, "A4069": 1296, "A4072": 1297, "A6200": 1298, "A3333": 1299, "A5321": 1300, "A4623": 1301, "A3389": 1302, "A6127": 1303, "A3920": 1304, "A4846": 1305, "A5283": 1306, "A4162": 1307, "A4043": 1308, "A4032": 1309, "A4342": 1310, "A4461": 1311, "A4488": 1312, "A6121": 1313, "A5944": 1314, "A4490": 1315, "A5318": 1316, "A5327": 1317, "A5148": 1318, "A3350": 1319, "A6268": 1320, "A3208": 1321, "A4470": 1322, "A3070": 1323, "A3549": 1324, "A5655": 1325, "A5303": 1326, "A6147": 1327, "A4631": 1328, "A3519": 1329, "A5313": 1330, "A6010": 1331, "A4735": 1332, "A3890": 1333, "A5991": 1334, "A5095": 1335, "A3930": 1336, "A3562": 1337, "A3670": 1338, "A5322": 1339, "A4186": 1340, "A5331": 1341, "A5300": 1342, "A4728": 1343, "A4251": 1344, "A5230": 1345, "A5309": 1346, "A5284": 1347, "A3178": 1348, "A3269": 1349, "A6071": 1350, "A6055": 1351, "A6224": 1352, "A3476": 1353, "A3297": 1354, "A6206": 1355, "A3341": 1356, "A4593": 1357, "A4555": 1358, "A4826": 1359, "A3994": 1360, "A3558": 1361, "A3128": 1362, "A4946": 1363, "A4955": 1364, "A4292": 1365, "A3797": 1366, "A4444": 1367, "A4429": 1368, "A3532": 1369, "A5383": 1370, "A3088": 1371, "A3082": 1372, "A4432": 1373, "A4561": 1374, "A5369": 1375, "A4442": 1376, "A5341": 1377, "A4436": 1378, "A4169": 1379, "A5382": 1380, "A5502": 1381, "A5368": 1382, "A5603": 1383, "A5359": 1384, "A4415": 1385, "A5343": 1386, "A5340": 1387, "A5384": 1388, "A5379": 1389, "A4439": 1390, "A5096": 1391, "A2949": 1392, "A5372": 1393, "A5339": 1394, "A3538": 1395, "A5295": 1396, "A4424": 1397, "A5342": 1398, "A5274": 1399, "A5264": 1400, "A4717": 1401, "A5252": 1402, "A5167": 1403, "A6136": 1404, "A6201": 1405, "A4693": 1406, "A4245": 1407, "A5219": 1408, "A4608": 1409, "A4635": 1410, "A4434": 1411, "A4738": 1412, "A4033": 1413, "A5982": 1414, "A3295": 1415, "A5999": 1416, "A5511": 1417, "A4699": 1418, "A4231": 1419, "A3769": 1420, "A3411": 1421, "A4412": 1422, "A5296": 1423, "A4135": 1424, "A5243": 1425, "A5265": 1426, "A5573": 1427, "A2970": 1428, "A5482": 1429, "A4421": 1430, "A3064": 1431, "A3445": 1432, "A4883": 1433, "A4554": 1434, "A3300": 1435, "A6266": 1436, "A4449": 1437, "A5292": 1438, "A5249": 1439, "A5282": 1440, "A3109": 1441, "A3621": 1442, "A2903": 1443, "A6087": 1444, "A3273": 1445, "A4547": 1446, "A5210": 1447, "A3953": 1448, "A5272": 1449, "A2944": 1450, "A3707": 1451, "A5734": 1452, "A4628": 1453, "A4275": 1454, "A2879": 1455, "A4119": 1456, "A4355": 1457, "A3198": 1458, "A555": 1459, "A3020": 1460, "A2943": 1461, "A2923": 1462, "A3037": 1463, "A5710": 1464, "A3211": 1465, "A3923": 1466, "A5524": 1467, "A3357": 1468, "A5628": 1469, "A4331": 1470, "A3559": 1471, "A4806": 1472, "A4876": 1473, "A4807": 1474, "A6202": 1475, "A3968": 1476, "A4433": 1477, "A3137": 1478, "A5256": 1479, "A3192": 1480, "A4446": 1481, "A4323": 1482, "A4965": 1483, "A4773": 1484, "A4249": 1485, "A4232": 1486, "A4794": 1487, "A2990": 1488, "A3234": 1489, "A4746": 1490, "A3737": 1491, "A5946": 1492, "A5280": 1493, "A4010": 1494, 
"A5245": 1495, "A4325": 1496, "A5269": 1497, "A3106": 1498, "A4409": 1499, "A4418": 1500, "A4905": 1501, "A4872": 1502, "A6203": 1503, "A6057": 1504, "A3050": 1505, "A2942": 1506, "A3958": 1507, "A3170": 1508, "A4345": 1509, "A3728": 1510, "A5626": 1511, "A3997": 1512, "A3743": 1513, "A5979": 1514, "A4856": 1515, "A4430": 1516, "A3430": 1517, "A3065": 1518, "A3266": 1519, "A4152": 1520, "A6204": 1521, "A5606": 1522, "A5729": 1523, "A4289": 1524, "A5723": 1525, "A3893": 1526, "A5661": 1527, "A3812": 1528, "A2986": 1529, "A2921": 1530, "A3267": 1531, "A6137": 1532, "A4768": 1533, "A3841": 1534, "A3694": 1535, "A5646": 1536, "A4140": 1537, "A4125": 1538, "A3342": 1539, "A3955": 1540, "A3345": 1541, "A6046": 1542, "A5975": 1543, "A4181": 1544, "A6007": 1545, "A6293": 1546, "A3754": 1547, "A3766": 1548, "A3225": 1549, "A3429": 1550, "A5253": 1551, "A3332": 1552, "A6013": 1553, "A2495": 1554, "A3155": 1555, "A3520": 1556, "A3200": 1557, "A3516": 1558, "A6107": 1559, "A3264": 1560, "A3444": 1561, "A3977": 1562, "A3975": 1563, "A5512": 1564, "A4722": 1565, "A5266": 1566, "A6132": 1567, "A3173": 1568, "A6025": 1569, "A5033": 1570, "A6205": 1571, "A6130": 1572, "A5297": 1573, "A3320": 1574, "A5956": 1575, "A3546": 1576, "A3500": 1577, "A6074": 1578, "A3226": 1579, "A3484": 1580, "A3412": 1581, "A3441": 1582, "A6104": 1583, "A6296": 1584, "A6263": 1585, "A6267": 1586, "A6275": 1587, "A3472": 1588, "A3449": 1589, "A4422": 1590, "A3959": 1591, "A6144": 1592, "A3416": 1593, "A5962": 1594, "A3274": 1595, "A3735": 1596, "A6120": 1597, "A5244": 1598, "A4597": 1599, "A4605": 1600, "A6091": 1601, "A4427": 1602, "A5250": 1603, "A5510": 1604, "A5228": 1605, "A4923": 1606, "A5142": 1607, "A2930": 1608, "A3209": 1609, "A4648": 1610, "A4898": 1611, "A6115": 1612, "A6211": 1613, "A6039": 1614, "A6157": 1615, "A5961": 1616, "A5963": 1617, "A2933": 1618, "A3435": 1619, "A5632": 1620, "A3974": 1621, "A2908": 1622, "A4626": 1623, "A5670": 1624, "A3454": 1625, "A4383": 1626, "A3596": 1627, "A4586": 1628, "A3458": 1629, "A2887": 1630, "A3660": 1631, "A5223": 1632, "A4542": 1633, "A3147": 1634, "A5205": 1635, "A2976": 1636, "A4023": 1637, "A5214": 1638, "A5190": 1639, "A4000": 1640, "A5212": 1641, "A4654": 1642, "A4716": 1643, "A4419": 1644, "A5695": 1645, "A5679": 1646, "A4816": 1647, "A3609": 1648, "A3602": 1649, "A5574": 1650, "A4402": 1651, "A4818": 1652, "A4420": 1653, "A4860": 1654, "A3073": 1655, "A3363": 1656, "A4890": 1657, "A3353": 1658, "A3954": 1659, "A3761": 1660, "A4226": 1661, "A3963": 1662, "A6017": 1663, "A5232": 1664, "A4455": 1665, "A3121": 1666, "A2947": 1667, "A5240": 1668, "A4851": 1669, "A3590": 1670, "A3424": 1671, "A3272": 1672, "A5211": 1673, "A5238": 1674, "A5722": 1675, "A4603": 1676, "A3242": 1677, "A5660": 1678, "A5220": 1679, "A3130": 1680, "A6073": 1681, "A6278": 1682, "A3490": 1683, "A6254": 1684, "A6216": 1685, "A5644": 1686, "A6075": 1687, "A3846": 1688, "A3259": 1689, "A3215": 1690, "A4004": 1691, "A3832": 1692, "A3280": 1693, "A3306": 1694, "A6207": 1695, "A3152": 1696, "A5257": 1697, "A5995": 1698, "A4031": 1699, "A5191": 1700, "A4492": 1701, "A4469": 1702, "A3979": 1703, "A5226": 1704, "A5233": 1705, "A2956": 1706, "A2996": 1707, "A5229": 1708, "A3771": 1709, "A5217": 1710, "A5208": 1711, "A4378": 1712, "A5193": 1713, "A5733": 1714, "A5235": 1715, "A3738": 1716, "A4404": 1717, "A4042": 1718, "A3980": 1719, "A5224": 1720, "A5150": 1721, "A3992": 1722, "A3863": 1723, "A3759": 1724, "A3969": 1725, "A3991": 1726, "A4477": 1727, "A3956": 1728, "A5241": 1729, "A3984": 1730, "A3988": 1731, 
"A5236": 1732, "A3990": 1733, "A5951": 1734, "A6208": 1735, "A4171": 1736, "A5206": 1737, "A5972": 1738, "A6163": 1739, "A5215": 1740, "A4189": 1741, "A4727": 1742, "A4313": 1743, "A2952": 1744, "A3878": 1745, "A4037": 1746, "A6050": 1747, "A3007": 1748, "A3303": 1749, "A3666": 1750, "A4306": 1751, "A2494": 1752, "A3748": 1753, "A4758": 1754, "A5184": 1755, "A3710": 1756, "A3380": 1757, "A4749": 1758, "A3869": 1759, "A6047": 1760, "A2998": 1761, "A5152": 1762, "A2932": 1763, "A3978": 1764, "A3404": 1765, "A3950": 1766, "A3008": 1767, "A2954": 1768, "A4214": 1769, "A3779": 1770, "A4916": 1771, "A6168": 1772, "A3597": 1773, "A3960": 1774, "A6210": 1775, "A4634": 1776, "A5720": 1777, "A3669": 1778, "A4692": 1779, "A5227": 1780, "A3687": 1781, "A5188": 1782, "A5259": 1783, "A3221": 1784, "A3362": 1785, "A5239": 1786, "A5692": 1787, "A3366": 1788, "A5203": 1789, "A5209": 1790, "A6031": 1791, "A3344": 1792, "A5665": 1793, "A6066": 1794, "A4318": 1795, "A3154": 1796, "A3533": 1797, "A3529": 1798, "A4624": 1799, "A6032": 1800, "A3579": 1801, "A4617": 1802, "A2958": 1803, "A3985": 1804, "A4376": 1805, "A4981": 1806, "A4320": 1807, "A5258": 1808, "A5218": 1809, "A4896": 1810, "A4698": 1811, "A4877": 1812, "A3414": 1813, "A3957": 1814, "A2999": 1815, "A4425": 1816, "A3986": 1817, "A3314": 1818, "A3995": 1819, "A6062": 1820, "A3972": 1821, "A4411": 1822, "A6060": 1823, "A5940": 1824, "A5996": 1825, "A4840": 1826, "A3140": 1827, "A4835": 1828, "A3982": 1829, "A4953": 1830, "A4354": 1831, "A4847": 1832, "A4454": 1833, "A4839": 1834, "A5207": 1835, "A5194": 1836, "A4861": 1837, "A4435": 1838, "A5216": 1839, "A4854": 1840, "A4475": 1841, "A5575": 1842, "A5542": 1843, "A4844": 1844, "A4689": 1845, "A4837": 1846, "A5539": 1847, "A5572": 1848, "A4858": 1849, "A4164": 1850, "A4714": 1851, "A4296": 1852, "A3898": 1853, "A5234": 1854, "A5242": 1855, "A4260": 1856, "A5959": 1857, "A5204": 1858, "A3354": 1859, "A5970": 1860, "A6238": 1861, "A3304": 1862, "A6243": 1863, "A5659": 1864, "A3177": 1865, "A3334": 1866, "A3227": 1867, "A4723": 1868, "A3935": 1869, "A5213": 1870, "A5189": 1871, "A6213": 1872, "A5658": 1873, "A4495": 1874, "A3931": 1875, "A3964": 1876, "A3251": 1877, "A4236": 1878, "A5619": 1879, "A3230": 1880, "A3938": 1881, "A3406": 1882, "A5155": 1883, "A6214": 1884, "A3210": 1885, "A3096": 1886, "A6101": 1887, "A3970": 1888, "A3143": 1889, "A3835": 1890, "A5139": 1891, "A3480": 1892, "A4271": 1893, "A5109": 1894, "A6277": 1895, "A3509": 1896, "A6016": 1897, "A6086": 1898, "A6249": 1899, "A3172": 1900, "A5952": 1901, "A5117": 1902, "A6290": 1903, "A5937": 1904, "A5097": 1905, "A3838": 1906, "A5138": 1907, "A4987": 1908, "A3499": 1909, "A5126": 1910, "A5108": 1911, "A3606": 1912, "A4145": 1913, "A4393": 1914, "A3913": 1915, "A3338": 1916, "A5985": 1917, "A3311": 1918, "A3917": 1919, "A5966": 1920, "A3947": 1921, "A4332": 1922, "A3944": 1923, "A5957": 1924, "A5676": 1925, "A3908": 1926, "A4741": 1927, "A3922": 1928, "A3914": 1929, "A3184": 1930, "A3506": 1931, "A6106": 1932, "A5143": 1933, "A6217": 1934, "A6218": 1935, "A3691": 1936, "A5111": 1937, "A3169": 1938, "A5106": 1939, "A3905": 1940, "A5093": 1941, "A5636": 1942, "A2939": 1943, "A5134": 1944, "A5140": 1945, "A5110": 1946, "A3939": 1947, "A6285": 1948, "A6234": 1949, "A3233": 1950, "A6260": 1951, "A6270": 1952, "A3488": 1953, "A3911": 1954, "A5620": 1955, "A4382": 1956, "A4326": 1957, "A5115": 1958, "A4838": 1959, "A1868": 1960, "A3478": 1961, "A5519": 1962, "A5657": 1963, "A5166": 1964, "A3814": 1965, "A4317": 1966, "A3487": 1967, "A4606": 1968, 
"A5124": 1969, "A5137": 1970, "A4481": 1971, "A5639": 1972, "A5107": 1973, "A4361": 1974, "A5112": 1975, "A6222": 1976, "A5094": 1977, "A5135": 1978, "A3489": 1979, "A3243": 1980, "A4195": 1981, "A3497": 1982, "A6155": 1983, "A4175": 1984, "A3237": 1985, "A5141": 1986, "A4697": 1987, "A3698": 1988, "A3276": 1989, "A5105": 1990, "A4283": 1991, "A5147": 1992, "A3901": 1993, "A5144": 1994, "A3479": 1995, "A3493": 1996, "A3205": 1997, "A5196": 1998, "A3848": 1999, "A3614": 2000, "A3910": 2001, "A3051": 2002, "A5526": 2003, "A5564": 2004, "A3733": 2005, "A4381": 2006, "A4943": 2007, "A5071": 2008, "A5200": 2009, "A5165": 2010, "A6225": 2011, "A3941": 2012, "A5378": 2013, "A5587": 2014, "A3895": 2015, "A3866": 2016, "A5153": 2017, "A3858": 2018, "A3856": 2019, "A5146": 2020, "A5492": 2021, "A4964": 2022, "A5171": 2023, "A5164": 2024, "A6113": 2025, "A5960": 2026, "A3883": 2027, "A6245": 2028, "A6264": 2029, "A3804": 2030, "A5169": 2031, "A5582": 2032, "A5145": 2033, "A3859": 2034, "A3364": 2035, "A3747": 2036, "A5654": 2037, "A3924": 2038, "A4396": 2039, "A3716": 2040, "A4086": 2041, "A3936": 2042, "A3844": 2043, "A4732": 2044, "A3828": 2045, "A5674": 2046, "A5704": 2047, "A5945": 2048, "A6229": 2049, "A4394": 2050, "A2508": 2051, "A3884": 2052, "A3396": 2053, "A6166": 2054, "A3360": 2055, "A5641": 2056, "A5513": 2057, "A4649": 2058, "A4528": 2059, "A3587": 2060, "A6226": 2061, "A3871": 2062, "A5197": 2063, "A3388": 2064, "A5667": 2065, "A4160": 2066, "A5160": 2067, "A4705": 2068, "A3888": 2069, "A5156": 2070, "A4721": 2071, "A5629": 2072, "A4198": 2073, "A4772": 2074, "A5162": 2075, "A4709": 2076, "A6150": 2077, "A4153": 2078, "A4148": 2079, "A3860": 2080, "A4377": 2081, "A6019": 2082, "A4771": 2083, "A4759": 2084, "A5546": 2085, "A3132": 2086, "A5185": 2087, "A3787": 2088, "A3688": 2089, "A4356": 2090, "A4309": 2091, "A5591": 2092, "A3889": 2093, "A3882": 2094, "A5547": 2095, "A5178": 2096, "A5176": 2097, "A5623": 2098, "A4366": 2099, "A3730": 2100, "A5177": 2101, "A5151": 2102, "A3199": 2103, "A5168": 2104, "A4276": 2105, "A5548": 2106, "A5585": 2107, "A3809": 2108, "A4218": 2109, "A3827": 2110, "A5149": 2111, "A4423": 2112, "A4222": 2113, "A5202": 2114, "A3720": 2115, "A4426": 2116, "A5222": 2117, "A5527": 2118, "A5159": 2119, "A4150": 2120, "A5041": 2121, "A4720": 2122, "A5042": 2123, "A5981": 2124, "A5027": 2125, "A3708": 2126, "A4122": 2127, "A3826": 2128, "A6027": 2129, "A4713": 2130, "A6022": 2131, "A3195": 2132, "A4882": 2133, "A5183": 2134, "A2953": 2135, "A5161": 2136, "A4392": 2137, "A4395": 2138, "A5009": 2139, "A4967": 2140, "A5157": 2141, "A5040": 2142, "A5730": 2143, "A5181": 2144, "A3498": 2145, "A4374": 2146, "A2902": 2147, "A3507": 2148, "A5201": 2149, "A5187": 2150, "A5589": 2151, "A5173": 2152, "A5179": 2153, "A5154": 2154, "A3570": 2155, "A3174": 2156, "A3501": 2157, "A3849": 2158, "A6158": 2159, "A3321": 2160, "A4369": 2161, "A6227": 2162, "A3405": 2163, "A6171": 2164, "A5630": 2165, "A3494": 2166, "A3376": 2167, "A6228": 2168, "A3289": 2169, "A6122": 2170, "A3800": 2171, "A6149": 2172, "A3817": 2173, "A3222": 2174, "A5170": 2175, "A3483": 2176, "A3157": 2177, "A5182": 2178, "A3672": 2179, "A3744": 2180, "A3681": 2181, "A3676": 2182, "A3713": 2183, "A3933": 2184, "A6015": 2185, "A3432": 2186, "A6288": 2187, "A3746": 2188, "A3518": 2189, "A6053": 2190, "A6064": 2191, "A4450": 2192, "A4307": 2193, "A3790": 2194, "A4365": 2195, "A3739": 2196, "A4147": 2197, "A6160": 2198, "A5624": 2199, "A3392": 2200, "A6028": 2201, "A5186": 2202, "A3742": 2203, "A3810": 2204, "A3824": 2205, 
"A6151": 2206, "A4194": 2207, "A3149": 2208, "A6063": 2209, "A5947": 2210, "A6076": 2211, "A6058": 2212, "A6250": 2213, "A3409": 2214, "A2493": 2215, "A3197": 2216, "A3816": 2217, "A6231": 2218, "A3104": 2219, "A6069": 2220, "A5942": 2221, "A6018": 2222, "A3689": 2223, "A6287": 2224, "A3722": 2225, "A5706": 2226, "A3799": 2227, "A3683": 2228, "A3741": 2229, "A3113": 2230, "A3750": 2231, "A6096": 2232, "A3705": 2233, "A3704": 2234, "A5616": 2235, "A3736": 2236, "A3703": 2237, "A5090": 2238, "A3697": 2239, "A3682": 2240, "A5731": 2241, "A3686": 2242, "A6014": 2243, "A5969": 2244, "A6280": 2245, "A1870": 2246, "A5158": 2247, "A3463": 2248, "A3725": 2249, "A6139": 2250, "A6258": 2251, "A3668": 2252, "A3709": 2253, "A3296": 2254, "A3751": 2255, "A3358": 2256, "A3692": 2257, "A3701": 2258, "A3717": 2259, "A3798": 2260, "A6061": 2261, "A5634": 2262, "A5221": 2263, "A3934": 2264, "A6089": 2265, "A3927": 2266, "A3781": 2267, "A3283": 2268, "A6184": 2269, "A6152": 2270, "A3355": 2271, "A3339": 2272, "A3317": 2273, "A2870": 2274, "A3534": 2275, "A6093": 2276, "A6240": 2277, "A3438": 2278, "A6272": 2279, "A6078": 2280, "A3504": 2281, "A6090": 2282, "A5662": 2283, "A3271": 2284, "A3510": 2285, "A3421": 2286, "A6092": 2287, "A3248": 2288, "A6281": 2289, "A2492": 2290, "A3348": 2291, "A6292": 2292, "A3270": 2293, "A6164": 2294, "A6232": 2295, "A4319": 2296, "A3369": 2297, "A3398": 2298, "A4106": 2299, "A3870": 2300, "A3347": 2301, "A3252": 2302, "A3207": 2303, "A3328": 2304, "A5647": 2305, "A6125": 2306, "A3268": 2307, "A6236": 2308, "A6102": 2309, "A3265": 2310, "A6235": 2311, "A6239": 2312, "A5642": 2313, "A6138": 2314, "A3495": 2315, "A3246": 2316, "A6241": 2317, "A5545": 2318, "A4579": 2319, "A3576": 2320, "A3622": 2321, "A3591": 2322, "A3138": 2323, "A4595": 2324, "A2904": 2325, "A3940": 2326, "A6244": 2327, "A3699": 2328, "A3595": 2329, "A3654": 2330, "A3336": 2331, "A5102": 2332, "A3337": 2333, "A4599": 2334, "A6103": 2335, "A3187": 2336, "A3815": 2337, "A3613": 2338, "A4627": 2339, "A4397": 2340, "A5091": 2341, "A5082": 2342, "A4496": 2343, "A5072": 2344, "A5062": 2345, "A3942": 2346, "A5053": 2347, "A4731": 2348, "A4880": 2349, "A4438": 2350, "A4625": 2351, "A3299": 2352, "A5562": 2353, "A5080": 2354, "A6067": 2355, "A4570": 2356, "A3326": 2357, "A3313": 2358, "A3918": 2359, "A5051": 2360, "A5070": 2361, "A5618": 2362, "A3925": 2363, "A5060": 2364, "A3286": 2365, "A4390": 2366, "A5615": 2367, "A4380": 2368, "A5649": 2369, "A3426": 2370, "A3543": 2371, "A2872": 2372, "A2987": 2373, "A5067": 2374, "A4297": 2375, "A4676": 2376, "A5086": 2377, "A5077": 2378, "A5048": 2379, "A4686": 2380, "A5057": 2381, "A6095": 2382, "A4428": 2383, "A5702": 2384, "A2913": 2385, "A3580": 2386, "A5092": 2387, "A3929": 2388, "A5073": 2389, "A5083": 2390, "A4360": 2391, "A5054": 2392, "A4406": 2393, "A5063": 2394, "A4639": 2395, "A2890": 2396, "A3909": 2397, "A3290": 2398, "A3055": 2399, "A3365": 2400, "A5087": 2401, "A5964": 2402, "A5068": 2403, "A4979": 2404, "A4371": 2405, "A4362": 2406, "A3834": 2407, "A6161": 2408, "A3281": 2409, "A3057": 2410, "A4994": 2411, "A4513": 2412, "A4747": 2413, "A5046": 2414, "A5074": 2415, "A4389": 2416, "A3811": 2417, "A5075": 2418, "A2878": 2419, "A5078": 2420, "A3126": 2421, "A3448": 2422, "A5049": 2423, "A4618": 2424, "A4655": 2425, "A4920": 2426, "A4560": 2427, "A3457": 2428, "A5691": 2429, "A3633": 2430, "A3588": 2431, "A5682": 2432, "A4564": 2433, "A4571": 2434, "A2901": 2435, "A5583": 2436, "A2892": 2437, "A3656": 2438, "A3108": 2439, "A4610": 2440, "A5084": 2441, "A4340": 2442, 
"A5988": 2443, "A3391": 2444, "A3461": 2445, "A5064": 2446, "A4737": 2447, "A4489": 2448, "A4600": 2449, "A5560": 2450, "A4756": 2451, "A4614": 2452, "A3906": 2453, "A4363": 2454, "A2871": 2455, "A3607": 2456, "A4594": 2457, "A4279": 2458, "A4294": 2459, "A4303": 2460, "A5566": 2461, "A5031": 2462, "A3833": 2463, "A3729": 2464, "A2919": 2465, "A4750": 2466, "A4918": 2467, "A5192": 2468, "A5673": 2469, "A5081": 2470, "A6042": 2471, "A3390": 2472, "A5120": 2473, "A5098": 2474, "A2957": 2475, "A5534": 2476, "A5052": 2477, "A3855": 2478, "A5061": 2479, "A5088": 2480, "A5565": 2481, "A3664": 2482, "A6246": 2483, "A3427": 2484, "A3887": 2485, "A5069": 2486, "A5079": 2487, "A3873": 2488, "A5065": 2489, "A5050": 2490, "A5059": 2491, "A3783": 2492, "A5085": 2493, "A4349": 2494, "A5133": 2495, "A3875": 2496, "A5125": 2497, "A5121": 2498, "A5099": 2499, "A5557": 2500, "A3331": 2501, "A3171": 2502, "A3134": 2503, "A4437": 2504, "A3586": 2505, "A3496": 2506, "A3401": 2507, "A2929": 2508, "A4504": 2509, "A5163": 2510, "A3407": 2511, "A5118": 2512, "A4211": 2513, "A3162": 2514, "A5122": 2515, "A5100": 2516, "A5127": 2517, "A5132": 2518, "A3885": 2519, "A2997": 2520, "A4300": 2521, "A4364": 2522, "A6265": 2523, "A4299": 2524, "A3228": 2525, "A4115": 2526, "A4350": 2527, "A5552": 2528, "A3872": 2529, "A4121": 2530, "A5707": 2531, "A4403": 2532, "A5131": 2533, "A4351": 2534, "A2985": 2535, "A2877": 2536, "A3418": 2537, "A4253": 2538, "A3068": 2539, "A3202": 2540, "A4335": 2541, "A5113": 2542, "A5577": 2543, "A5130": 2544, "A6248": 2545, "A4607": 2546, "A4707": 2547, "A4993": 2548, "A3124": 2549, "A2895": 2550, "A3864": 2551, "A5653": 2552, "A4277": 2553, "A3556": 2554, "A3527": 2555, "A4972": 2556, "A5677": 2557, "A2874": 2558, "A6169": 2559, "A6252": 2560, "A5480": 2561, "A4173": 2562, "A3794": 2563, "A5017": 2564, "A3796": 2565, "A4903": 2566, "A4984": 2567, "A5123": 2568, "A2928": 2569, "A2905": 2570, "A3425": 2571, "A4990": 2572, "A3004": 2573, "A3263": 2574, "A2974": 2575, "A5516": 2576, "A4926": 2577, "A6253": 2578, "A5018": 2579, "A4985": 2580, "A3886": 2581, "A4725": 2582, "A5514": 2583, "A6100": 2584, "A3239": 2585, "A3307": 2586, "A3695": 2587, "A4308": 2588, "A3640": 2589, "A6255": 2590, "A3439": 2591, "A4566": 2592, "A4291": 2593, "A6142": 2594, "A4575": 2595, "A3451": 2596, "A3867": 2597, "A5523": 2598, "A4453": 2599, "A3896": 2600, "A6003": 2601, "A5022": 2602, "A4347": 2603, "A4500": 2604, "A3598": 2605, "A2906": 2606, "A5043": 2607, "A3204": 2608, "A5037": 2609, "A5520": 2610, "A4663": 2611, "A3308": 2612, "A4176": 2613, "A4223": 2614, "A3830": 2615, "A3839": 2616, "A5699": 2617, "A2181": 2618, "A4219": 2619, "A5528": 2620, "A4558": 2621, "A6257": 2622, "A5028": 2623, "A5011": 2624, "A3284": 2625, "A2882": 2626, "A5035": 2627, "A5025": 2628, "A5525": 2629, "A5007": 2630, "A5032": 2631, "A4549": 2632, "A3521": 2633, "A3877": 2634, "A5175": 2635, "A5727": 2636, "A4212": 2637, "A5021": 2638, "A5023": 2639, "A5521": 2640, "A4416": 2641, "A4221": 2642, "A4184": 2643, "A5038": 2644, "A4902": 2645, "A5518": 2646, "A5529": 2647, "A3502": 2648, "A5029": 2649, "A5012": 2650, "A5036": 2651, "A5517": 2652, "A4457": 2653, "A5026": 2654, "A5008": 2655, "A3437": 2656, "A6112": 2657, "A3185": 2658, "A3466": 2659, "A6131": 2660, "A5938": 2661, "A5522": 2662, "A6259": 2663, "A3615": 2664, "A3644": 2665, "A3635": 2666, "A4793": 2667, "A4535": 2668, "A2886": 2669, "A3874": 2670, "A5024": 2671, "A5006": 2672, "A3651": 2673, "A4573": 2674, "A5533": 2675, "A5039": 2676, "A3643": 2677, "A4315": 2678, "A5592": 2679, 
"A5732": 2680, "A4966": 2681, "A3069": 2682, "A4330": 2683, "A3862": 2684, "A4888": 2685, "A5530": 2686, "A3525": 2687, "A4286": 2688, "A5635": 2689, "A2876": 2690, "A3110": 2691, "A3547": 2692, "A5030": 2693, "A4897": 2694, "A4873": 2695, "A4989": 2696, "A4894": 2697, "A3486": 2698, "A3419": 2699, "A3459": 2700, "A4971": 2701, "A4499": 2702, "A5016": 2703, "A4293": 2704, "A4871": 2705, "A4278": 2706, "A5580": 2707, "A3650": 2708, "A3822": 2709, "A3201": 2710, "A3962": 2711, "A4154": 2712, "A4986": 2713, "A3446": 2714, "A4881": 2715, "A4869": 2716, "A4969": 2717, "A3511": 2718, "A4909": 2719, "A4892": 2720, "A3714": 2721, "A5612": 2722, "A4968": 2723, "A4192": 2724, "A4672": 2725, "A557": 2726, "A4992": 2727, "A5019": 2728, "A4601": 2729, "A4995": 2730, "A4974": 2731, "A4874": 2732, "A6261": 2733, "A4886": 2734, "A2988": 2735, "A3847": 2736, "A3753": 2737, "A3144": 2738, "A3819": 2739, "A5997": 2740, "A6056": 2741, "A3618": 2742, "A4911": 2743, "A6006": 2744, "A4302": 2745, "A6002": 2746, "A6033": 2747, "A4776": 2748, "A5640": 2749, "A3820": 2750, "A4407": 2751, "A4895": 2752, "A5716": 2753, "A3491": 2754, "A4258": 2755, "A4970": 2756, "A4333": 2757, "A4344": 2758, "A3808": 2759, "A3762": 2760, "A3852": 2761, "A4284": 2762, "A4591": 2763, "A3112": 2764, "A3420": 2765, "A3460": 2766, "A4562": 2767, "A4290": 2768, "A3631": 2769, "A3678": 2770, "A4893": 2771, "A4910": 2772, "A2926": 2773, "A5664": 2774, "A4783": 2775, "A4792": 2776, "A4828": 2777, "A4811": 2778, "A4809": 2779, "A4787": 2780, "A4831": 2781, "A4842": 2782, "A4866": 2783, "A5976": 2784, "A4552": 2785, "A3136": 2786, "A5535": 2787, "A2918": 2788, "A4578": 2789, "A4887": 2790, "A6170": 2791, "A4875": 2792, "A4498": 2793, "A4261": 2794, "A2917": 2795, "A4588": 2796, "A3329": 2797, "A2889": 2798, "A4889": 2799, "A4899": 2800, "A5705": 2801, "A5532": 2802, "A3079": 2803, "A4907": 2804, "A4451": 2805, "A4582": 2806, "A2995": 2807, "A4859": 2808, "A4830": 2809, "A4915": 2810, "A3611": 2811, "A5567": 2812, "A3655": 2813, "A2894": 2814, "A4754": 2815, "A5711": 2816, "A3646": 2817, "A3805": 2818, "A4823": 2819, "A3455": 2820, "A2875": 2821, "A4238": 2822, "A4516": 2823, "A4760": 2824, "A3443": 2825, "A3395": 2826, "A3659": 2827, "A4879": 2828, "A4766": 2829, "A4301": 2830, "A4681": 2831, "A4954": 2832, "A4924": 2833, "A4933": 2834, "A3371": 2835, "A3981": 2836, "A4941": 2837, "A4932": 2838, "A3823": 2839, "A4951": 2840, "A4921": 2841, "A4931": 2842, "A3000": 2843, "A3081": 2844, "A4269": 2845, "A2884": 2846, "A4940": 2847, "A3219": 2848, "A4949": 2849, "A5965": 2850, "A6097": 2851, "A4959": 2852, "A4770": 2853, "A4305": 2854, "A4929": 2855, "A3734": 2856, "A4358": 2857, "A4938": 2858, "A4334": 2859, "A2938": 2860, "A3456": 2861, "A3763": 2862, "A3168": 2863, "A4919": 2864, "A4947": 2865, "A4660": 2866, "A4328": 2867, "A6037": 2868, "A4201": 2869, "A4956": 2870, "A5680": 2871, "A3876": 2872, "A3175": 2873, "A3285": 2874, "A6011": 2875, "A4460": 2876, "A3302": 2877, "A3452": 2878, "A5974": 2879, "A3517": 2880, "A3122": 2881, "A3845": 2882, "A3470": 2883, "A4913": 2884, "A4976": 2885, "A4761": 2886, "A4774": 2887, "A3095": 2888, "A4568": 2889, "A4576": 2890, "A3189": 2891, "A3367": 2892, "A3453": 2893, "A2916": 2894, "A4182": 2895, "A4592": 2896, "A4822": 2897, "A5408": 2898, "A4244": 2899, "A4785": 2900, "A4797": 2901, "A4868": 2902, "A4865": 2903, "A4878": 2904, "A4867": 2905, "A4357": 2906, "A4937": 2907, "A4779": 2908, "A4789": 2909, "A4950": 2910, "A4960": 2911, "A3101": 2912, "A3989": 2913, "A4998": 2914, "A3067": 2915, "A4584": 2916, 
"A2873": 2917, "A4930": 2918, "A3536": 2919, "A3617": 2920, "A4565": 2921, "A4574": 2922, "A6273": 2923, "A6274": 2924, "A6276": 2925, "A5663": 2926, "A3440": 2927, "A3232": 2928, "A6012": 2929, "A6173": 2930, "A6279": 2931, "A3481": 2932, "A3145": 2933, "A4808": 2934, "A4825": 2935, "A4136": 2936, "A4590": 2937, "A4798": 2938, "A4939": 2939, "A3066": 2940, "A2922": 2941, "A3636": 2942, "A3181": 2943, "A3135": 2944, "A4948": 2945, "A4957": 2946, "A4927": 2947, "A4777": 2948, "A4786": 2949, "A4934": 2950, "A4563": 2951, "A2881": 2952, "A4906": 2953, "A4640": 2954, "A4917": 2955, "A3778": 2956, "A4804": 2957, "A4870": 2958, "A3165": 2959, "A3628": 2960, "A4958": 2961, "A4572": 2962, "A3119": 2963, "A4928": 2964, "A6167": 2965, "A4780": 2966, "A4790": 2967, "A3784": 2968, "A3085": 2969, "A4553": 2970, "A3247": 2971, "A5943": 2972, "A4205": 2973, "A4702": 2974, "A4482": 2975, "A3062": 2976, "A3760": 2977, "A4855": 2978, "A2946": 2979, "A4179": 2980, "A3775": 2981, "A4589": 2982, "A4925": 2983, "A5683": 2984, "A3028": 2985, "A5544": 2986, "A5687": 2987, "A3661": 2988, "A4264": 2989, "A4805": 2990, "A3107": 2991, "A3080": 2992, "A3795": 2993, "A4942": 2994, "A4812": 2995, "A4802": 2996, "A4281": 2997, "A4980": 2998, "A4829": 2999, "A4262": 3000, "A3782": 3001, "A4952": 3002, "A4922": 3003, "A4339": 3004, "A4999": 3005, "A4996": 3006, "A4977": 3007, "A4978": 3008, "A4962": 3009, "A6286": 3010, "A5003": 3011, "A4556": 3012, "A3115": 3013, "A4252": 3014, "A5004": 3015, "A3413": 3016, "A3482": 3017, "A3077": 3018, "A4324": 3019, "A3076": 3020, "A4399": 3021, "A4341": 3022, "A3382": 3023, "A3384": 3024, "A4706": 3025, "A4769": 3026, "A3777": 3027, "A5622": 3028, "A3035": 3029, "A4791": 3030, "A4827": 3031, "A4550": 3032, "A4270": 3033, "A3550": 3034, "A4616": 3035, "A3086": 3036, "A3474": 3037, "A3600": 3038, "A3410": 3039, "A2945": 3040, "A2936": 3041, "A4817": 3042, "A3182": 3043, "A3551": 3044, "A4585": 3045, "A3574": 3046, "A3091": 3047, "A4810": 3048, "A2912": 3049, "A3793": 3050, "A4800": 3051, "A3700": 3052, "A2898": 3053, "A4864": 3054, "A3240": 3055, "A6284": 3056, "A3645": 3057, "A4507": 3058, "A3612": 3059, "A5541": 3060, "A4845": 3061, "A4834": 3062, "A3916": 3063, "A4814": 3064, "A4788": 3065, "A4282": 3066, "A4272": 3067, "A4824": 3068, "A3642": 3069, "A4832": 3070, "A4545": 3071, "A3584": 3072, "A4525": 3073, "A2880": 3074, "A3415": 3075, "A5688": 3076, "A2914": 3077, "A2893": 3078, "A4522": 3079, "A5696": 3080, "A5515": 3081, "A4850": 3082, "A3573": 3083, "A5686": 3084, "A3054": 3085, "A2925": 3086, "A4161": 3087, "A4813": 3088, "A4417": 3089, "A4820": 3090, "A4250": 3091, "A4803": 3092, "A4782": 3093, "A4224": 3094, "A4233": 3095, "A4819": 3096, "A4836": 3097, "A5668": 3098, "A3477": 3099, "A5645": 3100, "A5540": 3101, "A4853": 3102, "A4527": 3103, "A6128": 3104, "A4532": 3105, "A4848": 3106, "A5338": 3107, "A3530": 3108, "A4863": 3109, "A4862": 3110, "A5538": 3111, "A3993": 3112, "A3061": 3113, "A4815": 3114, "A4841": 3115, "A4857": 3116, "A6289": 3117, "A3450": 3118, "A4891": 3119, "A4849": 3120, "A4852": 3121, "A4165": 3122, "A4908": 3123, "A4833": 3124, "A4900": 3125, "A6251": 3126, "A5483": 3127, "A5536": 3128, "A4885": 3129, "A4338": 3130, "A3842": 3131, "A4843": 3132, "A3740": 3133, "A4821": 3134, "A3293": 3135, "A3749": 3136}
logs/events.out.tfevents.1734972208.93d1435969e8.4440.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:829bc8d48a2e8b954d34a93cee0149e46139d20038458b2f952bf4c978db008b
+size 158215
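This file, like the binary files below, is committed as a Git LFS pointer: the repository stores only the object's SHA-256 and byte size, and the blob itself lives in LFS storage. A minimal sketch of checking a downloaded blob against such a pointer (file names are illustrative; verify_lfs is a hypothetical helper, not part of any library):

```python
# Minimal sketch: verifying a downloaded blob against a Git LFS pointer
# like the three-line entries in this commit.
import hashlib

def parse_pointer(pointer_path):
    lines = open(pointer_path).read().splitlines()
    fields = dict(line.split(" ", 1) for line in lines if line)
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

def verify_lfs(pointer_path, blob_path):
    oid, size = parse_pointer(pointer_path)
    data = open(blob_path, "rb").read()
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid

# e.g. verify_lfs("model.safetensors.pointer", "model.safetensors")
```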
logs/events.out.tfevents.1734972249.93d1435969e8.4440.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d6653af7564a1599e618006dc978cba82da207096db72ed34fad94e6126725c
+size 157582
logs/events.out.tfevents.1734972335.93d1435969e8.4440.2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44cc61e392212916dc8e2a14b36dc5c61f0229fd7d8bdd482ce6ec73e0db31fc
+size 230104
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaf804d8485ef093122014af7606b6b95e32f9a51e2d221808fdd76a9a1eb86
+size 550960660
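Together with config.json and the tokenizer files, this checkpoint can be loaded with the standard transformers API. A minimal sketch, assuming the commit's files sit in the current directory and that the head is a sequence classifier (an inference from label_mapping.json, not stated in the commit):

```python
# Minimal sketch: loading this checkpoint with transformers.
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")
model = AutoModelForSequenceClassification.from_pretrained(".")

inputs = tokenizer("example text", return_tensors="pt", truncation=True)
logits = model(**inputs).logits      # shape: (1, num_labels)
print(logits.argmax(dim=-1).item())  # predicted class index
```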
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b853e0f1f0432f673042171c53b8ff771a226748ce2813bf9c065192ed4766af
+size 1101983482
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2cf92d7d6dc31cd881a8f292cbbc0ebf0d8c49dd968d93f9941f5b66f8b5cb3
+size 14244
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bbc4efd8949421fb0d93c9a24ed20a0314df073ad2e0511623b89646697da6e
+size 1064
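optimizer.pt, scheduler.pt, rng_state.pth, and trainer_state.json (below) are the state files the transformers Trainer writes into a checkpoint so a run can resume via trainer.train(resume_from_checkpoint=...). Note that optimizer.pt is roughly twice the size of model.safetensors, consistent with an Adam-style optimizer keeping two moment buffers per parameter. A minimal sketch of inspecting these files, assuming they are ordinary torch.save pickles:

```python
# Minimal sketch: peeking at the checkpoint state files with torch.
import torch

opt = torch.load("optimizer.pt", map_location="cpu")
print(opt.keys())   # typically dict_keys(['state', 'param_groups'])

sched = torch.load("scheduler.pt", map_location="cpu")
print(sched)        # scheduler step counter and last learning rates
```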
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,63 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "max_length": 19,
+  "model_max_length": 512,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}
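"max_length": 19 in this config suggests training inputs were padded or truncated to 19 tokens (an inference from the config, not stated elsewhere in the commit). A minimal sketch of loading the tokenizer from this folder and reproducing that encoding:

```python
# Minimal sketch: DistilBertTokenizer.from_pretrained reads the
# tokenizer_config.json above (plus vocab.txt/tokenizer.json).
from transformers import DistilBertTokenizer

tok = DistilBertTokenizer.from_pretrained(".")
enc = tok("example input", padding="max_length", max_length=19, truncation=True)
print(enc["input_ids"])  # [CLS] ... [SEP] plus [PAD] ids, length 19
```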
trainer_state.json
ADDED
@@ -0,0 +1,2377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+{
+  "best_metric": 0.9499685377695699,
+  "best_model_checkpoint": "./distilbert-finetuned/checkpoint-30520",
+  "epoch": 20.0,
+  "eval_steps": 500,
+  "global_step": 30520,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.0655307994757536, "grad_norm": 1.8972834348678589, "learning_rate": 4.983617300131062e-05, "loss": 8.0585, "step": 100},
+    {"epoch": 0.1310615989515072, "grad_norm": 1.7784675359725952, "learning_rate": 4.9672346002621236e-05, "loss": 8.0566, "step": 200},
+    {"epoch": 0.1965923984272608, "grad_norm": 2.920842409133911, "learning_rate": 4.950851900393185e-05, "loss": 8.0383, "step": 300},
+    {"epoch": 0.2621231979030144, "grad_norm": 3.6251978874206543, "learning_rate": 4.934469200524246e-05, "loss": 7.9945, "step": 400},
+    {"epoch": 0.32765399737876805, "grad_norm": 4.245726585388184, "learning_rate": 4.918086500655309e-05, "loss": 7.8561, "step": 500},
+    {"epoch": 0.3931847968545216, "grad_norm": 4.707456111907959, "learning_rate": 4.90170380078637e-05, "loss": 7.7312, "step": 600},
+    {"epoch": 0.45871559633027525, "grad_norm": 5.121094703674316, "learning_rate": 4.8853211009174314e-05, "loss": 7.5316, "step": 700},
+    {"epoch": 0.5242463958060288, "grad_norm": 5.974259376525879, "learning_rate": 4.868938401048493e-05, "loss": 7.3433, "step": 800},
+    {"epoch": 0.5897771952817824, "grad_norm": 5.776278972625732, "learning_rate": 4.852555701179555e-05, "loss": 7.0775, "step": 900},
+    {"epoch": 0.6553079947575361, "grad_norm": 6.829719543457031, "learning_rate": 4.836173001310616e-05, "loss": 6.8544, "step": 1000},
+    {"epoch": 0.7208387942332897, "grad_norm": 7.138682842254639, "learning_rate": 4.819790301441678e-05, "loss": 6.6293, "step": 1100},
+    {"epoch": 0.7863695937090432, "grad_norm": 6.803562641143799, "learning_rate": 4.803407601572739e-05, "loss": 6.3618, "step": 1200},
+    {"epoch": 0.8519003931847968, "grad_norm": 7.9476752281188965, "learning_rate": 4.787024901703801e-05, "loss": 6.0285, "step": 1300},
+    {"epoch": 0.9174311926605505, "grad_norm": 9.059676170349121, "learning_rate": 4.7706422018348626e-05, "loss": 5.7603, "step": 1400},
+    {"epoch": 0.9829619921363041, "grad_norm": 8.36684513092041, "learning_rate": 4.754259501965924e-05, "loss": 5.4373, "step": 1500},
+    {"epoch": 1.0, "eval_accuracy": 0.3351490825688073, "eval_f1": 0.2757507443104033, "eval_loss": 4.97554874420166, "eval_runtime": 0.9115, "eval_samples_per_second": 3826.458, "eval_steps_per_second": 60.337, "step": 1526},
+    {"epoch": 1.0484927916120577, "grad_norm": 9.069089889526367, "learning_rate": 4.737876802096986e-05, "loss": 4.6364, "step": 1600},
+    {"epoch": 1.1140235910878113, "grad_norm": 9.420051574707031, "learning_rate": 4.7214941022280476e-05, "loss": 4.1063, "step": 1700},
+    {"epoch": 1.1795543905635648, "grad_norm": 9.825067520141602, "learning_rate": 4.705111402359109e-05, "loss": 3.7956, "step": 1800},
+    {"epoch": 1.2450851900393185, "grad_norm": 11.169500350952148, "learning_rate": 4.68872870249017e-05, "loss": 3.5422, "step": 1900},
+    {"epoch": 1.310615989515072, "grad_norm": 10.43234920501709, "learning_rate": 4.672346002621232e-05, "loss": 3.2258, "step": 2000},
+    {"epoch": 1.3761467889908257, "grad_norm": 9.745192527770996, "learning_rate": 4.655963302752294e-05, "loss": 2.9775, "step": 2100},
+    {"epoch": 1.4416775884665793, "grad_norm": 11.521934509277344, "learning_rate": 4.6395806028833554e-05, "loss": 2.8164, "step": 2200},
+    {"epoch": 1.5072083879423328, "grad_norm": 10.573694229125977, "learning_rate": 4.623197903014417e-05, "loss": 2.5647, "step": 2300},
+    {"epoch": 1.5727391874180865, "grad_norm": 11.856453895568848, "learning_rate": 4.606815203145479e-05, "loss": 2.3067, "step": 2400},
+    {"epoch": 1.6382699868938402, "grad_norm": 13.598255157470703, "learning_rate": 4.59043250327654e-05, "loss": 2.2179, "step": 2500},
+    {"epoch": 1.7038007863695936, "grad_norm": 9.973114013671875, "learning_rate": 4.5740498034076015e-05, "loss": 1.9873, "step": 2600},
+    {"epoch": 1.7693315858453473, "grad_norm": 11.870305061340332, "learning_rate": 4.557667103538664e-05, "loss": 1.844, "step": 2700},
+    {"epoch": 1.834862385321101, "grad_norm": 12.496658325195312, "learning_rate": 4.541284403669725e-05, "loss": 1.7231, "step": 2800},
+    {"epoch": 1.9003931847968545, "grad_norm": 9.312653541564941, "learning_rate": 4.5249017038007866e-05, "loss": 1.5158, "step": 2900},
+    {"epoch": 1.9659239842726082, "grad_norm": 12.817119598388672, "learning_rate": 4.508519003931848e-05, "loss": 1.4242, "step": 3000},
+    {"epoch": 2.0, "eval_accuracy": 0.7577408256880734, "eval_f1": 0.7301710983333689, "eval_loss": 1.4731484651565552, "eval_runtime": 0.8691, "eval_samples_per_second": 4013.239, "eval_steps_per_second": 63.282, "step": 3052},
+    {"epoch": 2.031454783748362, "grad_norm": 10.233814239501953, "learning_rate": 4.49213630406291e-05, "loss": 1.1832, "step": 3100},
+    {"epoch": 2.0969855832241153, "grad_norm": 12.700181007385254, "learning_rate": 4.475753604193971e-05, "loss": 0.941, "step": 3200},
+    {"epoch": 2.162516382699869, "grad_norm": 12.155367851257324, "learning_rate": 4.459370904325033e-05, "loss": 0.9038, "step": 3300},
+    {"epoch": 2.2280471821756227, "grad_norm": 12.641743659973145, "learning_rate": 4.4429882044560943e-05, "loss": 0.8846, "step": 3400},
+    {"epoch": 2.293577981651376, "grad_norm": 11.405875205993652, "learning_rate": 4.426605504587156e-05, "loss": 0.8371, "step": 3500},
+    {"epoch": 2.3591087811271296, "grad_norm": 9.20864200592041, "learning_rate": 4.410222804718218e-05, "loss": 0.737, "step": 3600},
+    {"epoch": 2.4246395806028835, "grad_norm": 15.105806350708008, "learning_rate": 4.3938401048492794e-05, "loss": 0.7328, "step": 3700},
+    {"epoch": 2.490170380078637, "grad_norm": 7.7599310874938965, "learning_rate": 4.3774574049803404e-05, "loss": 0.7012, "step": 3800},
+    {"epoch": 2.5557011795543905, "grad_norm": 10.58204460144043, "learning_rate": 4.361074705111403e-05, "loss": 0.6169, "step": 3900},
+    {"epoch": 2.621231979030144, "grad_norm": 9.051236152648926, "learning_rate": 4.344692005242464e-05, "loss": 0.6348, "step": 4000},
+    {"epoch": 2.686762778505898, "grad_norm": 5.441799640655518, "learning_rate": 4.3283093053735255e-05, "loss": 0.5538, "step": 4100},
+    {"epoch": 2.7522935779816513, "grad_norm": 9.519750595092773, "learning_rate": 4.311926605504588e-05, "loss": 0.5626, "step": 4200},
+    {"epoch": 2.8178243774574048, "grad_norm": 6.30112886428833, "learning_rate": 4.295543905635649e-05, "loss": 0.5072, "step": 4300},
+    {"epoch": 2.8833551769331587, "grad_norm": 11.238988876342773, "learning_rate": 4.2791612057667106e-05, "loss": 0.482, "step": 4400},
+    {"epoch": 2.948885976408912, "grad_norm": 8.047210693359375, "learning_rate": 4.262778505897772e-05, "loss": 0.5018, "step": 4500},
+    {"epoch": 3.0, "eval_accuracy": 0.8847477064220184, "eval_f1": 0.8750008035415145, "eval_loss": 0.6360189318656921, "eval_runtime": 0.9139, "eval_samples_per_second": 3816.692, "eval_steps_per_second": 60.183, "step": 4578},
+    {"epoch": 3.0144167758846656, "grad_norm": 5.651986598968506, "learning_rate": 4.246395806028834e-05, "loss": 0.4096, "step": 4600},
+    {"epoch": 3.0799475753604195, "grad_norm": 4.4963274002075195, "learning_rate": 4.230013106159895e-05, "loss": 0.2835, "step": 4700},
+    {"epoch": 3.145478374836173, "grad_norm": 3.222943067550659, "learning_rate": 4.2136304062909573e-05, "loss": 0.2747, "step": 4800},
+    {"epoch": 3.2110091743119265, "grad_norm": 9.179097175598145, "learning_rate": 4.1972477064220184e-05, "loss": 0.2935, "step": 4900},
+    {"epoch": 3.2765399737876804, "grad_norm": 6.673375606536865, "learning_rate": 4.18086500655308e-05, "loss": 0.2708, "step": 5000},
+    {"epoch": 3.342070773263434, "grad_norm": 6.3241400718688965, "learning_rate": 4.164482306684142e-05, "loss": 0.2805, "step": 5100},
+    {"epoch": 3.4076015727391873, "grad_norm": 3.614450216293335, "learning_rate": 4.1480996068152034e-05, "loss": 0.2383, "step": 5200},
+    {"epoch": 3.473132372214941, "grad_norm": 1.6470447778701782, "learning_rate": 4.1317169069462644e-05, "loss": 0.205, "step": 5300},
+    {"epoch": 3.5386631716906947, "grad_norm": 3.095306873321533, "learning_rate": 4.115334207077327e-05, "loss": 0.2245, "step": 5400},
+    {"epoch": 3.604193971166448, "grad_norm": 2.1946816444396973, "learning_rate": 4.0989515072083885e-05, "loss": 0.2159, "step": 5500},
+    {"epoch": 3.669724770642202, "grad_norm": 4.0140886306762695, "learning_rate": 4.0825688073394495e-05, "loss": 0.2264, "step": 5600},
+    {"epoch": 3.7352555701179555, "grad_norm": 4.582081317901611, "learning_rate": 4.066186107470511e-05, "loss": 0.2393, "step": 5700},
+    {"epoch": 3.800786369593709, "grad_norm": 3.2640044689178467, "learning_rate": 4.049803407601573e-05, "loss": 0.1792, "step": 5800},
+    {"epoch": 3.866317169069463, "grad_norm": 4.081344127655029, "learning_rate": 4.0334207077326346e-05, "loss": 0.1883, "step": 5900},
+    {"epoch": 3.9318479685452163, "grad_norm": 7.787130355834961, "learning_rate": 4.017038007863696e-05, "loss": 0.2032, "step": 6000},
+    {"epoch": 3.99737876802097, "grad_norm": 10.889232635498047, "learning_rate": 4.000655307994758e-05, "loss": 0.1863, "step": 6100},
+    {"epoch": 4.0, "eval_accuracy": 0.9208715596330275, "eval_f1": 0.91669118025196, "eval_loss": 0.4279778301715851, "eval_runtime": 0.9015, "eval_samples_per_second": 3869.286, "eval_steps_per_second": 61.012, "step": 6104},
+    {"epoch": 4.062909567496724, "grad_norm": 3.0653154850006104, "learning_rate": 3.984272608125819e-05, "loss": 0.1164, "step": 6200},
+    {"epoch": 4.128440366972477, "grad_norm": 4.659291744232178, "learning_rate": 3.967889908256881e-05, "loss": 0.1092, "step": 6300},
+    {"epoch": 4.193971166448231, "grad_norm": 1.27858567237854, "learning_rate": 3.9515072083879424e-05, "loss": 0.1057, "step": 6400},
+    {"epoch": 4.259501965923985, "grad_norm": 1.6424704790115356, "learning_rate": 3.935124508519004e-05, "loss": 0.1048, "step": 6500},
+    {"epoch": 4.325032765399738, "grad_norm": 2.527622938156128, "learning_rate": 3.918741808650066e-05, "loss": 0.1116, "step": 6600},
+    {"epoch": 4.3905635648754915, "grad_norm": 2.611750364303589, "learning_rate": 3.9023591087811274e-05, "loss": 0.1106, "step": 6700},
+    {"epoch": 4.456094364351245, "grad_norm": 3.4234442710876465, "learning_rate": 3.885976408912189e-05, "loss": 0.0898, "step": 6800},
+    {"epoch": 4.521625163826998, "grad_norm": 0.4667866826057434, "learning_rate": 3.86959370904325e-05, "loss": 0.0793, "step": 6900},
+    {"epoch": 4.587155963302752, "grad_norm": 4.575076580047607, "learning_rate": 3.8532110091743125e-05, "loss": 0.0811, "step": 7000},
+    {"epoch": 4.652686762778506, "grad_norm": 0.7901601195335388, "learning_rate": 3.8368283093053735e-05, "loss": 0.0871, "step": 7100},
+    {"epoch": 4.718217562254259, "grad_norm": 2.668879270553589, "learning_rate": 3.820445609436435e-05, "loss": 0.0834, "step": 7200},
+    {"epoch": 4.783748361730013, "grad_norm": 6.539068698883057, "learning_rate": 3.804062909567497e-05, "loss": 0.0755, "step": 7300},
+    {"epoch": 4.849279161205767, "grad_norm": 2.785691738128662, "learning_rate": 3.7876802096985586e-05, "loss": 0.0869, "step": 7400},
+    {"epoch": 4.91480996068152, "grad_norm": 1.0723165273666382, "learning_rate": 3.7712975098296196e-05, "loss": 0.1016, "step": 7500},
+    {"epoch": 4.980340760157274, "grad_norm": 7.655533790588379, "learning_rate": 3.754914809960682e-05, "loss": 0.082, "step": 7600},
+    {"epoch": 5.0, "eval_accuracy": 0.9323394495412844, "eval_f1": 0.9303857696202881, "eval_loss": 0.3835083246231079, "eval_runtime": 0.8868, "eval_samples_per_second": 3933.101, "eval_steps_per_second": 62.019, "step": 7630},
+    {"epoch": 5.045871559633028, "grad_norm": 6.8434882164001465, "learning_rate": 3.738532110091743e-05, "loss": 0.0531, "step": 7700},
+    {"epoch": 5.111402359108781, "grad_norm": 11.0233736038208, "learning_rate": 3.722149410222805e-05, "loss": 0.0401, "step": 7800},
+    {"epoch": 5.176933158584535, "grad_norm": 1.0623722076416016, "learning_rate": 3.7057667103538664e-05, "loss": 0.0497, "step": 7900},
+    {"epoch": 5.242463958060289, "grad_norm": 0.9341715574264526, "learning_rate": 3.689384010484928e-05, "loss": 0.0384, "step": 8000},
+    {"epoch": 5.307994757536042, "grad_norm": 0.4840922951698303, "learning_rate": 3.67300131061599e-05, "loss": 0.0419, "step": 8100},
+    {"epoch": 5.373525557011796, "grad_norm": 1.7515119314193726, "learning_rate": 3.6566186107470514e-05, "loss": 0.0467, "step": 8200},
+    {"epoch": 5.43905635648755, "grad_norm": 0.6603133082389832, "learning_rate": 3.640235910878113e-05, "loss": 0.0437, "step": 8300},
+    {"epoch": 5.504587155963303, "grad_norm": 3.0891926288604736, "learning_rate": 3.623853211009174e-05, "loss": 0.048, "step": 8400},
+    {"epoch": 5.5701179554390565, "grad_norm": 0.4996233582496643, "learning_rate": 3.6074705111402365e-05, "loss": 0.0444, "step": 8500},
+    {"epoch": 5.6356487549148095, "grad_norm": 2.793330430984497, "learning_rate": 3.5910878112712975e-05, "loss": 0.0458, "step": 8600},
+    {"epoch": 5.7011795543905635, "grad_norm": 0.8790336847305298, "learning_rate": 3.574705111402359e-05, "loss": 0.0491, "step": 8700},
+    {"epoch": 5.766710353866317, "grad_norm": 0.37869125604629517, "learning_rate": 3.558322411533421e-05, "loss": 0.0322, "step": 8800},
+    {"epoch": 5.832241153342071, "grad_norm": 0.6503167152404785, "learning_rate": 3.5419397116644826e-05, "loss": 0.051, "step": 8900},
+    {"epoch": 5.897771952817824, "grad_norm": 0.16301073133945465, "learning_rate": 3.5255570117955436e-05, "loss": 0.0492, "step": 9000},
+    {"epoch": 5.963302752293578, "grad_norm": 0.3980591893196106, "learning_rate": 3.509174311926606e-05, "loss": 0.038, "step": 9100},
+    {"epoch": 6.0, "eval_accuracy": 0.9352064220183486, "eval_f1": 0.9317561038604617, "eval_loss": 0.3840370178222656, "eval_runtime": 0.8822, "eval_samples_per_second": 3953.595, "eval_steps_per_second": 62.342, "step": 9156},
+    {"epoch": 6.028833551769331, "grad_norm": 0.24182792007923126, "learning_rate": 3.492791612057667e-05, "loss": 0.0335, "step": 9200},
+    {"epoch": 6.094364351245085, "grad_norm": 0.41973385214805603, "learning_rate": 3.476408912188729e-05, "loss": 0.0226, "step": 9300},
+    {"epoch": 6.159895150720839, "grad_norm": 0.933502197265625, "learning_rate": 3.460026212319791e-05, "loss": 0.0268, "step": 9400},
+    {"epoch": 6.225425950196592, "grad_norm": 2.3950750827789307, "learning_rate": 3.443643512450852e-05, "loss": 0.0253, "step": 9500},
+    {"epoch": 6.290956749672346, "grad_norm": 0.6362214088439941, "learning_rate": 3.427260812581914e-05, "loss": 0.0205, "step": 9600},
+    {"epoch": 6.3564875491481, "grad_norm": 0.22217431664466858, "learning_rate": 3.4108781127129755e-05, "loss": 0.0214, "step": 9700},
+    {"epoch": 6.422018348623853, "grad_norm": 0.2135070413351059, "learning_rate": 3.394495412844037e-05, "loss": 0.0268, "step": 9800},
+    {"epoch": 6.487549148099607, "grad_norm": 5.583222389221191, "learning_rate": 3.378112712975098e-05, "loss": 0.0236, "step": 9900},
+    {"epoch": 6.553079947575361, "grad_norm": 0.9507617354393005, "learning_rate": 3.3617300131061605e-05, "loss": 0.0285, "step": 10000},
+    {"epoch": 6.618610747051114, "grad_norm": 3.5924887657165527, "learning_rate": 3.3453473132372215e-05, "loss": 0.0196, "step": 10100},
+    {"epoch": 6.684141546526868, "grad_norm": 0.3054388463497162, "learning_rate": 3.328964613368283e-05, "loss": 0.0162, "step": 10200},
+    {"epoch": 6.749672346002622, "grad_norm": 0.09917047619819641, "learning_rate": 3.312581913499345e-05, "loss": 0.0203, "step": 10300},
+    {"epoch": 6.815203145478375, "grad_norm": 10.647476196289062, "learning_rate": 3.2961992136304066e-05, "loss": 0.0293, "step": 10400},
+    {"epoch": 6.8807339449541285, "grad_norm": 0.5372545123100281, "learning_rate": 3.2798165137614676e-05, "loss": 0.0157, "step": 10500},
+    {"epoch": 6.946264744429882, "grad_norm": 0.11427264660596848, "learning_rate": 3.26343381389253e-05, "loss": 0.0254, "step": 10600},
+    {"epoch": 7.0, "eval_accuracy": 0.9386467889908257, "eval_f1": 0.9363697162292346, "eval_loss": 0.3672682046890259, "eval_runtime": 0.8719, "eval_samples_per_second": 4000.361, "eval_steps_per_second": 63.079, "step": 10682},
+    {"epoch": 7.011795543905635, "grad_norm": 5.333648204803467, "learning_rate": 3.247051114023591e-05, "loss": 0.0202, "step": 10700},
+    {"epoch": 7.077326343381389, "grad_norm": 0.7541437149047852, "learning_rate": 3.230668414154653e-05, "loss": 0.022, "step": 10800},
+    {"epoch": 7.142857142857143, "grad_norm": 0.06409142166376114, "learning_rate": 3.2142857142857144e-05, "loss": 0.0134, "step": 10900},
+    {"epoch": 7.208387942332896, "grad_norm": 6.461215972900391, "learning_rate": 3.197903014416776e-05, "loss": 0.0163, "step": 11000},
+    {"epoch": 7.27391874180865, "grad_norm": 0.5002973675727844, "learning_rate": 3.181520314547838e-05, "loss": 0.0131, "step": 11100},
+    {"epoch": 7.339449541284404, "grad_norm": 0.16848881542682648, "learning_rate": 3.1651376146788995e-05, "loss": 0.0139, "step": 11200},
+    {"epoch": 7.404980340760157, "grad_norm": 0.32861247658729553, "learning_rate": 3.148754914809961e-05, "loss": 0.019, "step": 11300},
+    {"epoch": 7.470511140235911, "grad_norm": 1.0504356622695923, "learning_rate": 3.132372214941022e-05, "loss": 0.0112, "step": 11400},
+    {"epoch": 7.536041939711664, "grad_norm": 0.2850879430770874, "learning_rate": 3.115989515072084e-05, "loss": 0.0164, "step": 11500},
+    {"epoch": 7.601572739187418, "grad_norm": 0.8835840225219727, "learning_rate": 3.0996068152031456e-05, "loss": 0.0195, "step": 11600},
+    {"epoch": 7.667103538663172, "grad_norm": 0.16748446226119995, "learning_rate": 3.083224115334207e-05, "loss": 0.0143, "step": 11700},
+    {"epoch": 7.732634338138926, "grad_norm": 0.5115922689437866, "learning_rate": 3.066841415465269e-05, "loss": 0.0195, "step": 11800},
+    {"epoch": 7.798165137614679, "grad_norm": 0.17508633434772491, "learning_rate": 3.0504587155963303e-05, "loss": 0.0124, "step": 11900},
+    {"epoch": 7.863695937090433, "grad_norm": 0.06938499212265015, "learning_rate": 3.0340760157273916e-05, "loss": 0.0151, "step": 12000},
+    {"epoch": 7.929226736566186, "grad_norm": 1.1097829341888428, "learning_rate": 3.0176933158584537e-05, "loss": 0.0162, "step": 12100},
+    {"epoch": 7.99475753604194, "grad_norm": 0.474563866853714, "learning_rate": 3.0013106159895154e-05, "loss": 0.0142, "step": 12200},
+    {"epoch": 8.0, "eval_accuracy": 0.9369266055045872, "eval_f1": 0.9333123982923296, "eval_loss": 0.39479872584342957, "eval_runtime": 0.8765, "eval_samples_per_second": 3979.645, "eval_steps_per_second": 62.752, "step": 12208},
+    {"epoch": 8.060288335517694, "grad_norm": 2.1137709617614746, "learning_rate": 2.9849279161205767e-05, "loss": 0.0087, "step": 12300},
+    {"epoch": 8.125819134993447, "grad_norm": 0.14595501124858856, "learning_rate": 2.9685452162516387e-05, "loss": 0.0097, "step": 12400},
+    {"epoch": 8.191349934469201, "grad_norm": 0.038920313119888306, "learning_rate": 2.9521625163826998e-05, "loss": 0.0132, "step": 12500},
+    {"epoch": 8.256880733944953, "grad_norm": 0.03425636142492294, "learning_rate": 2.9357798165137618e-05, "loss": 0.0073, "step": 12600},
+    {"epoch": 8.322411533420707, "grad_norm": 0.23988936841487885, "learning_rate": 2.919397116644823e-05, "loss": 0.0091, "step": 12700},
+    {"epoch": 8.387942332896461, "grad_norm": 0.10584782809019089, "learning_rate": 2.9030144167758848e-05, "loss": 0.0083, "step": 12800},
+    {"epoch": 8.453473132372215, "grad_norm": 0.09316133707761765, "learning_rate": 2.8866317169069462e-05, "loss": 0.0107, "step": 12900},
+    {"epoch": 8.51900393184797, "grad_norm": 0.6492702960968018, "learning_rate": 2.8702490170380082e-05, "loss": 0.012, "step": 13000},
+    {"epoch": 8.584534731323721, "grad_norm": 0.19327221810817719, "learning_rate": 2.8538663171690692e-05, "loss": 0.0087, "step": 13100},
+    {"epoch": 8.650065530799475, "grad_norm": 0.13705046474933624, "learning_rate": 2.8374836173001313e-05, "loss": 0.0082, "step": 13200},
+    {"epoch": 8.715596330275229, "grad_norm": 0.16649670898914337, "learning_rate": 2.8211009174311926e-05, "loss": 0.0083, "step": 13300},
+    {"epoch": 8.781127129750983, "grad_norm": 0.4147738218307495, "learning_rate": 2.8047182175622543e-05, "loss": 0.0099, "step": 13400},
+    {"epoch": 8.846657929226737, "grad_norm": 0.2398168295621872, "learning_rate": 2.7883355176933163e-05, "loss": 0.0039, "step": 13500},
+    {"epoch": 8.91218872870249, "grad_norm": 2.4962239265441895, "learning_rate": 2.7719528178243777e-05, "loss": 0.0173, "step": 13600},
+    {"epoch": 8.977719528178245, "grad_norm": 0.03992962837219238, "learning_rate": 2.7555701179554394e-05, "loss": 0.0103, "step": 13700},
+    {"epoch": 9.0, "eval_accuracy": 0.9409403669724771, "eval_f1": 0.939672650552811, "eval_loss": 0.3894253671169281, "eval_runtime": 0.8797, "eval_samples_per_second": 3965.161, "eval_steps_per_second": 62.524, "step": 13734},
+    {"epoch": 9.043250327653997, "grad_norm": 0.19869303703308105, "learning_rate": 2.7391874180865007e-05, "loss": 0.0061, "step": 13800},
+    {"epoch": 9.10878112712975, "grad_norm": 0.08110935240983963, "learning_rate": 2.7228047182175624e-05, "loss": 0.0086, "step": 13900},
+    {"epoch": 9.174311926605505, "grad_norm": 3.7491238117218018, "learning_rate": 2.7064220183486238e-05, "loss": 0.0092, "step": 14000},
+    {"epoch": 9.239842726081259, "grad_norm": 0.04541350528597832, "learning_rate": 2.6900393184796858e-05, "loss": 0.0125, "step": 14100},
+    {"epoch": 9.305373525557012, "grad_norm": 0.02239099144935608, "learning_rate": 2.673656618610747e-05, "loss": 0.007, "step": 14200},
+    {"epoch": 9.370904325032765, "grad_norm": 0.5650951862335205, "learning_rate": 2.657273918741809e-05, "loss": 0.0047, "step": 14300},
+    {"epoch": 9.436435124508519, "grad_norm": 0.012850129045546055, "learning_rate": 2.6408912188728702e-05, "loss": 0.0107, "step": 14400},
+    {"epoch": 9.501965923984272, "grad_norm": 0.015726063400506973, "learning_rate": 2.624508519003932e-05, "loss": 0.0156, "step": 14500},
+    {"epoch": 9.567496723460026, "grad_norm": 0.0906534269452095, "learning_rate": 2.6081258191349932e-05, "loss": 0.0079, "step": 14600},
+    {"epoch": 9.63302752293578, "grad_norm": 0.23419497907161713, "learning_rate": 2.5917431192660553e-05, "loss": 0.0051, "step": 14700},
+    {"epoch": 9.698558322411534, "grad_norm": 0.0450860969722271, "learning_rate": 2.575360419397117e-05, "loss": 0.008, "step": 14800},
+    {"epoch": 9.764089121887286, "grad_norm": 3.0917110443115234, "learning_rate": 2.5589777195281783e-05, "loss": 0.0044, "step": 14900},
+    {"epoch": 9.82961992136304, "grad_norm": 1.0102367401123047, "learning_rate": 2.5425950196592403e-05, "loss": 0.0081, "step": 15000},
+    {"epoch": 9.895150720838794, "grad_norm": 0.39359351992607117, "learning_rate": 2.5262123197903013e-05, "loss": 0.0061, "step": 15100},
+    {"epoch": 9.960681520314548, "grad_norm": 0.03703628107905388, "learning_rate": 2.5098296199213634e-05, "loss": 0.0074, "step": 15200},
+    {"epoch": 10.0, "eval_accuracy": 0.9423738532110092, "eval_f1": 0.9400918780505937, "eval_loss": 0.39878183603286743, "eval_runtime": 0.8893, "eval_samples_per_second": 3922.19, "eval_steps_per_second": 61.846, "step": 15260},
+    {"epoch": 10.026212319790302, "grad_norm": 0.055755846202373505, "learning_rate": 2.4934469200524247e-05, "loss": 0.0079, "step": 15300},
+    {"epoch": 10.091743119266056, "grad_norm": 0.21388879418373108, "learning_rate": 2.4770642201834864e-05, "loss": 0.0015, "step": 15400},
+    {"epoch": 10.157273918741808, "grad_norm": 0.05140744522213936, "learning_rate": 2.460681520314548e-05, "loss": 0.0081, "step": 15500},
+    {"epoch": 10.222804718217562, "grad_norm": 0.1071576178073883, "learning_rate": 2.4442988204456098e-05, "loss": 0.0109, "step": 15600},
+    {"epoch": 10.288335517693316, "grad_norm": 0.037079449743032455, "learning_rate": 2.427916120576671e-05, "loss": 0.0025, "step": 15700},
+    {"epoch": 10.35386631716907, "grad_norm": 0.08620253950357437, "learning_rate": 2.411533420707733e-05, "loss": 0.0067, "step": 15800},
+    {"epoch": 10.419397116644824, "grad_norm": 0.053181178867816925, "learning_rate": 2.3951507208387945e-05, "loss": 0.003, "step": 15900},
+    {"epoch": 10.484927916120578, "grad_norm": 0.019558211788535118, "learning_rate": 2.378768020969856e-05, "loss": 0.0015, "step": 16000},
+    {"epoch": 10.55045871559633, "grad_norm": 0.5422232151031494, "learning_rate": 2.3623853211009176e-05, "loss": 0.0182, "step": 16100},
+    {"epoch": 10.615989515072084, "grad_norm": 0.15415391325950623, "learning_rate": 2.3460026212319793e-05, "loss": 0.0043, "step": 16200},
+    {"epoch": 10.681520314547837, "grad_norm": 0.33526667952537537, "learning_rate": 2.3296199213630406e-05, "loss": 0.0015, "step": 16300},
+    {"epoch": 10.747051114023591, "grad_norm": 0.5435523986816406, "learning_rate": 2.3132372214941023e-05, "loss": 0.002, "step": 16400},
+    {"epoch": 10.812581913499345, "grad_norm": 0.03789573162794113, "learning_rate": 2.296854521625164e-05, "loss": 0.0017, "step": 16500},
+    {"epoch": 10.8781127129751, "grad_norm": 0.46037229895591736, "learning_rate": 2.2804718217562254e-05, "loss": 0.0047, "step": 16600},
+    {"epoch": 10.943643512450851, "grad_norm": 0.018136654049158096, "learning_rate": 2.264089121887287e-05, "loss": 0.0026, "step": 16700},
+    {"epoch": 11.0, "eval_accuracy": 0.9412270642201835, "eval_f1": 0.9393162647749803, "eval_loss": 0.3977106511592865, "eval_runtime": 0.8728, "eval_samples_per_second": 3996.231, "eval_steps_per_second": 63.014, "step": 16786},
+    {"epoch": 11.009174311926605, "grad_norm": 0.029348287731409073, "learning_rate": 2.2477064220183487e-05, "loss": 0.0071, "step": 16800},
+    {"epoch": 11.07470511140236, "grad_norm": 0.030079521238803864, "learning_rate": 2.2313237221494104e-05, "loss": 0.0051, "step": 16900},
+    {"epoch": 11.140235910878113, "grad_norm": 0.014014150016009808, "learning_rate": 2.214941022280472e-05, "loss": 0.0047, "step": 17000},
+    {"epoch": 11.205766710353867, "grad_norm": 0.028272485360503197, "learning_rate": 2.1985583224115335e-05, "loss": 0.0027, "step": 17100},
+    {"epoch": 11.271297509829619, "grad_norm": 0.008005212992429733, "learning_rate": 2.182175622542595e-05, "loss": 0.0021, "step": 17200},
+    {"epoch": 11.336828309305373, "grad_norm": 0.18947385251522064, "learning_rate": 2.165792922673657e-05, "loss": 0.0021, "step": 17300},
+    {"epoch": 11.402359108781127, "grad_norm": 0.021595077589154243, "learning_rate": 2.1494102228047182e-05, "loss": 0.0112, "step": 17400},
+    {"epoch": 11.46788990825688, "grad_norm": 0.10984991490840912, "learning_rate": 2.13302752293578e-05, "loss": 0.0023, "step": 17500},
+    {"epoch": 11.533420707732635, "grad_norm": 0.04003112018108368, "learning_rate": 2.1166448230668416e-05, "loss": 0.0069, "step": 17600},
+    {"epoch": 11.598951507208389, "grad_norm": 0.03961130604147911, "learning_rate": 2.100262123197903e-05, "loss": 0.0045, "step": 17700},
+    {"epoch": 11.66448230668414, "grad_norm": 1.8393652439117432, "learning_rate": 2.0838794233289646e-05, "loss": 0.0074, "step": 17800},
+    {"epoch": 11.730013106159895, "grad_norm": 0.05228583887219429, "learning_rate": 2.0674967234600263e-05, "loss": 0.0074, "step": 17900},
+    {"epoch": 11.795543905635649, "grad_norm": 0.06190050393342972, "learning_rate": 2.0511140235910877e-05, "loss": 0.0017, "step": 18000},
+    {"epoch": 11.861074705111402, "grad_norm": 0.06853855401277542, "learning_rate": 2.0347313237221497e-05, "loss": 0.0065, "step": 18100},
+    {"epoch": 11.926605504587156, "grad_norm": 0.13988357782363892, "learning_rate": 2.018348623853211e-05, "loss": 0.0027, "step": 18200},
+    {"epoch": 11.99213630406291, "grad_norm": 0.13300713896751404, "learning_rate": 2.0019659239842727e-05, "loss": 0.0006, "step": 18300},
+    {"epoch": 12.0, "eval_accuracy": 0.9463876146788991, "eval_f1": 0.9448165287184782, "eval_loss": 0.37657901644706726, "eval_runtime": 0.8735, "eval_samples_per_second": 3992.99, "eval_steps_per_second": 62.963, "step": 18312},
+    {"epoch": 12.057667103538662, "grad_norm": 0.021236807107925415, "learning_rate": 1.9855832241153344e-05, "loss": 0.0056, "step": 18400},
+    {"epoch": 12.123197903014416, "grad_norm": 0.03615666553378105, "learning_rate": 1.9692005242463958e-05, "loss": 0.0061, "step": 18500},
+    {"epoch": 12.18872870249017, "grad_norm": 0.045921873301267624, "learning_rate": 1.9528178243774575e-05, "loss": 0.0007, "step": 18600},
+    {"epoch": 12.254259501965924, "grad_norm": 0.023187097162008286, "learning_rate": 1.9364351245085192e-05, "loss": 0.0006, "step": 18700},
+    {"epoch": 12.319790301441678, "grad_norm": 0.007863562554121017, "learning_rate": 1.9200524246395805e-05, "loss": 0.0027, "step": 18800},
+    {"epoch": 12.385321100917432, "grad_norm": 0.028795627877116203, "learning_rate": 1.9036697247706422e-05, "loss": 0.0076, "step": 18900},
+    {"epoch": 12.450851900393184, "grad_norm": 0.030297929421067238, "learning_rate": 1.887287024901704e-05, "loss": 0.0009, "step": 19000},
+    {"epoch": 12.516382699868938, "grad_norm": 1.041812777519226, "learning_rate": 1.8709043250327653e-05, "loss": 0.0009, "step": 19100},
+    {"epoch": 12.581913499344692, "grad_norm": 0.04054298996925354, "learning_rate": 1.854521625163827e-05, "loss": 0.0084, "step": 19200},
+    {"epoch": 12.647444298820446, "grad_norm": 0.019086740911006927, "learning_rate": 1.8381389252948886e-05, "loss": 0.0011, "step": 19300},
+    {"epoch": 12.7129750982962, "grad_norm": 0.04095865413546562, "learning_rate": 1.82175622542595e-05, "loss": 0.0009, "step": 19400},
+    {"epoch": 12.778505897771954, "grad_norm": 0.021935787051916122, "learning_rate": 1.805373525557012e-05, "loss": 0.0005, "step": 19500},
+    {"epoch": 12.844036697247706, "grad_norm": 0.1897253543138504, "learning_rate": 1.7889908256880737e-05, "loss": 0.0019, "step": 19600},
+    {"epoch": 12.90956749672346, "grad_norm": 0.07480347901582718, "learning_rate": 1.772608125819135e-05, "loss": 0.0013, "step": 19700},
+    {"epoch": 12.975098296199214, "grad_norm": 0.00786515325307846, "learning_rate": 1.7562254259501968e-05, "loss": 0.0005, "step": 19800},
+    {"epoch": 13.0, "eval_accuracy": 0.9463876146788991, "eval_f1": 0.9448447433682525, "eval_loss": 0.40437009930610657, "eval_runtime": 0.8642, "eval_samples_per_second": 4036.236, "eval_steps_per_second": 63.645, "step": 19838},
+    {"epoch": 13.040629095674968, "grad_norm": 0.26656442880630493, "learning_rate": 1.7398427260812584e-05, "loss": 0.0039, "step": 19900},
+    {"epoch": 13.106159895150721, "grad_norm": 0.015545975416898727, "learning_rate": 1.7234600262123198e-05, "loss": 0.0006, "step": 20000},
+    {"epoch": 13.171690694626474, "grad_norm": 0.012939069420099258, "learning_rate": 1.7070773263433815e-05, "loss": 0.0009, "step": 20100},
+    {"epoch": 13.237221494102227, "grad_norm": 0.013902663253247738, "learning_rate": 1.6906946264744432e-05, "loss": 0.001, "step": 20200},
+    {"epoch": 13.302752293577981, "grad_norm": 0.12842603027820587, "learning_rate": 1.6743119266055045e-05, "loss": 0.0007, "step": 20300},
+    {"epoch": 13.368283093053735, "grad_norm": 0.01566697470843792, "learning_rate": 1.6579292267365662e-05, "loss": 0.0017, "step": 20400},
+    {"epoch": 13.43381389252949, "grad_norm": 0.028622334823012352, "learning_rate": 1.641546526867628e-05, "loss": 0.0005, "step": 20500},
+    {"epoch": 13.499344692005243, "grad_norm": 0.01282609160989523, "learning_rate": 1.6251638269986893e-05, "loss": 0.0007, "step": 20600},
+    {"epoch": 13.564875491480995, "grad_norm": 0.0726955458521843, "learning_rate": 1.608781127129751e-05, "loss": 0.0004, "step": 20700},
+    {"epoch": 13.63040629095675, "grad_norm": 0.0037081395275890827, "learning_rate": 1.5923984272608126e-05, "loss": 0.0035, "step": 20800},
+    {"epoch": 13.695937090432503, "grad_norm": 0.11256258934736252, "learning_rate": 1.5760157273918743e-05, "loss": 0.0009, "step": 20900},
+    {"epoch": 13.761467889908257, "grad_norm": 0.012089048512279987, "learning_rate": 1.559633027522936e-05, "loss": 0.0004, "step": 21000},
+    {"epoch": 13.82699868938401, "grad_norm": 0.008633548393845558, "learning_rate": 1.5432503276539974e-05, "loss": 0.0049, "step": 21100},
+    {"epoch": 13.892529488859765, "grad_norm": 0.012773215770721436, "learning_rate": 1.526867627785059e-05, "loss": 0.0051, "step": 21200},
+    {"epoch": 13.958060288335517, "grad_norm": 0.5564557909965515, "learning_rate": 1.5104849279161206e-05, "loss": 0.0026, "step": 21300},
+    {"epoch": 14.0, "eval_accuracy": 0.9475344036697247, "eval_f1": 0.9462485272118298, "eval_loss": 0.3972223997116089, "eval_runtime": 0.8884, "eval_samples_per_second": 3926.295, "eval_steps_per_second": 61.911, "step": 21364},
+    {"epoch": 14.02359108781127, "grad_norm": 0.014650699682533741, "learning_rate": 1.4941022280471823e-05, "loss": 0.0004, "step": 21400},
+    {"epoch": 14.089121887287025, "grad_norm": 0.7793611288070679, "learning_rate": 1.4777195281782438e-05, "loss": 0.0004, "step": 21500},
+    {"epoch": 14.154652686762779, "grad_norm": 0.27260562777519226, "learning_rate": 1.4613368283093053e-05, "loss": 0.0003, "step": 21600},
+    {"epoch": 14.220183486238533, "grad_norm": 0.0209233146160841, "learning_rate": 1.444954128440367e-05, "loss": 0.0004, "step": 21700},
+    {"epoch": 14.285714285714286, "grad_norm": 0.009809763170778751, "learning_rate": 1.4285714285714285e-05, "loss": 0.0009, "step": 21800},
+    {"epoch": 14.351245085190039, "grad_norm": 0.004338541068136692, "learning_rate": 1.41218872870249e-05, "loss": 0.001, "step": 21900},
+    {"epoch": 14.416775884665793, "grad_norm": 0.05535097420215607, "learning_rate": 1.3958060288335518e-05, "loss": 0.0003, "step": 22000},
+    {"epoch": 14.482306684141546, "grad_norm": 0.013605128042399883, "learning_rate": 1.3794233289646136e-05, "loss": 0.0007, "step": 22100},
+    {"epoch": 14.5478374836173, "grad_norm": 0.05343281850218773, "learning_rate": 1.3630406290956751e-05, "loss": 0.0076, "step": 22200},
+    {"epoch": 14.613368283093054, "grad_norm": 0.03259427472949028, "learning_rate": 1.3466579292267367e-05, "loss": 0.0006, "step": 22300},
+    {"epoch": 14.678899082568808, "grad_norm": 0.01491202600300312, "learning_rate": 1.3302752293577984e-05, "loss": 0.0003, "step": 22400},
+    {"epoch": 14.74442988204456, "grad_norm": 0.04238777980208397, "learning_rate": 1.3138925294888599e-05, "loss": 0.0048, "step": 22500},
+    {"epoch": 14.809960681520314, "grad_norm": 0.07502109557390213, "learning_rate": 1.2975098296199214e-05, "loss": 0.0003, "step": 22600},
+    {"epoch": 14.875491480996068, "grad_norm": 0.013529137708246708, "learning_rate": 1.281127129750983e-05, "loss": 0.0003, "step": 22700},
+    {"epoch": 14.941022280471822, "grad_norm": 0.005839935038238764, "learning_rate": 1.2647444298820446e-05, "loss": 0.0046, "step": 22800},
+    {"epoch": 15.0, "eval_accuracy": 0.9489678899082569, "eval_f1": 0.947361701117435, "eval_loss": 0.3973062038421631, "eval_runtime": 0.8841, "eval_samples_per_second": 3945.29, "eval_steps_per_second": 62.211, "step": 22890},
+    {"epoch": 15.006553079947576, "grad_norm": 0.017636075615882874, "learning_rate": 1.2483617300131061e-05, "loss": 0.0024, "step": 22900},
+    {"epoch": 15.07208387942333, "grad_norm": 0.022421281784772873, "learning_rate": 1.2319790301441678e-05, "loss": 0.0031, "step": 23000},
+    {"epoch": 15.137614678899082, "grad_norm": 0.01283260341733694, "learning_rate": 1.2155963302752295e-05, "loss": 0.0003, "step": 23100},
+    {"epoch": 15.203145478374836, "grad_norm": 0.007434241008013487, "learning_rate": 1.199213630406291e-05, "loss": 0.0061, "step": 23200},
+    {"epoch": 15.26867627785059, "grad_norm": 0.02019626460969448, "learning_rate": 1.1828309305373527e-05, "loss": 0.0003, "step": 23300},
+    {"epoch": 15.334207077326344, "grad_norm": 0.010890827514231205, "learning_rate": 1.1664482306684142e-05, "loss": 0.0002, "step": 23400},
+    {"epoch": 15.399737876802098, "grad_norm": 0.016969241201877594, "learning_rate": 1.1500655307994758e-05, "loss": 0.0075, "step": 23500},
+    {"epoch": 15.46526867627785, "grad_norm": 0.02037014067173004, "learning_rate": 1.1336828309305373e-05, "loss": 0.0007, "step": 23600},
+    {"epoch": 15.530799475753604, "grad_norm": 0.00850609689950943, "learning_rate": 1.117300131061599e-05, "loss": 0.0002, "step": 23700},
+    {"epoch": 15.596330275229358, "grad_norm": 0.003405163995921612, "learning_rate": 1.1009174311926607e-05, "loss": 0.0024, "step": 23800},
+    {"epoch": 15.661861074705111, "grad_norm": 0.013705270364880562,
|
1830 |
+
"learning_rate": 1.0845347313237222e-05,
|
1831 |
+
"loss": 0.0002,
|
1832 |
+
"step": 23900
|
1833 |
+
},
|
1834 |
+
{
|
1835 |
+
"epoch": 15.727391874180865,
|
1836 |
+
"grad_norm": 0.00401644641533494,
|
1837 |
+
"learning_rate": 1.0681520314547839e-05,
|
1838 |
+
"loss": 0.0002,
|
1839 |
+
"step": 24000
|
1840 |
+
},
|
1841 |
+
{
|
1842 |
+
"epoch": 15.79292267365662,
|
1843 |
+
"grad_norm": 0.06715580821037292,
|
1844 |
+
"learning_rate": 1.0517693315858454e-05,
|
1845 |
+
"loss": 0.0068,
|
1846 |
+
"step": 24100
|
1847 |
+
},
|
1848 |
+
{
|
1849 |
+
"epoch": 15.858453473132371,
|
1850 |
+
"grad_norm": 0.007226752582937479,
|
1851 |
+
"learning_rate": 1.035386631716907e-05,
|
1852 |
+
"loss": 0.0003,
|
1853 |
+
"step": 24200
|
1854 |
+
},
|
1855 |
+
{
|
1856 |
+
"epoch": 15.923984272608125,
|
1857 |
+
"grad_norm": 0.07869122177362442,
|
1858 |
+
"learning_rate": 1.0190039318479686e-05,
|
1859 |
+
"loss": 0.0016,
|
1860 |
+
"step": 24300
|
1861 |
+
},
|
1862 |
+
{
|
1863 |
+
"epoch": 15.98951507208388,
|
1864 |
+
"grad_norm": 0.010308779776096344,
|
1865 |
+
"learning_rate": 1.0026212319790301e-05,
|
1866 |
+
"loss": 0.0001,
|
1867 |
+
"step": 24400
|
1868 |
+
},
|
1869 |
+
{
|
1870 |
+
"epoch": 16.0,
|
1871 |
+
"eval_accuracy": 0.9492545871559633,
|
1872 |
+
"eval_f1": 0.9480250779419357,
|
1873 |
+
"eval_loss": 0.40250906348228455,
|
1874 |
+
"eval_runtime": 0.885,
|
1875 |
+
"eval_samples_per_second": 3941.411,
|
1876 |
+
"eval_steps_per_second": 62.15,
|
1877 |
+
"step": 24416
|
1878 |
+
},
|
1879 |
+
{
|
1880 |
+
"epoch": 16.05504587155963,
|
1881 |
+
"grad_norm": 0.011048965156078339,
|
1882 |
+
"learning_rate": 9.862385321100918e-06,
|
1883 |
+
"loss": 0.0002,
|
1884 |
+
"step": 24500
|
1885 |
+
},
|
1886 |
+
{
|
1887 |
+
"epoch": 16.120576671035387,
|
1888 |
+
"grad_norm": 0.005700926296412945,
|
1889 |
+
"learning_rate": 9.698558322411533e-06,
|
1890 |
+
"loss": 0.001,
|
1891 |
+
"step": 24600
|
1892 |
+
},
|
1893 |
+
{
|
1894 |
+
"epoch": 16.18610747051114,
|
1895 |
+
"grad_norm": 0.00868783425539732,
|
1896 |
+
"learning_rate": 9.53473132372215e-06,
|
1897 |
+
"loss": 0.0002,
|
1898 |
+
"step": 24700
|
1899 |
+
},
|
1900 |
+
{
|
1901 |
+
"epoch": 16.251638269986895,
|
1902 |
+
"grad_norm": 0.003008009400218725,
|
1903 |
+
"learning_rate": 9.370904325032766e-06,
|
1904 |
+
"loss": 0.0002,
|
1905 |
+
"step": 24800
|
1906 |
+
},
|
1907 |
+
{
|
1908 |
+
"epoch": 16.317169069462647,
|
1909 |
+
"grad_norm": 0.039118170738220215,
|
1910 |
+
"learning_rate": 9.20707732634338e-06,
|
1911 |
+
"loss": 0.0017,
|
1912 |
+
"step": 24900
|
1913 |
+
},
|
1914 |
+
{
|
1915 |
+
"epoch": 16.382699868938403,
|
1916 |
+
"grad_norm": 0.003417972009629011,
|
1917 |
+
"learning_rate": 9.043250327653998e-06,
|
1918 |
+
"loss": 0.0002,
|
1919 |
+
"step": 25000
|
1920 |
+
},
|
1921 |
+
{
|
1922 |
+
"epoch": 16.448230668414155,
|
1923 |
+
"grad_norm": 0.012208909727633,
|
1924 |
+
"learning_rate": 8.879423328964615e-06,
|
1925 |
+
"loss": 0.0002,
|
1926 |
+
"step": 25100
|
1927 |
+
},
|
1928 |
+
{
|
1929 |
+
"epoch": 16.513761467889907,
|
1930 |
+
"grad_norm": 0.015429310500621796,
|
1931 |
+
"learning_rate": 8.71559633027523e-06,
|
1932 |
+
"loss": 0.0001,
|
1933 |
+
"step": 25200
|
1934 |
+
},
|
1935 |
+
{
|
1936 |
+
"epoch": 16.579292267365663,
|
1937 |
+
"grad_norm": 0.007409450598061085,
|
1938 |
+
"learning_rate": 8.551769331585847e-06,
|
1939 |
+
"loss": 0.0004,
|
1940 |
+
"step": 25300
|
1941 |
+
},
|
1942 |
+
{
|
1943 |
+
"epoch": 16.644823066841415,
|
1944 |
+
"grad_norm": 0.009294740855693817,
|
1945 |
+
"learning_rate": 8.387942332896462e-06,
|
1946 |
+
"loss": 0.0002,
|
1947 |
+
"step": 25400
|
1948 |
+
},
|
1949 |
+
{
|
1950 |
+
"epoch": 16.71035386631717,
|
1951 |
+
"grad_norm": 0.0043761348351836205,
|
1952 |
+
"learning_rate": 8.224115334207077e-06,
|
1953 |
+
"loss": 0.0033,
|
1954 |
+
"step": 25500
|
1955 |
+
},
|
1956 |
+
{
|
1957 |
+
"epoch": 16.775884665792923,
|
1958 |
+
"grad_norm": 0.017104586586356163,
|
1959 |
+
"learning_rate": 8.060288335517694e-06,
|
1960 |
+
"loss": 0.0002,
|
1961 |
+
"step": 25600
|
1962 |
+
},
|
1963 |
+
{
|
1964 |
+
"epoch": 16.841415465268675,
|
1965 |
+
"grad_norm": 0.0103053729981184,
|
1966 |
+
"learning_rate": 7.89646133682831e-06,
|
1967 |
+
"loss": 0.0002,
|
1968 |
+
"step": 25700
|
1969 |
+
},
|
1970 |
+
{
|
1971 |
+
"epoch": 16.90694626474443,
|
1972 |
+
"grad_norm": 0.008107037283480167,
|
1973 |
+
"learning_rate": 7.732634338138926e-06,
|
1974 |
+
"loss": 0.0002,
|
1975 |
+
"step": 25800
|
1976 |
+
},
|
1977 |
+
{
|
1978 |
+
"epoch": 16.972477064220183,
|
1979 |
+
"grad_norm": 0.025965586304664612,
|
1980 |
+
"learning_rate": 7.568807339449542e-06,
|
1981 |
+
"loss": 0.0003,
|
1982 |
+
"step": 25900
|
1983 |
+
},
|
1984 |
+
{
|
1985 |
+
"epoch": 17.0,
|
1986 |
+
"eval_accuracy": 0.950401376146789,
|
1987 |
+
"eval_f1": 0.949057083016372,
|
1988 |
+
"eval_loss": 0.3941075801849365,
|
1989 |
+
"eval_runtime": 0.8787,
|
1990 |
+
"eval_samples_per_second": 3969.396,
|
1991 |
+
"eval_steps_per_second": 62.591,
|
1992 |
+
"step": 25942
|
1993 |
+
},
|
1994 |
+
{
|
1995 |
+
"epoch": 17.03800786369594,
|
1996 |
+
"grad_norm": 0.0071102771908044815,
|
1997 |
+
"learning_rate": 7.4049803407601575e-06,
|
1998 |
+
"loss": 0.0001,
|
1999 |
+
"step": 26000
|
2000 |
+
},
|
2001 |
+
{
|
2002 |
+
"epoch": 17.10353866317169,
|
2003 |
+
"grad_norm": 0.0024903868325054646,
|
2004 |
+
"learning_rate": 7.241153342070774e-06,
|
2005 |
+
"loss": 0.0003,
|
2006 |
+
"step": 26100
|
2007 |
+
},
|
2008 |
+
{
|
2009 |
+
"epoch": 17.169069462647446,
|
2010 |
+
"grad_norm": 0.00496539194136858,
|
2011 |
+
"learning_rate": 7.07732634338139e-06,
|
2012 |
+
"loss": 0.0001,
|
2013 |
+
"step": 26200
|
2014 |
+
},
|
2015 |
+
{
|
2016 |
+
"epoch": 17.234600262123198,
|
2017 |
+
"grad_norm": 0.005121257156133652,
|
2018 |
+
"learning_rate": 6.913499344692005e-06,
|
2019 |
+
"loss": 0.0001,
|
2020 |
+
"step": 26300
|
2021 |
+
},
|
2022 |
+
{
|
2023 |
+
"epoch": 17.30013106159895,
|
2024 |
+
"grad_norm": 0.0038872575387358665,
|
2025 |
+
"learning_rate": 6.749672346002621e-06,
|
2026 |
+
"loss": 0.0001,
|
2027 |
+
"step": 26400
|
2028 |
+
},
|
2029 |
+
{
|
2030 |
+
"epoch": 17.365661861074706,
|
2031 |
+
"grad_norm": 0.0026088629383593798,
|
2032 |
+
"learning_rate": 6.585845347313238e-06,
|
2033 |
+
"loss": 0.0001,
|
2034 |
+
"step": 26500
|
2035 |
+
},
|
2036 |
+
{
|
2037 |
+
"epoch": 17.431192660550458,
|
2038 |
+
"grad_norm": 0.0027847271412611008,
|
2039 |
+
"learning_rate": 6.422018348623854e-06,
|
2040 |
+
"loss": 0.0004,
|
2041 |
+
"step": 26600
|
2042 |
+
},
|
2043 |
+
{
|
2044 |
+
"epoch": 17.496723460026214,
|
2045 |
+
"grad_norm": 0.010283850133419037,
|
2046 |
+
"learning_rate": 6.25819134993447e-06,
|
2047 |
+
"loss": 0.0001,
|
2048 |
+
"step": 26700
|
2049 |
+
},
|
2050 |
+
{
|
2051 |
+
"epoch": 17.562254259501966,
|
2052 |
+
"grad_norm": 0.0012891000369563699,
|
2053 |
+
"learning_rate": 6.094364351245085e-06,
|
2054 |
+
"loss": 0.0004,
|
2055 |
+
"step": 26800
|
2056 |
+
},
|
2057 |
+
{
|
2058 |
+
"epoch": 17.627785058977718,
|
2059 |
+
"grad_norm": 0.0068209609016776085,
|
2060 |
+
"learning_rate": 5.930537352555701e-06,
|
2061 |
+
"loss": 0.0001,
|
2062 |
+
"step": 26900
|
2063 |
+
},
|
2064 |
+
{
|
2065 |
+
"epoch": 17.693315858453474,
|
2066 |
+
"grad_norm": 0.012583351694047451,
|
2067 |
+
"learning_rate": 5.766710353866317e-06,
|
2068 |
+
"loss": 0.0001,
|
2069 |
+
"step": 27000
|
2070 |
+
},
|
2071 |
+
{
|
2072 |
+
"epoch": 17.758846657929226,
|
2073 |
+
"grad_norm": 0.014517087489366531,
|
2074 |
+
"learning_rate": 5.602883355176933e-06,
|
2075 |
+
"loss": 0.0053,
|
2076 |
+
"step": 27100
|
2077 |
+
},
|
2078 |
+
{
|
2079 |
+
"epoch": 17.82437745740498,
|
2080 |
+
"grad_norm": 0.003594920039176941,
|
2081 |
+
"learning_rate": 5.4390563564875494e-06,
|
2082 |
+
"loss": 0.0001,
|
2083 |
+
"step": 27200
|
2084 |
+
},
|
2085 |
+
{
|
2086 |
+
"epoch": 17.889908256880734,
|
2087 |
+
"grad_norm": 0.014988411217927933,
|
2088 |
+
"learning_rate": 5.2752293577981655e-06,
|
2089 |
+
"loss": 0.0003,
|
2090 |
+
"step": 27300
|
2091 |
+
},
|
2092 |
+
{
|
2093 |
+
"epoch": 17.955439056356486,
|
2094 |
+
"grad_norm": 0.00935112964361906,
|
2095 |
+
"learning_rate": 5.1114023591087816e-06,
|
2096 |
+
"loss": 0.0001,
|
2097 |
+
"step": 27400
|
2098 |
+
},
|
2099 |
+
{
|
2100 |
+
"epoch": 18.0,
|
2101 |
+
"eval_accuracy": 0.9501146788990825,
|
2102 |
+
"eval_f1": 0.948460314214328,
|
2103 |
+
"eval_loss": 0.40203723311424255,
|
2104 |
+
"eval_runtime": 0.9031,
|
2105 |
+
"eval_samples_per_second": 3862.373,
|
2106 |
+
"eval_steps_per_second": 60.903,
|
2107 |
+
"step": 27468
|
2108 |
+
},
|
2109 |
+
{
|
2110 |
+
"epoch": 18.02096985583224,
|
2111 |
+
"grad_norm": 0.0007160278619267046,
|
2112 |
+
"learning_rate": 4.947575360419398e-06,
|
2113 |
+
"loss": 0.0001,
|
2114 |
+
"step": 27500
|
2115 |
+
},
|
2116 |
+
{
|
2117 |
+
"epoch": 18.086500655307994,
|
2118 |
+
"grad_norm": 0.0028267614543437958,
|
2119 |
+
"learning_rate": 4.783748361730013e-06,
|
2120 |
+
"loss": 0.0001,
|
2121 |
+
"step": 27600
|
2122 |
+
},
|
2123 |
+
{
|
2124 |
+
"epoch": 18.15203145478375,
|
2125 |
+
"grad_norm": 0.010204290971159935,
|
2126 |
+
"learning_rate": 4.61992136304063e-06,
|
2127 |
+
"loss": 0.0001,
|
2128 |
+
"step": 27700
|
2129 |
+
},
|
2130 |
+
{
|
2131 |
+
"epoch": 18.2175622542595,
|
2132 |
+
"grad_norm": 0.005847644526511431,
|
2133 |
+
"learning_rate": 4.456094364351245e-06,
|
2134 |
+
"loss": 0.0001,
|
2135 |
+
"step": 27800
|
2136 |
+
},
|
2137 |
+
{
|
2138 |
+
"epoch": 18.283093053735257,
|
2139 |
+
"grad_norm": 0.018655648455023766,
|
2140 |
+
"learning_rate": 4.292267365661861e-06,
|
2141 |
+
"loss": 0.0001,
|
2142 |
+
"step": 27900
|
2143 |
+
},
|
2144 |
+
{
|
2145 |
+
"epoch": 18.34862385321101,
|
2146 |
+
"grad_norm": 0.0011456008069217205,
|
2147 |
+
"learning_rate": 4.128440366972477e-06,
|
2148 |
+
"loss": 0.0006,
|
2149 |
+
"step": 28000
|
2150 |
+
},
|
2151 |
+
{
|
2152 |
+
"epoch": 18.41415465268676,
|
2153 |
+
"grad_norm": 0.0034626726992428303,
|
2154 |
+
"learning_rate": 3.964613368283093e-06,
|
2155 |
+
"loss": 0.0001,
|
2156 |
+
"step": 28100
|
2157 |
+
},
|
2158 |
+
{
|
2159 |
+
"epoch": 18.479685452162517,
|
2160 |
+
"grad_norm": 0.015080388635396957,
|
2161 |
+
"learning_rate": 3.800786369593709e-06,
|
2162 |
+
"loss": 0.0001,
|
2163 |
+
"step": 28200
|
2164 |
+
},
|
2165 |
+
{
|
2166 |
+
"epoch": 18.54521625163827,
|
2167 |
+
"grad_norm": 0.020128346979618073,
|
2168 |
+
"learning_rate": 3.6369593709043257e-06,
|
2169 |
+
"loss": 0.0031,
|
2170 |
+
"step": 28300
|
2171 |
+
},
|
2172 |
+
{
|
2173 |
+
"epoch": 18.610747051114025,
|
2174 |
+
"grad_norm": 0.011367076076567173,
|
2175 |
+
"learning_rate": 3.4731323722149413e-06,
|
2176 |
+
"loss": 0.0001,
|
2177 |
+
"step": 28400
|
2178 |
+
},
|
2179 |
+
{
|
2180 |
+
"epoch": 18.676277850589777,
|
2181 |
+
"grad_norm": 0.0023978736717253923,
|
2182 |
+
"learning_rate": 3.309305373525557e-06,
|
2183 |
+
"loss": 0.0001,
|
2184 |
+
"step": 28500
|
2185 |
+
},
|
2186 |
+
{
|
2187 |
+
"epoch": 18.74180865006553,
|
2188 |
+
"grad_norm": 0.0030761794187128544,
|
2189 |
+
"learning_rate": 3.145478374836173e-06,
|
2190 |
+
"loss": 0.0025,
|
2191 |
+
"step": 28600
|
2192 |
+
},
|
2193 |
+
{
|
2194 |
+
"epoch": 18.807339449541285,
|
2195 |
+
"grad_norm": 0.001883818069472909,
|
2196 |
+
"learning_rate": 2.981651376146789e-06,
|
2197 |
+
"loss": 0.0001,
|
2198 |
+
"step": 28700
|
2199 |
+
},
|
2200 |
+
{
|
2201 |
+
"epoch": 18.872870249017037,
|
2202 |
+
"grad_norm": 0.00347805954515934,
|
2203 |
+
"learning_rate": 2.817824377457405e-06,
|
2204 |
+
"loss": 0.0001,
|
2205 |
+
"step": 28800
|
2206 |
+
},
|
2207 |
+
{
|
2208 |
+
"epoch": 18.938401048492793,
|
2209 |
+
"grad_norm": 0.004366457927972078,
|
2210 |
+
"learning_rate": 2.6539973787680212e-06,
|
2211 |
+
"loss": 0.0001,
|
2212 |
+
"step": 28900
|
2213 |
+
},
|
2214 |
+
{
|
2215 |
+
"epoch": 19.0,
|
2216 |
+
"eval_accuracy": 0.9509747706422018,
|
2217 |
+
"eval_f1": 0.9494461116737494,
|
2218 |
+
"eval_loss": 0.3973633944988251,
|
2219 |
+
"eval_runtime": 0.8752,
|
2220 |
+
"eval_samples_per_second": 3985.248,
|
2221 |
+
"eval_steps_per_second": 62.841,
|
2222 |
+
"step": 28994
|
2223 |
+
},
|
2224 |
+
{
|
2225 |
+
"epoch": 19.003931847968545,
|
2226 |
+
"grad_norm": 0.004239593632519245,
|
2227 |
+
"learning_rate": 2.490170380078637e-06,
|
2228 |
+
"loss": 0.0001,
|
2229 |
+
"step": 29000
|
2230 |
+
},
|
2231 |
+
{
|
2232 |
+
"epoch": 19.0694626474443,
|
2233 |
+
"grad_norm": 0.051923561841249466,
|
2234 |
+
"learning_rate": 2.326343381389253e-06,
|
2235 |
+
"loss": 0.0001,
|
2236 |
+
"step": 29100
|
2237 |
+
},
|
2238 |
+
{
|
2239 |
+
"epoch": 19.134993446920053,
|
2240 |
+
"grad_norm": 0.01663641817867756,
|
2241 |
+
"learning_rate": 2.1625163826998694e-06,
|
2242 |
+
"loss": 0.001,
|
2243 |
+
"step": 29200
|
2244 |
+
},
|
2245 |
+
{
|
2246 |
+
"epoch": 19.200524246395805,
|
2247 |
+
"grad_norm": 0.006036526523530483,
|
2248 |
+
"learning_rate": 1.998689384010485e-06,
|
2249 |
+
"loss": 0.0001,
|
2250 |
+
"step": 29300
|
2251 |
+
},
|
2252 |
+
{
|
2253 |
+
"epoch": 19.26605504587156,
|
2254 |
+
"grad_norm": 0.004666994791477919,
|
2255 |
+
"learning_rate": 1.8348623853211011e-06,
|
2256 |
+
"loss": 0.0001,
|
2257 |
+
"step": 29400
|
2258 |
+
},
|
2259 |
+
{
|
2260 |
+
"epoch": 19.331585845347313,
|
2261 |
+
"grad_norm": 0.0010127611458301544,
|
2262 |
+
"learning_rate": 1.671035386631717e-06,
|
2263 |
+
"loss": 0.0001,
|
2264 |
+
"step": 29500
|
2265 |
+
},
|
2266 |
+
{
|
2267 |
+
"epoch": 19.39711664482307,
|
2268 |
+
"grad_norm": 0.0074880653992295265,
|
2269 |
+
"learning_rate": 1.507208387942333e-06,
|
2270 |
+
"loss": 0.0001,
|
2271 |
+
"step": 29600
|
2272 |
+
},
|
2273 |
+
{
|
2274 |
+
"epoch": 19.46264744429882,
|
2275 |
+
"grad_norm": 0.0026892530731856823,
|
2276 |
+
"learning_rate": 1.3433813892529489e-06,
|
2277 |
+
"loss": 0.0014,
|
2278 |
+
"step": 29700
|
2279 |
+
},
|
2280 |
+
{
|
2281 |
+
"epoch": 19.528178243774573,
|
2282 |
+
"grad_norm": 0.0020644895266741514,
|
2283 |
+
"learning_rate": 1.179554390563565e-06,
|
2284 |
+
"loss": 0.0001,
|
2285 |
+
"step": 29800
|
2286 |
+
},
|
2287 |
+
{
|
2288 |
+
"epoch": 19.59370904325033,
|
2289 |
+
"grad_norm": 0.0029051878955215216,
|
2290 |
+
"learning_rate": 1.0157273918741808e-06,
|
2291 |
+
"loss": 0.0001,
|
2292 |
+
"step": 29900
|
2293 |
+
},
|
2294 |
+
{
|
2295 |
+
"epoch": 19.65923984272608,
|
2296 |
+
"grad_norm": 0.005995690356940031,
|
2297 |
+
"learning_rate": 8.51900393184797e-07,
|
2298 |
+
"loss": 0.0001,
|
2299 |
+
"step": 30000
|
2300 |
+
},
|
2301 |
+
{
|
2302 |
+
"epoch": 19.724770642201836,
|
2303 |
+
"grad_norm": 0.006156248040497303,
|
2304 |
+
"learning_rate": 6.880733944954129e-07,
|
2305 |
+
"loss": 0.0001,
|
2306 |
+
"step": 30100
|
2307 |
+
},
|
2308 |
+
{
|
2309 |
+
"epoch": 19.790301441677588,
|
2310 |
+
"grad_norm": 0.0009661901276558638,
|
2311 |
+
"learning_rate": 5.242463958060289e-07,
|
2312 |
+
"loss": 0.0001,
|
2313 |
+
"step": 30200
|
2314 |
+
},
|
2315 |
+
{
|
2316 |
+
"epoch": 19.855832241153344,
|
2317 |
+
"grad_norm": 0.007489080540835857,
|
2318 |
+
"learning_rate": 3.6041939711664483e-07,
|
2319 |
+
"loss": 0.0001,
|
2320 |
+
"step": 30300
|
2321 |
+
},
|
2322 |
+
{
|
2323 |
+
"epoch": 19.921363040629096,
|
2324 |
+
"grad_norm": 0.006394806317985058,
|
2325 |
+
"learning_rate": 1.9659239842726081e-07,
|
2326 |
+
"loss": 0.0004,
|
2327 |
+
"step": 30400
|
2328 |
+
},
|
2329 |
+
{
|
2330 |
+
"epoch": 19.986893840104848,
|
2331 |
+
"grad_norm": 0.007664592005312443,
|
2332 |
+
"learning_rate": 3.2765399737876805e-08,
|
2333 |
+
"loss": 0.0001,
|
2334 |
+
"step": 30500
|
2335 |
+
},
|
2336 |
+
{
|
2337 |
+
"epoch": 20.0,
|
2338 |
+
"eval_accuracy": 0.9515481651376146,
|
2339 |
+
"eval_f1": 0.9499685377695699,
|
2340 |
+
"eval_loss": 0.398087739944458,
|
2341 |
+
"eval_runtime": 0.9372,
|
2342 |
+
"eval_samples_per_second": 3721.91,
|
2343 |
+
"eval_steps_per_second": 58.688,
|
2344 |
+
"step": 30520
|
2345 |
+
}
|
2346 |
+
],
|
2347 |
+
"logging_steps": 100,
|
2348 |
+
"max_steps": 30520,
|
2349 |
+
"num_input_tokens_seen": 0,
|
2350 |
+
"num_train_epochs": 20,
|
2351 |
+
"save_steps": 500,
|
2352 |
+
"stateful_callbacks": {
|
2353 |
+
"EarlyStoppingCallback": {
|
2354 |
+
"args": {
|
2355 |
+
"early_stopping_patience": 3,
|
2356 |
+
"early_stopping_threshold": 0.0
|
2357 |
+
},
|
2358 |
+
"attributes": {
|
2359 |
+
"early_stopping_patience_counter": 0
|
2360 |
+
}
|
2361 |
+
},
|
2362 |
+
"TrainerControl": {
|
2363 |
+
"args": {
|
2364 |
+
"should_epoch_stop": false,
|
2365 |
+
"should_evaluate": false,
|
2366 |
+
"should_log": false,
|
2367 |
+
"should_save": true,
|
2368 |
+
"should_training_stop": true
|
2369 |
+
},
|
2370 |
+
"attributes": {}
|
2371 |
+
}
|
2372 |
+
},
|
2373 |
+
"total_flos": 2534266005283680.0,
|
2374 |
+
"train_batch_size": 16,
|
2375 |
+
"trial_name": null,
|
2376 |
+
"trial_params": null
|
2377 |
+
}
|
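Note on the learning-rate column above: every logged value is consistent with a linear schedule decaying, with no warmup, from a peak of 5e-5 at step 0 to 0 at "max_steps" (30520). The peak is inferred by fitting the logged values; it is not recorded in this file. A minimal sketch of that schedule, with spot checks against two logged entries:

# Sketch of the schedule implied by trainer_state.json: linear decay to
# zero, no warmup. PEAK_LR is inferred from the log, not stated in it.
PEAK_LR = 5e-5
MAX_STEPS = 30520  # "max_steps" from trainer_state.json

def linear_lr(step: int) -> float:
    """Learning rate at `step` under linear decay to zero with no warmup."""
    return PEAK_LR * (MAX_STEPS - step) / MAX_STEPS

assert abs(linear_lr(21000) - 1.559633027522936e-05) < 1e-12   # step-21000 entry
assert abs(linear_lr(30500) - 3.2765399737876805e-08) < 1e-15  # step-30500 entry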
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46d297cc70e6cb2c9131e9fa9f258eff0dbf932d568657d307fb6830a01bfa86
+size 5304
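This three-line stub is a Git LFS pointer: the repository tracks only the spec version, the object's sha256, and its size in bytes, while the 5304-byte payload lives in LFS storage. A minimal parser sketch for that pointer format, applied to the pointer above:

def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer ('key value' per line) into a dict."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"version": fields["version"],
            "sha256": fields["oid"].removeprefix("sha256:"),
            "size": int(fields["size"])}

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:46d297cc70e6cb2c9131e9fa9f258eff0dbf932d568657d307fb6830a01bfa86\n"
    "size 5304\n"
)
info = parse_lfs_pointer(pointer)
assert info["size"] == 5304 and len(info["sha256"]) == 64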
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
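Finally, training_args.bin above is the TrainingArguments object that transformers' Trainer serializes with torch.save next to each checkpoint. The sketch below is a hedged reconstruction, not the author's script: one configuration consistent with trainer_state.json (20 epochs, train batch size 16, logging every 100 steps, per-epoch evaluation, early stopping with patience 3). output_dir, save_strategy, and metric_for_best_model are assumptions, not values read from this commit.

from transformers import TrainingArguments, EarlyStoppingCallback

args = TrainingArguments(
    output_dir="distilbert-finetuned",  # hypothetical path
    num_train_epochs=20,                # "num_train_epochs": 20
    per_device_train_batch_size=16,     # "train_batch_size": 16
    logging_steps=100,                  # "logging_steps": 100
    eval_strategy="epoch",              # eval entries appear once per epoch
    save_strategy="epoch",              # assumption; pairs with the flag below
    load_best_model_at_end=True,        # required by EarlyStoppingCallback
    metric_for_best_model="eval_f1",    # assumption; eval_f1 is what is logged
)
stopper = EarlyStoppingCallback(early_stopping_patience=3,
                                early_stopping_threshold=0.0)
# These would be passed to transformers.Trainer(args=args, callbacks=[stopper], ...);
# the resulting args object is what training_args.bin deserializes to.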