diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -2,8 +2,8 @@ "measurement": { "model.layers.0.self_attn": [ { - "accuracy": 0.9216393907554448, - "total_bits": 89141248, + "accuracy": 0.9140455884626135, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -66,8 +66,8 @@ } }, { - "accuracy": 0.9344925815239549, - "total_bits": 91697152, + "accuracy": 0.9275169823667966, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -130,8 +130,8 @@ } }, { - "accuracy": 0.9372833110392094, - "total_bits": 95234560, + "accuracy": 0.9306517554796301, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -194,8 +194,8 @@ } }, { - "accuracy": 0.963291124207899, - "total_bits": 111748096, + "accuracy": 0.9593154413159937, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -258,8 +258,8 @@ } }, { - "accuracy": 0.9639588991412893, - "total_bits": 132388864, + "accuracy": 0.9601115855039097, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -322,8 +322,8 @@ } }, { - "accuracy": 0.9646026270929724, - "total_bits": 132455936, + "accuracy": 0.9605038251611404, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -386,8 +386,8 @@ } }, { - "accuracy": 0.9742872826755047, - "total_bits": 169089024, + "accuracy": 0.9710041740327142, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -438,8 +438,8 @@ } }, { - "accuracy": 0.9753523314138874, - "total_bits": 169221632, + "accuracy": 0.9719391583639663, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -490,8 +490,8 @@ } }, { - "accuracy": 0.9777251918567345, - "total_bits": 170671104, + "accuracy": 0.9740509147231933, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -542,8 +542,8 @@ } }, { - "accuracy": 0.9788274877937511, - "total_bits": 173039616, + "accuracy": 0.9753892603621352, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -594,8 +594,8 @@ } }, { - "accuracy": 0.9816966140642762, - "total_bits": 174398976, + "accuracy": 0.9795443936018273, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -658,8 +658,8 @@ } }, { - "accuracy": 0.9827177490806207, - "total_bits": 175225856, + "accuracy": 0.9808692514052382, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -722,8 +722,8 @@ } }, { - "accuracy": 0.9832882230402902, - "total_bits": 178728960, + "accuracy": 0.9813125038781436, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -783,8 +783,8 @@ } }, { - "accuracy": 0.9844062622287311, - "total_bits": 181067776, + "accuracy": 0.9830710200767498, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -844,8 +844,8 @@ } }, { - "accuracy": 0.990460420580348, - "total_bits": 219944960, + "accuracy": 0.989644758781651, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -905,8 +905,8 @@ } }, { - "accuracy": 0.9903306900814641, - "total_bits": 223010816, + "accuracy": 0.9913767111793277, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -966,8 +966,8 @@ } }, { - "accuracy": 0.9921794111141935, - "total_bits": 252975104, + "accuracy": 0.9918394083797466, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -1018,8 +1018,8 @@ } }, { - "accuracy": 0.9920730279409327, - "total_bits": 265314304, + "accuracy": 0.9947441303811502, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -1070,8 +1070,8 @@ } }, { - "accuracy": 0.995181388760102, - "total_bits": 336861184, + "accuracy": 0.9977742217761261, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -1124,8 +1124,8 @@ ], "model.layers.0.block_sparse_moe": [ { - "accuracy": 0.9259032818954438, - "total_bits": 3157926400, + "accuracy": 0.9061480336822569, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -1176,8 +1176,8 @@ } }, { - "accuracy": 0.9299558845814317, - "total_bits": 3268026880, + "accuracy": 0.9121193317696452, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -1228,8 +1228,8 @@ } }, { - "accuracy": 0.9367016213946044, - "total_bits": 3652411392, + "accuracy": 0.9265359605196863, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -1277,8 +1277,8 @@ } }, { - "accuracy": 0.9379711882211268, - "total_bits": 4098056192, + "accuracy": 0.9311056419974193, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -1326,8 +1326,8 @@ } }, { - "accuracy": 0.9756955475313589, - "total_bits": 4621411072, + "accuracy": 0.9601812014589086, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -1378,8 +1378,8 @@ } }, { - "accuracy": 0.9776492936071008, - "total_bits": 4737212416, + "accuracy": 0.9636230148607865, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -1430,8 +1430,8 @@ } }, { - "accuracy": 0.9801821867586114, - "total_bits": 5093868288, + "accuracy": 0.9704330296372063, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -1479,8 +1479,8 @@ } }, { - "accuracy": 0.9871682995581068, - "total_bits": 5824164608, + "accuracy": 0.9794988451467361, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -1522,8 +1522,8 @@ } }, { - "accuracy": 0.9882731068646535, - "total_bits": 5910044672, + "accuracy": 0.981427805396379, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -1565,8 +1565,8 @@ } }, { - "accuracy": 0.9881474223220721, - "total_bits": 6006579968, + "accuracy": 0.9803791831946, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -1617,8 +1617,8 @@ } }, { - "accuracy": 0.9879671690869145, - "total_bits": 6122381312, + "accuracy": 0.9827470849122619, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -1669,8 +1669,8 @@ } }, { - "accuracy": 0.993877296539722, - "total_bits": 7391748864, + "accuracy": 0.9900351185060572, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -1721,8 +1721,8 @@ } }, { - "accuracy": 0.994035666633863, - "total_bits": 7507550208, + "accuracy": 0.9914425020615454, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -1773,8 +1773,8 @@ } }, { - "accuracy": 0.996240449429024, - "total_bits": 8550425344, + "accuracy": 0.9945211706872215, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -1816,8 +1816,8 @@ } }, { - "accuracy": 0.9964473845757311, - "total_bits": 8877312000, + "accuracy": 0.9949245448733564, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -1865,8 +1865,8 @@ } }, { - "accuracy": 0.9968076962977648, - "total_bits": 9674229760, + "accuracy": 0.9964140110496373, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -1911,8 +1911,8 @@ } }, { - "accuracy": 0.9978812155604828, - "total_bits": 11318396928, + "accuracy": 0.9984500928367197, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -1953,8 +1953,8 @@ ], "model.layers.1.self_attn": [ { - "accuracy": 0.9081871882081032, - "total_bits": 89141248, + "accuracy": 0.8983938317978755, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -2017,8 +2017,8 @@ } }, { - "accuracy": 0.9161887615919113, - "total_bits": 91697152, + "accuracy": 0.9062281269580126, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -2081,8 +2081,8 @@ } }, { - "accuracy": 0.9271722342818975, - "total_bits": 95234560, + "accuracy": 0.9181110495701432, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -2145,8 +2145,8 @@ } }, { - "accuracy": 0.9468022130895406, - "total_bits": 111748096, + "accuracy": 0.9412351544597186, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -2209,8 +2209,8 @@ } }, { - "accuracy": 0.9485933494288474, - "total_bits": 132388864, + "accuracy": 0.9432488027377985, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -2273,8 +2273,8 @@ } }, { - "accuracy": 0.9567459803074598, - "total_bits": 132455936, + "accuracy": 0.9498741248971783, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -2337,8 +2337,8 @@ } }, { - "accuracy": 0.9582914719358087, - "total_bits": 169089024, + "accuracy": 0.9540702534141019, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -2389,8 +2389,8 @@ } }, { - "accuracy": 0.9663479211740196, - "total_bits": 169221632, + "accuracy": 0.9617532956181094, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -2441,8 +2441,8 @@ } }, { - "accuracy": 0.9630967292468995, - "total_bits": 170671104, + "accuracy": 0.9575334048713557, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -2493,8 +2493,8 @@ } }, { - "accuracy": 0.963470377959311, - "total_bits": 173039616, + "accuracy": 0.9584182359685656, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -2545,8 +2545,8 @@ } }, { - "accuracy": 0.9769110130146146, - "total_bits": 174398976, + "accuracy": 0.9717634496628307, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -2609,8 +2609,8 @@ } }, { - "accuracy": 0.9791024775477126, - "total_bits": 175225856, + "accuracy": 0.9753771505784243, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -2673,8 +2673,8 @@ } }, { - "accuracy": 0.9783758620033041, - "total_bits": 178728960, + "accuracy": 0.9723129952617455, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -2734,8 +2734,8 @@ } }, { - "accuracy": 0.9805777225410566, - "total_bits": 181067776, + "accuracy": 0.9769135575916152, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -2795,8 +2795,8 @@ } }, { - "accuracy": 0.98592586198356, - "total_bits": 219944960, + "accuracy": 0.9852501264394959, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -2856,8 +2856,8 @@ } }, { - "accuracy": 0.9888784742797725, - "total_bits": 223010816, + "accuracy": 0.9894696805567946, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -2917,8 +2917,8 @@ } }, { - "accuracy": 0.987283107242547, - "total_bits": 252975104, + "accuracy": 0.987367980851559, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -2969,8 +2969,8 @@ } }, { - "accuracy": 0.9918474656878971, - "total_bits": 265314304, + "accuracy": 0.9947342782616033, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -3021,8 +3021,8 @@ } }, { - "accuracy": 0.9937774574500509, - "total_bits": 336861184, + "accuracy": 0.9967764277607785, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -3075,8 +3075,8 @@ ], "model.layers.1.block_sparse_moe": [ { - "accuracy": 0.959154338343069, - "total_bits": 3157926400, + "accuracy": 0.9068626355729066, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -3127,8 +3127,8 @@ } }, { - "accuracy": 0.962494786712341, - "total_bits": 3268026880, + "accuracy": 0.909478462766856, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -3179,8 +3179,8 @@ } }, { - "accuracy": 0.9615671030478552, - "total_bits": 3652411392, + "accuracy": 0.9106669569155201, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -3228,8 +3228,8 @@ } }, { - "accuracy": 0.9615655426168814, - "total_bits": 4098056192, + "accuracy": 0.9106827644864097, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -3277,8 +3277,8 @@ } }, { - "accuracy": 0.9943342845072038, - "total_bits": 4621411072, + "accuracy": 0.9892511886282591, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -3329,8 +3329,8 @@ } }, { - "accuracy": 0.9947346317057963, - "total_bits": 4737212416, + "accuracy": 0.9887665644637309, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -3381,8 +3381,8 @@ } }, { - "accuracy": 0.9947685888619162, - "total_bits": 5093868288, + "accuracy": 0.9888562649430241, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -3430,8 +3430,8 @@ } }, { - "accuracy": 0.998123300356383, - "total_bits": 5824164608, + "accuracy": 0.9979546307622513, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -3473,8 +3473,8 @@ } }, { - "accuracy": 0.9974695195378445, - "total_bits": 5910044672, + "accuracy": 0.9976652450513939, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -3516,8 +3516,8 @@ } }, { - "accuracy": 0.998458568516071, - "total_bits": 6006579968, + "accuracy": 0.9980020025977865, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -3568,8 +3568,8 @@ } }, { - "accuracy": 0.9973383280448616, - "total_bits": 6122381312, + "accuracy": 0.9980571724518086, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -3620,8 +3620,8 @@ } }, { - "accuracy": 0.9987634144417825, - "total_bits": 7391748864, + "accuracy": 0.9987960029793612, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -3672,8 +3672,8 @@ } }, { - "accuracy": 0.998042021987203, - "total_bits": 7507550208, + "accuracy": 0.9989841186525155, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -3724,8 +3724,8 @@ } }, { - "accuracy": 0.998934415731128, - "total_bits": 8550425344, + "accuracy": 0.9992318240156237, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -3767,8 +3767,8 @@ } }, { - "accuracy": 0.9991165235423978, - "total_bits": 8877312000, + "accuracy": 0.9993535618564238, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -3816,8 +3816,8 @@ } }, { - "accuracy": 0.9991190262862801, - "total_bits": 9674229760, + "accuracy": 0.99940210238492, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -3862,8 +3862,8 @@ } }, { - "accuracy": 0.9989960586826783, - "total_bits": 11318396928, + "accuracy": 0.9995452196244514, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -3904,8 +3904,8 @@ ], "model.layers.2.self_attn": [ { - "accuracy": 0.9951741425757064, - "total_bits": 89141248, + "accuracy": 0.9881992997252382, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -3968,8 +3968,8 @@ } }, { - "accuracy": 0.9953377145284321, - "total_bits": 91697152, + "accuracy": 0.9883486920589348, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -4032,8 +4032,8 @@ } }, { - "accuracy": 0.995220682976651, - "total_bits": 95234560, + "accuracy": 0.9885892421443714, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -4096,8 +4096,8 @@ } }, { - "accuracy": 0.9955439939221833, - "total_bits": 111748096, + "accuracy": 0.9892666799860308, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -4160,8 +4160,8 @@ } }, { - "accuracy": 0.997809174208669, - "total_bits": 132388864, + "accuracy": 0.9948108981698169, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -4224,8 +4224,8 @@ } }, { - "accuracy": 0.9980103456691722, - "total_bits": 132455936, + "accuracy": 0.9957542511692736, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -4288,8 +4288,8 @@ } }, { - "accuracy": 0.9980752718693111, - "total_bits": 169089024, + "accuracy": 0.9954744850292627, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -4340,8 +4340,8 @@ } }, { - "accuracy": 0.9983023894601502, - "total_bits": 169221632, + "accuracy": 0.9964833018348145, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -4392,8 +4392,8 @@ } }, { - "accuracy": 0.9982182764142635, - "total_bits": 170671104, + "accuracy": 0.9968059769998945, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -4444,8 +4444,8 @@ } }, { - "accuracy": 0.998310721348389, - "total_bits": 173039616, + "accuracy": 0.997020273029193, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -4496,8 +4496,8 @@ } }, { - "accuracy": 0.9988282818558218, - "total_bits": 174398976, + "accuracy": 0.9973894058202859, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -4560,8 +4560,8 @@ } }, { - "accuracy": 0.9989027344709029, - "total_bits": 175225856, + "accuracy": 0.9977199671011476, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -4624,8 +4624,8 @@ } }, { - "accuracy": 0.9988959323745803, - "total_bits": 178728960, + "accuracy": 0.9975537615537178, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -4685,8 +4685,8 @@ } }, { - "accuracy": 0.9989783380551671, - "total_bits": 181067776, + "accuracy": 0.9979189260338899, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -4746,8 +4746,8 @@ } }, { - "accuracy": 0.9992747648793738, - "total_bits": 219944960, + "accuracy": 0.9988219045935693, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -4807,8 +4807,8 @@ } }, { - "accuracy": 0.9993035424231493, - "total_bits": 223010816, + "accuracy": 0.9986071155435639, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -4868,8 +4868,8 @@ } }, { - "accuracy": 0.9993144320469582, - "total_bits": 252975104, + "accuracy": 0.9989693955767507, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -4920,8 +4920,8 @@ } }, { - "accuracy": 0.9993294944761146, - "total_bits": 265314304, + "accuracy": 0.9991031536055743, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -4972,8 +4972,8 @@ } }, { - "accuracy": 0.9993960144220182, - "total_bits": 336861184, + "accuracy": 0.9996990875515621, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -5026,8 +5026,8 @@ ], "model.layers.2.block_sparse_moe": [ { - "accuracy": 0.9963116115686717, - "total_bits": 3157926400, + "accuracy": 0.9910277377566672, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -5078,8 +5078,8 @@ } }, { - "accuracy": 0.9964361457678024, - "total_bits": 3268026880, + "accuracy": 0.9913175469264388, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -5130,8 +5130,8 @@ } }, { - "accuracy": 0.9967866158549441, - "total_bits": 3652411392, + "accuracy": 0.9924922636782867, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -5179,8 +5179,8 @@ } }, { - "accuracy": 0.9968715757568134, - "total_bits": 4098056192, + "accuracy": 0.992906329309335, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -5228,8 +5228,8 @@ } }, { - "accuracy": 0.9983032200616435, - "total_bits": 4621411072, + "accuracy": 0.9955907215444313, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -5280,8 +5280,8 @@ } }, { - "accuracy": 0.9984345275224769, - "total_bits": 4737212416, + "accuracy": 0.9959272847409011, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -5332,8 +5332,8 @@ } }, { - "accuracy": 0.9985409699293086, - "total_bits": 5093868288, + "accuracy": 0.99646768083403, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -5381,8 +5381,8 @@ } }, { - "accuracy": 0.9991537401801907, - "total_bits": 5824164608, + "accuracy": 0.9976669457973912, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -5424,8 +5424,8 @@ } }, { - "accuracy": 0.9992232685690396, - "total_bits": 5910044672, + "accuracy": 0.9978846745270857, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -5467,8 +5467,8 @@ } }, { - "accuracy": 0.9991466796855093, - "total_bits": 6006579968, + "accuracy": 0.9977547861144558, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -5519,8 +5519,8 @@ } }, { - "accuracy": 0.9992554096279491, - "total_bits": 6122381312, + "accuracy": 0.99803229864483, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -5571,8 +5571,8 @@ } }, { - "accuracy": 0.999571862294033, - "total_bits": 7391748864, + "accuracy": 0.9988554860456134, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -5623,8 +5623,8 @@ } }, { - "accuracy": 0.999603937523716, - "total_bits": 7507550208, + "accuracy": 0.9990181947814563, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -5675,8 +5675,8 @@ } }, { - "accuracy": 0.9997561137233788, - "total_bits": 8550425344, + "accuracy": 0.9993641468536225, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -5718,8 +5718,8 @@ } }, { - "accuracy": 0.9997643424485432, - "total_bits": 8877312000, + "accuracy": 0.9994130597219737, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -5767,8 +5767,8 @@ } }, { - "accuracy": 0.9997800599139737, - "total_bits": 9674229760, + "accuracy": 0.9995304489980299, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -5813,8 +5813,8 @@ } }, { - "accuracy": 0.999871382348374, - "total_bits": 11318396928, + "accuracy": 0.9998210227245181, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -5855,8 +5855,8 @@ ], "model.layers.3.self_attn": [ { - "accuracy": 0.9853069963864982, - "total_bits": 89141248, + "accuracy": 0.9563613558420911, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -5919,8 +5919,8 @@ } }, { - "accuracy": 0.9855822678073309, - "total_bits": 91697152, + "accuracy": 0.9562133592553437, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -5983,8 +5983,8 @@ } }, { - "accuracy": 0.9856605771346949, - "total_bits": 95234560, + "accuracy": 0.9552693938021548, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -6047,8 +6047,8 @@ } }, { - "accuracy": 0.9862212757579982, - "total_bits": 111748096, + "accuracy": 0.9555891461204737, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -6111,8 +6111,8 @@ } }, { - "accuracy": 0.9947170411469415, - "total_bits": 132388864, + "accuracy": 0.9863810884417035, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -6175,8 +6175,8 @@ } }, { - "accuracy": 0.9949799413734581, - "total_bits": 132455936, + "accuracy": 0.987611983116949, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -6239,8 +6239,8 @@ } }, { - "accuracy": 0.9950764597742818, - "total_bits": 169089024, + "accuracy": 0.9868284853146179, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -6291,8 +6291,8 @@ } }, { - "accuracy": 0.9953502516291337, - "total_bits": 169221632, + "accuracy": 0.9882485210109735, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -6343,8 +6343,8 @@ } }, { - "accuracy": 0.9957506415812531, - "total_bits": 170671104, + "accuracy": 0.9885538430826273, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -6395,8 +6395,8 @@ } }, { - "accuracy": 0.9957382143475115, - "total_bits": 173039616, + "accuracy": 0.9889651730336482, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -6447,8 +6447,8 @@ } }, { - "accuracy": 0.9972622413333738, - "total_bits": 174398976, + "accuracy": 0.9936429190784111, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -6511,8 +6511,8 @@ } }, { - "accuracy": 0.9976580539441784, - "total_bits": 175225856, + "accuracy": 0.995461463011452, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -6575,8 +6575,8 @@ } }, { - "accuracy": 0.9973613950714935, - "total_bits": 178728960, + "accuracy": 0.9935810816605226, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -6636,8 +6636,8 @@ } }, { - "accuracy": 0.9977424958633492, - "total_bits": 181067776, + "accuracy": 0.9957748085980711, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -6697,8 +6697,8 @@ } }, { - "accuracy": 0.9977118901224458, - "total_bits": 219944960, + "accuracy": 0.9964866833142878, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -6758,8 +6758,8 @@ } }, { - "accuracy": 0.9977828560149646, - "total_bits": 223010816, + "accuracy": 0.9973200031672604, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -6819,8 +6819,8 @@ } }, { - "accuracy": 0.9977400485222461, - "total_bits": 252975104, + "accuracy": 0.996606658824021, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -6871,8 +6871,8 @@ } }, { - "accuracy": 0.9978597690205788, - "total_bits": 265314304, + "accuracy": 0.9978621812515485, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -6923,8 +6923,8 @@ } }, { - "accuracy": 0.998323051877378, - "total_bits": 336861184, + "accuracy": 0.9991464993800037, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -6977,8 +6977,8 @@ ], "model.layers.3.block_sparse_moe": [ { - "accuracy": 0.9947719463671092, - "total_bits": 3157926400, + "accuracy": 0.9876633585663512, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -7029,8 +7029,8 @@ } }, { - "accuracy": 0.9949436041206354, - "total_bits": 3268026880, + "accuracy": 0.9880511111696251, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -7081,8 +7081,8 @@ } }, { - "accuracy": 0.995453665338573, - "total_bits": 3652411392, + "accuracy": 0.9896366395114455, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -7130,8 +7130,8 @@ } }, { - "accuracy": 0.9955786836799234, - "total_bits": 4098056192, + "accuracy": 0.9901984291354893, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -7179,8 +7179,8 @@ } }, { - "accuracy": 0.9975572464900324, - "total_bits": 4621411072, + "accuracy": 0.9938256866880693, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -7231,8 +7231,8 @@ } }, { - "accuracy": 0.9977521098844591, - "total_bits": 4737212416, + "accuracy": 0.9943074553884799, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -7283,8 +7283,8 @@ } }, { - "accuracy": 0.9979033498602803, - "total_bits": 5093868288, + "accuracy": 0.9950314921006793, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -7332,8 +7332,8 @@ } }, { - "accuracy": 0.9987790038212552, - "total_bits": 5824164608, + "accuracy": 0.9967240856385615, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -7375,8 +7375,8 @@ } }, { - "accuracy": 0.9988816456279892, - "total_bits": 5910044672, + "accuracy": 0.9970289560806123, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -7418,8 +7418,8 @@ } }, { - "accuracy": 0.9987680663798528, - "total_bits": 6006579968, + "accuracy": 0.9968487206642749, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -7470,8 +7470,8 @@ } }, { - "accuracy": 0.9989281225389277, - "total_bits": 6122381312, + "accuracy": 0.9972375735851529, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -7522,8 +7522,8 @@ } }, { - "accuracy": 0.999381133919087, - "total_bits": 7391748864, + "accuracy": 0.9983922150822764, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -7574,8 +7574,8 @@ } }, { - "accuracy": 0.9994315291569364, - "total_bits": 7507550208, + "accuracy": 0.9986202234795201, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -7626,8 +7626,8 @@ } }, { - "accuracy": 0.9996455730633897, - "total_bits": 8550425344, + "accuracy": 0.9991049445607132, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -7669,8 +7669,8 @@ } }, { - "accuracy": 0.9996584471336973, - "total_bits": 8877312000, + "accuracy": 0.9991754260718153, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -7718,8 +7718,8 @@ } }, { - "accuracy": 0.9996806491899406, - "total_bits": 9674229760, + "accuracy": 0.9993337291107309, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -7764,8 +7764,8 @@ } }, { - "accuracy": 0.999811367933944, - "total_bits": 11318396928, + "accuracy": 0.9997458011266644, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -7806,8 +7806,8 @@ ], "model.layers.4.self_attn": [ { - "accuracy": 0.9876448546419851, - "total_bits": 89141248, + "accuracy": 0.9705565080512315, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -7870,8 +7870,8 @@ } }, { - "accuracy": 0.9877648536930792, - "total_bits": 91697152, + "accuracy": 0.9704439437482506, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -7934,8 +7934,8 @@ } }, { - "accuracy": 0.988910446933005, - "total_bits": 95234560, + "accuracy": 0.9726865802367684, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -7998,8 +7998,8 @@ } }, { - "accuracy": 0.9894880188512616, - "total_bits": 111748096, + "accuracy": 0.9743199557124171, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -8062,8 +8062,8 @@ } }, { - "accuracy": 0.9928971683839336, - "total_bits": 132388864, + "accuracy": 0.982712718876428, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -8126,8 +8126,8 @@ } }, { - "accuracy": 0.991974107484566, - "total_bits": 132455936, + "accuracy": 0.9812967156758532, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -8190,8 +8190,8 @@ } }, { - "accuracy": 0.9933446043578442, - "total_bits": 169089024, + "accuracy": 0.9828154177375836, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -8242,8 +8242,8 @@ } }, { - "accuracy": 0.9922680477029644, - "total_bits": 169221632, + "accuracy": 0.9815221932367422, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -8294,8 +8294,8 @@ } }, { - "accuracy": 0.9943890780268703, - "total_bits": 170671104, + "accuracy": 0.9869821836618939, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -8346,8 +8346,8 @@ } }, { - "accuracy": 0.9942211179004516, - "total_bits": 173039616, + "accuracy": 0.983409554552054, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -8398,8 +8398,8 @@ } }, { - "accuracy": 0.9961635239742463, - "total_bits": 174398976, + "accuracy": 0.9903068670391804, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -8462,8 +8462,8 @@ } }, { - "accuracy": 0.9962940810655709, - "total_bits": 175225856, + "accuracy": 0.9950095973617863, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -8526,8 +8526,8 @@ } }, { - "accuracy": 0.9962520355620654, - "total_bits": 178728960, + "accuracy": 0.9937181602581404, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -8587,8 +8587,8 @@ } }, { - "accuracy": 0.9964325877226656, - "total_bits": 181067776, + "accuracy": 0.9955955666737282, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -8648,8 +8648,8 @@ } }, { - "accuracy": 0.9969835103256628, - "total_bits": 219944960, + "accuracy": 0.996650769320695, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -8709,8 +8709,8 @@ } }, { - "accuracy": 0.997421583670075, - "total_bits": 223010816, + "accuracy": 0.9970519446105754, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -8770,8 +8770,8 @@ } }, { - "accuracy": 0.9970152335299645, - "total_bits": 252975104, + "accuracy": 0.9968303297682723, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -8822,8 +8822,8 @@ } }, { - "accuracy": 0.9973288156033959, - "total_bits": 265314304, + "accuracy": 0.9977201039237116, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -8874,8 +8874,8 @@ } }, { - "accuracy": 0.9975893879454816, - "total_bits": 336861184, + "accuracy": 0.9991748886077403, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -8928,8 +8928,8 @@ ], "model.layers.4.block_sparse_moe": [ { - "accuracy": 0.9923477138218004, - "total_bits": 3157926400, + "accuracy": 0.9824817578773946, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -8980,8 +8980,8 @@ } }, { - "accuracy": 0.992624792677816, - "total_bits": 3268026880, + "accuracy": 0.9830967509624315, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -9032,8 +9032,8 @@ } }, { - "accuracy": 0.9934411194117274, - "total_bits": 3652411392, + "accuracy": 0.985503437317675, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -9081,8 +9081,8 @@ } }, { - "accuracy": 0.9936295805382542, - "total_bits": 4098056192, + "accuracy": 0.9863135859923204, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -9130,8 +9130,8 @@ } }, { - "accuracy": 0.9964432507840684, - "total_bits": 4621411072, + "accuracy": 0.991301248272066, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -9182,8 +9182,8 @@ } }, { - "accuracy": 0.9967310253268806, - "total_bits": 4737212416, + "accuracy": 0.9919940624240553, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -9234,8 +9234,8 @@ } }, { - "accuracy": 0.9969622984208399, - "total_bits": 5093868288, + "accuracy": 0.9930306453752564, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -9283,8 +9283,8 @@ } }, { - "accuracy": 0.9982142113076407, - "total_bits": 5824164608, + "accuracy": 0.9953650184943399, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -9326,8 +9326,8 @@ } }, { - "accuracy": 0.9983587340684608, - "total_bits": 5910044672, + "accuracy": 0.9957915045306436, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -9369,8 +9369,8 @@ } }, { - "accuracy": 0.9982069257966941, - "total_bits": 6006579968, + "accuracy": 0.9955568111290631, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -9421,8 +9421,8 @@ } }, { - "accuracy": 0.9984400177636417, - "total_bits": 6122381312, + "accuracy": 0.9961134097648028, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -9473,8 +9473,8 @@ } }, { - "accuracy": 0.9990997332097322, - "total_bits": 7391748864, + "accuracy": 0.9977344439357694, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -9525,8 +9525,8 @@ } }, { - "accuracy": 0.9991746042542218, - "total_bits": 7507550208, + "accuracy": 0.9980592962256196, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -9577,8 +9577,8 @@ } }, { - "accuracy": 0.9994834368408192, - "total_bits": 8550425344, + "accuracy": 0.9987294345401097, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -9620,8 +9620,8 @@ } }, { - "accuracy": 0.9995030762293027, - "total_bits": 8877312000, + "accuracy": 0.9988356493713582, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -9669,8 +9669,8 @@ } }, { - "accuracy": 0.9995374370009813, - "total_bits": 9674229760, + "accuracy": 0.9990666903477177, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -9715,8 +9715,8 @@ } }, { - "accuracy": 0.9997267459157229, - "total_bits": 11318396928, + "accuracy": 0.9996400695113152, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -9757,8 +9757,8 @@ ], "model.layers.5.self_attn": [ { - "accuracy": 0.9860821712645702, - "total_bits": 89141248, + "accuracy": 0.9653694152948447, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -9821,8 +9821,8 @@ } }, { - "accuracy": 0.9864921357948333, - "total_bits": 91697152, + "accuracy": 0.9662498408288229, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -9885,8 +9885,8 @@ } }, { - "accuracy": 0.9872733209049329, - "total_bits": 95234560, + "accuracy": 0.968268438213272, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -9949,8 +9949,8 @@ } }, { - "accuracy": 0.9886291341972537, - "total_bits": 111748096, + "accuracy": 0.9705438311211765, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -10013,8 +10013,8 @@ } }, { - "accuracy": 0.9929289844003506, - "total_bits": 132388864, + "accuracy": 0.9819821507844608, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -10077,8 +10077,8 @@ } }, { - "accuracy": 0.9931611975189298, - "total_bits": 132455936, + "accuracy": 0.9833936360664666, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -10141,8 +10141,8 @@ } }, { - "accuracy": 0.993704581662314, - "total_bits": 169089024, + "accuracy": 0.9835892087721732, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -10193,8 +10193,8 @@ } }, { - "accuracy": 0.9940307207289152, - "total_bits": 169221632, + "accuracy": 0.9850916774885263, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -10245,8 +10245,8 @@ } }, { - "accuracy": 0.9953542377770646, - "total_bits": 170671104, + "accuracy": 0.9864177839626791, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -10297,8 +10297,8 @@ } }, { - "accuracy": 0.9958801472384948, - "total_bits": 173039616, + "accuracy": 0.9827725828072289, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -10349,8 +10349,8 @@ } }, { - "accuracy": 0.9962883411790244, - "total_bits": 174398976, + "accuracy": 0.9881300989945885, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -10413,8 +10413,8 @@ } }, { - "accuracy": 0.9963189574336866, - "total_bits": 175225856, + "accuracy": 0.9907148132842849, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -10477,8 +10477,8 @@ } }, { - "accuracy": 0.996484641902498, - "total_bits": 178728960, + "accuracy": 0.9902184755774215, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -10538,8 +10538,8 @@ } }, { - "accuracy": 0.9964942360820714, - "total_bits": 181067776, + "accuracy": 0.9912091016813065, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -10599,8 +10599,8 @@ } }, { - "accuracy": 0.9975955448608147, - "total_bits": 219944960, + "accuracy": 0.9957670488147414, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -10660,8 +10660,8 @@ } }, { - "accuracy": 0.9978813678826555, - "total_bits": 223010816, + "accuracy": 0.995785731865908, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -10721,8 +10721,8 @@ } }, { - "accuracy": 0.9977155212836806, - "total_bits": 252975104, + "accuracy": 0.9960852129115665, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -10773,8 +10773,8 @@ } }, { - "accuracy": 0.9979079265540349, - "total_bits": 265314304, + "accuracy": 0.996337805139774, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -10825,8 +10825,8 @@ } }, { - "accuracy": 0.9977720971655799, - "total_bits": 336861184, + "accuracy": 0.9988039345407742, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -10879,8 +10879,8 @@ ], "model.layers.5.block_sparse_moe": [ { - "accuracy": 0.9894690618966706, - "total_bits": 3157926400, + "accuracy": 0.976207715109922, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -10931,8 +10931,8 @@ } }, { - "accuracy": 0.9898329424322583, - "total_bits": 3268026880, + "accuracy": 0.9769844165130053, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -10983,8 +10983,8 @@ } }, { - "accuracy": 0.9909976982453372, - "total_bits": 3652411392, + "accuracy": 0.9803240355686285, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -11032,8 +11032,8 @@ } }, { - "accuracy": 0.9912623667332809, - "total_bits": 4098056192, + "accuracy": 0.9814538538630586, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -11081,8 +11081,8 @@ } }, { - "accuracy": 0.9951005806506146, - "total_bits": 4621411072, + "accuracy": 0.9881139059580164, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -11133,8 +11133,8 @@ } }, { - "accuracy": 0.9954893226531567, - "total_bits": 4737212416, + "accuracy": 0.9890534981823293, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -11185,8 +11185,8 @@ } }, { - "accuracy": 0.9958136630302761, - "total_bits": 5093868288, + "accuracy": 0.9905058142903727, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -11234,8 +11234,8 @@ } }, { - "accuracy": 0.9975408340178546, - "total_bits": 5824164608, + "accuracy": 0.9936518827817054, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -11277,8 +11277,8 @@ } }, { - "accuracy": 0.9977442574381712, - "total_bits": 5910044672, + "accuracy": 0.9942472535840352, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -11320,8 +11320,8 @@ } }, { - "accuracy": 0.9975322816171683, - "total_bits": 6006579968, + "accuracy": 0.9939190183649771, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -11372,8 +11372,8 @@ } }, { - "accuracy": 0.9978514857648406, - "total_bits": 6122381312, + "accuracy": 0.9946770146125345, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -11424,8 +11424,8 @@ } }, { - "accuracy": 0.9987620445936045, - "total_bits": 7391748864, + "accuracy": 0.9968968659377424, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -11476,8 +11476,8 @@ } }, { - "accuracy": 0.9988688116391131, - "total_bits": 7507550208, + "accuracy": 0.9973409070698835, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -11528,8 +11528,8 @@ } }, { - "accuracy": 0.999295369572792, - "total_bits": 8550425344, + "accuracy": 0.9982633208692278, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -11571,8 +11571,8 @@ } }, { - "accuracy": 0.9993209875246976, - "total_bits": 8877312000, + "accuracy": 0.998408360817848, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -11620,8 +11620,8 @@ } }, { - "accuracy": 0.9993698675716587, - "total_bits": 9674229760, + "accuracy": 0.9987385782515048, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -11666,8 +11666,8 @@ } }, { - "accuracy": 0.9996347270644037, - "total_bits": 11318396928, + "accuracy": 0.9995059050288546, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -11708,8 +11708,8 @@ ], "model.layers.6.self_attn": [ { - "accuracy": 0.9876233876566403, - "total_bits": 89141248, + "accuracy": 0.9690019220579416, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -11772,8 +11772,8 @@ } }, { - "accuracy": 0.988061324111186, - "total_bits": 91697152, + "accuracy": 0.9697185436962172, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -11836,8 +11836,8 @@ } }, { - "accuracy": 0.989029148069676, - "total_bits": 95234560, + "accuracy": 0.9735671730886679, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -11900,8 +11900,8 @@ } }, { - "accuracy": 0.990802888001781, - "total_bits": 111748096, + "accuracy": 0.9766488819150254, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -11964,8 +11964,8 @@ } }, { - "accuracy": 0.9932548563810997, - "total_bits": 132388864, + "accuracy": 0.9812204401532654, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -12028,8 +12028,8 @@ } }, { - "accuracy": 0.9933961878705304, - "total_bits": 132455936, + "accuracy": 0.9815429195587058, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -12092,8 +12092,8 @@ } }, { - "accuracy": 0.9943678066483699, - "total_bits": 169089024, + "accuracy": 0.9831198287865845, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -12144,8 +12144,8 @@ } }, { - "accuracy": 0.9945146123063751, - "total_bits": 169221632, + "accuracy": 0.9834546091442462, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -12196,8 +12196,8 @@ } }, { - "accuracy": 0.9939291632617824, - "total_bits": 170671104, + "accuracy": 0.9855192532122601, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -12248,8 +12248,8 @@ } }, { - "accuracy": 0.9941122072341386, - "total_bits": 173039616, + "accuracy": 0.9874286783160642, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -12300,8 +12300,8 @@ } }, { - "accuracy": 0.9960836011305219, - "total_bits": 174398976, + "accuracy": 0.9911587225142284, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -12364,8 +12364,8 @@ } }, { - "accuracy": 0.9962663647020236, - "total_bits": 175225856, + "accuracy": 0.9908629768906394, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -12428,8 +12428,8 @@ } }, { - "accuracy": 0.9963694706821116, - "total_bits": 178728960, + "accuracy": 0.9906613825005479, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -12489,8 +12489,8 @@ } }, { - "accuracy": 0.9964083754020976, - "total_bits": 181067776, + "accuracy": 0.9885504599078558, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -12550,8 +12550,8 @@ } }, { - "accuracy": 0.9974726435320918, - "total_bits": 219944960, + "accuracy": 0.9947751497093122, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -12611,8 +12611,8 @@ } }, { - "accuracy": 0.9975764221890131, - "total_bits": 223010816, + "accuracy": 0.9960408307561011, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -12672,8 +12672,8 @@ } }, { - "accuracy": 0.9975969253573567, - "total_bits": 252975104, + "accuracy": 0.9950802679559274, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -12724,8 +12724,8 @@ } }, { - "accuracy": 0.9977497810468776, - "total_bits": 265314304, + "accuracy": 0.9967451806041936, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -12776,8 +12776,8 @@ } }, { - "accuracy": 0.9980534856586019, - "total_bits": 336861184, + "accuracy": 0.9987512920761219, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -12830,8 +12830,8 @@ ], "model.layers.6.block_sparse_moe": [ { - "accuracy": 0.9872453521820717, - "total_bits": 3157926400, + "accuracy": 0.9719495508179534, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -12882,8 +12882,8 @@ } }, { - "accuracy": 0.9876829728018492, - "total_bits": 3268026880, + "accuracy": 0.9728600074304268, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -12934,8 +12934,8 @@ } }, { - "accuracy": 0.9891712504322641, - "total_bits": 3652411392, + "accuracy": 0.9769656403223053, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -12983,8 +12983,8 @@ } }, { - "accuracy": 0.9895036881207488, - "total_bits": 4098056192, + "accuracy": 0.9783206881547812, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -13032,8 +13032,8 @@ } }, { - "accuracy": 0.9940833922300953, - "total_bits": 4621411072, + "accuracy": 0.986019779680646, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -13084,8 +13084,8 @@ } }, { - "accuracy": 0.9945542939240113, - "total_bits": 4737212416, + "accuracy": 0.9871307343710214, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -13136,8 +13136,8 @@ } }, { - "accuracy": 0.9949629055918194, - "total_bits": 5093868288, + "accuracy": 0.9888885946274968, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -13185,8 +13185,8 @@ } }, { - "accuracy": 0.9970280108536826, - "total_bits": 5824164608, + "accuracy": 0.9925401268046699, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -13228,8 +13228,8 @@ } }, { - "accuracy": 0.9972737573698396, - "total_bits": 5910044672, + "accuracy": 0.9932433698850218, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -13271,8 +13271,8 @@ } }, { - "accuracy": 0.9970180811651517, - "total_bits": 6006579968, + "accuracy": 0.9928507838776568, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -13323,8 +13323,8 @@ } }, { - "accuracy": 0.9974053164041834, - "total_bits": 6122381312, + "accuracy": 0.9937446459807688, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -13375,8 +13375,8 @@ } }, { - "accuracy": 0.9985050232935464, - "total_bits": 7391748864, + "accuracy": 0.9963527670952317, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -13427,8 +13427,8 @@ } }, { - "accuracy": 0.9986295469352626, - "total_bits": 7507550208, + "accuracy": 0.9968749495492375, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -13479,8 +13479,8 @@ } }, { - "accuracy": 0.9991462525140378, - "total_bits": 8550425344, + "accuracy": 0.9979609537604119, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -13522,8 +13522,8 @@ } }, { - "accuracy": 0.9991776404967823, - "total_bits": 8877312000, + "accuracy": 0.9981297857484606, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -13571,8 +13571,8 @@ } }, { - "accuracy": 0.9992389850704058, - "total_bits": 9674229760, + "accuracy": 0.9985330067065661, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -13617,8 +13617,8 @@ } }, { - "accuracy": 0.9995527412611409, - "total_bits": 11318396928, + "accuracy": 0.9994221888437096, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -13659,8 +13659,8 @@ ], "model.layers.7.self_attn": [ { - "accuracy": 0.9777178156655282, - "total_bits": 89141248, + "accuracy": 0.957857305300422, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -13723,8 +13723,8 @@ } }, { - "accuracy": 0.9780107346596196, - "total_bits": 91697152, + "accuracy": 0.9585770930279978, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -13787,8 +13787,8 @@ } }, { - "accuracy": 0.9795490241376683, - "total_bits": 95234560, + "accuracy": 0.9611924847704358, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -13851,8 +13851,8 @@ } }, { - "accuracy": 0.9814328040229157, - "total_bits": 111748096, + "accuracy": 0.9643437009071931, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -13915,8 +13915,8 @@ } }, { - "accuracy": 0.9896556691965088, - "total_bits": 132388864, + "accuracy": 0.9767331214970909, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -13979,8 +13979,8 @@ } }, { - "accuracy": 0.9899860279401764, - "total_bits": 132455936, + "accuracy": 0.9764058004366234, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -14043,8 +14043,8 @@ } }, { - "accuracy": 0.9909620513790287, - "total_bits": 169089024, + "accuracy": 0.9793589925393462, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -14095,8 +14095,8 @@ } }, { - "accuracy": 0.9913532815407962, - "total_bits": 169221632, + "accuracy": 0.9790458497882355, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -14147,8 +14147,8 @@ } }, { - "accuracy": 0.9923842270218302, - "total_bits": 170671104, + "accuracy": 0.9818778289481997, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -14199,8 +14199,8 @@ } }, { - "accuracy": 0.99225776430103, - "total_bits": 173039616, + "accuracy": 0.9849749426357448, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -14251,8 +14251,8 @@ } }, { - "accuracy": 0.9944272831780836, - "total_bits": 174398976, + "accuracy": 0.990268444395042, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -14315,8 +14315,8 @@ } }, { - "accuracy": 0.9945553371217102, - "total_bits": 175225856, + "accuracy": 0.9911809298355365, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -14379,8 +14379,8 @@ } }, { - "accuracy": 0.9945277282386087, - "total_bits": 178728960, + "accuracy": 0.9911796378510189, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -14440,8 +14440,8 @@ } }, { - "accuracy": 0.9948925675707869, - "total_bits": 181067776, + "accuracy": 0.9929459544073325, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -14501,8 +14501,8 @@ } }, { - "accuracy": 0.9962359591154382, - "total_bits": 219944960, + "accuracy": 0.9950346473779064, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -14562,8 +14562,8 @@ } }, { - "accuracy": 0.9962418622453697, - "total_bits": 223010816, + "accuracy": 0.9960723795556987, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -14623,8 +14623,8 @@ } }, { - "accuracy": 0.9963856458780356, - "total_bits": 252975104, + "accuracy": 0.9956298000543029, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -14675,8 +14675,8 @@ } }, { - "accuracy": 0.9963844940211857, - "total_bits": 265314304, + "accuracy": 0.9969223435473396, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -14727,8 +14727,8 @@ } }, { - "accuracy": 0.9967591413733317, - "total_bits": 336861184, + "accuracy": 0.9988960422169839, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -14781,8 +14781,8 @@ ], "model.layers.7.block_sparse_moe": [ { - "accuracy": 0.9853268422884867, - "total_bits": 3157926400, + "accuracy": 0.9685781079751905, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -14833,8 +14833,8 @@ } }, { - "accuracy": 0.9858203498297371, - "total_bits": 3268026880, + "accuracy": 0.9695832498546224, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -14885,8 +14885,8 @@ } }, { - "accuracy": 0.9874842597055249, - "total_bits": 3652411392, + "accuracy": 0.9739879154367372, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -14934,8 +14934,8 @@ } }, { - "accuracy": 0.9878538625198416, - "total_bits": 4098056192, + "accuracy": 0.9754050033807289, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -14983,8 +14983,8 @@ } }, { - "accuracy": 0.9931694683327805, - "total_bits": 4621411072, + "accuracy": 0.9843613321136218, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -15035,8 +15035,8 @@ } }, { - "accuracy": 0.993732042668853, - "total_bits": 4737212416, + "accuracy": 0.9856292299809866, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -15087,8 +15087,8 @@ } }, { - "accuracy": 0.9941772690217476, - "total_bits": 5093868288, + "accuracy": 0.9874729241710156, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -15136,8 +15136,8 @@ } }, { - "accuracy": 0.9965812790469499, - "total_bits": 5824164608, + "accuracy": 0.9916937593297916, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -15179,8 +15179,8 @@ } }, { - "accuracy": 0.9968595500686206, - "total_bits": 5910044672, + "accuracy": 0.992469720171357, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -15222,8 +15222,8 @@ } }, { - "accuracy": 0.9965478475787677, - "total_bits": 6006579968, + "accuracy": 0.9919906738141435, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -15274,8 +15274,8 @@ } }, { - "accuracy": 0.9970077460311586, - "total_bits": 6122381312, + "accuracy": 0.9930083989893319, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -15326,8 +15326,8 @@ } }, { - "accuracy": 0.9982645384952775, - "total_bits": 7391748864, + "accuracy": 0.995905077765201, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -15378,8 +15378,8 @@ } }, { - "accuracy": 0.99840264350496, - "total_bits": 7507550208, + "accuracy": 0.9965048991871299, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -15430,8 +15430,8 @@ } }, { - "accuracy": 0.998996884587541, - "total_bits": 8550425344, + "accuracy": 0.9977153791769524, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -15473,8 +15473,8 @@ } }, { - "accuracy": 0.9990312092159002, - "total_bits": 8877312000, + "accuracy": 0.9978934594091697, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -15522,8 +15522,8 @@ } }, { - "accuracy": 0.9990968206329853, - "total_bits": 9674229760, + "accuracy": 0.998313895190222, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -15568,8 +15568,8 @@ } }, { - "accuracy": 0.9994494792717887, - "total_bits": 11318396928, + "accuracy": 0.999354508401666, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -15610,8 +15610,8 @@ ], "model.layers.8.self_attn": [ { - "accuracy": 0.975679850555025, - "total_bits": 89141248, + "accuracy": 0.9554507475695573, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -15674,8 +15674,8 @@ } }, { - "accuracy": 0.9764077493455261, - "total_bits": 91697152, + "accuracy": 0.9564762598020025, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -15738,8 +15738,8 @@ } }, { - "accuracy": 0.9773043629247695, - "total_bits": 95234560, + "accuracy": 0.956681863986887, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -15802,8 +15802,8 @@ } }, { - "accuracy": 0.9790577869862318, - "total_bits": 111748096, + "accuracy": 0.9602740391273983, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -15866,8 +15866,8 @@ } }, { - "accuracy": 0.9868759041419253, - "total_bits": 132388864, + "accuracy": 0.9736098177381791, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -15930,8 +15930,8 @@ } }, { - "accuracy": 0.9873604268068448, - "total_bits": 132455936, + "accuracy": 0.9749449636728968, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -15994,8 +15994,8 @@ } }, { - "accuracy": 0.9881445863284171, - "total_bits": 169089024, + "accuracy": 0.9758945000066888, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -16046,8 +16046,8 @@ } }, { - "accuracy": 0.9886909940978512, - "total_bits": 169221632, + "accuracy": 0.9775671299430542, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -16098,8 +16098,8 @@ } }, { - "accuracy": 0.9913531819474883, - "total_bits": 170671104, + "accuracy": 0.9813689626171254, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -16150,8 +16150,8 @@ } }, { - "accuracy": 0.9921035411243793, - "total_bits": 173039616, + "accuracy": 0.9827872099995147, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -16202,8 +16202,8 @@ } }, { - "accuracy": 0.9926168908423278, - "total_bits": 174398976, + "accuracy": 0.9870027104043402, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -16266,8 +16266,8 @@ } }, { - "accuracy": 0.992625526356278, - "total_bits": 175225856, + "accuracy": 0.9897745937778382, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -16330,8 +16330,8 @@ } }, { - "accuracy": 0.9927270227344707, - "total_bits": 178728960, + "accuracy": 0.9896325124136638, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -16391,8 +16391,8 @@ } }, { - "accuracy": 0.9930577417544555, - "total_bits": 181067776, + "accuracy": 0.9908259944277233, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -16452,8 +16452,8 @@ } }, { - "accuracy": 0.9939123782678507, - "total_bits": 219944960, + "accuracy": 0.9940543044140213, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -16513,8 +16513,8 @@ } }, { - "accuracy": 0.9940350227698218, - "total_bits": 223010816, + "accuracy": 0.9954926204190997, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -16574,8 +16574,8 @@ } }, { - "accuracy": 0.9940404299995862, - "total_bits": 252975104, + "accuracy": 0.9946865089877974, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -16626,8 +16626,8 @@ } }, { - "accuracy": 0.9942833143868484, - "total_bits": 265314304, + "accuracy": 0.9964768793979601, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -16678,8 +16678,8 @@ } }, { - "accuracy": 0.9948120584595017, - "total_bits": 336861184, + "accuracy": 0.9988221883068036, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -16732,8 +16732,8 @@ ], "model.layers.8.block_sparse_moe": [ { - "accuracy": 0.9838531234418042, - "total_bits": 3157926400, + "accuracy": 0.9658799633325543, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -16784,8 +16784,8 @@ } }, { - "accuracy": 0.9843875694205053, - "total_bits": 3268026880, + "accuracy": 0.9669509434897918, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -16836,8 +16836,8 @@ } }, { - "accuracy": 0.986291098408401, - "total_bits": 3652411392, + "accuracy": 0.9717928424361162, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -16885,8 +16885,8 @@ } }, { - "accuracy": 0.9867199782165699, - "total_bits": 4098056192, + "accuracy": 0.9732998886029236, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -16934,8 +16934,8 @@ } }, { - "accuracy": 0.9924800038570538, - "total_bits": 4621411072, + "accuracy": 0.983064649044536, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -16986,8 +16986,8 @@ } }, { - "accuracy": 0.9930973041628022, - "total_bits": 4737212416, + "accuracy": 0.9844203894899692, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -17038,8 +17038,8 @@ } }, { - "accuracy": 0.9936270018806681, - "total_bits": 5093868288, + "accuracy": 0.9864367330883397, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -17087,8 +17087,8 @@ } }, { - "accuracy": 0.9962298153404845, - "total_bits": 5824164608, + "accuracy": 0.9910269574029371, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -17130,8 +17130,8 @@ } }, { - "accuracy": 0.9965413579921005, - "total_bits": 5910044672, + "accuracy": 0.9918464016373036, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -17173,8 +17173,8 @@ } }, { - "accuracy": 0.9962059839890571, - "total_bits": 6006579968, + "accuracy": 0.9913276129154838, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -17225,8 +17225,8 @@ } }, { - "accuracy": 0.9967013418645365, - "total_bits": 6122381312, + "accuracy": 0.9924205869610887, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -17277,8 +17277,8 @@ } }, { - "accuracy": 0.9980905673874076, - "total_bits": 7391748864, + "accuracy": 0.9955648497925722, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -17329,8 +17329,8 @@ } }, { - "accuracy": 0.9982411147138919, - "total_bits": 7507550208, + "accuracy": 0.9962060597645177, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -17381,8 +17381,8 @@ } }, { - "accuracy": 0.9988991437130608, - "total_bits": 8550425344, + "accuracy": 0.9975280500948429, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -17424,8 +17424,8 @@ } }, { - "accuracy": 0.998938086853741, - "total_bits": 8877312000, + "accuracy": 0.9977186417909252, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -17473,8 +17473,8 @@ } }, { - "accuracy": 0.9990173240694276, - "total_bits": 9674229760, + "accuracy": 0.9981851406464557, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -17519,8 +17519,8 @@ } }, { - "accuracy": 0.9994025246232923, - "total_bits": 11318396928, + "accuracy": 0.9993043745289469, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -17561,8 +17561,8 @@ ], "model.layers.9.self_attn": [ { - "accuracy": 0.9796696860576048, - "total_bits": 89141248, + "accuracy": 0.9566532354801893, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -17625,8 +17625,8 @@ } }, { - "accuracy": 0.9799871915020049, - "total_bits": 91697152, + "accuracy": 0.9572839143220335, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -17689,8 +17689,8 @@ } }, { - "accuracy": 0.9807842435548082, - "total_bits": 95234560, + "accuracy": 0.9588356871972792, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -17753,8 +17753,8 @@ } }, { - "accuracy": 0.9832563039381057, - "total_bits": 111748096, + "accuracy": 0.9632478174753487, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -17817,8 +17817,8 @@ } }, { - "accuracy": 0.9909234800725244, - "total_bits": 132388864, + "accuracy": 0.9794653536810074, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -17881,8 +17881,8 @@ } }, { - "accuracy": 0.9910329449630808, - "total_bits": 132455936, + "accuracy": 0.9808820870821364, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -17945,8 +17945,8 @@ } }, { - "accuracy": 0.9931442203233019, - "total_bits": 169089024, + "accuracy": 0.9836317642766517, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -17997,8 +17997,8 @@ } }, { - "accuracy": 0.9932669021072797, - "total_bits": 169221632, + "accuracy": 0.9854700443975162, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -18049,8 +18049,8 @@ } }, { - "accuracy": 0.9942199437937234, - "total_bits": 170671104, + "accuracy": 0.9877705864346353, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -18101,8 +18101,8 @@ } }, { - "accuracy": 0.9949501430091914, - "total_bits": 173039616, + "accuracy": 0.9881930210540304, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -18153,8 +18153,8 @@ } }, { - "accuracy": 0.9957104804780101, - "total_bits": 174398976, + "accuracy": 0.9891585698787821, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -18217,8 +18217,8 @@ } }, { - "accuracy": 0.9960011131479405, - "total_bits": 175225856, + "accuracy": 0.9898611327080289, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -18281,8 +18281,8 @@ } }, { - "accuracy": 0.9961391838733107, - "total_bits": 178728960, + "accuracy": 0.9899557712778915, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -18342,8 +18342,8 @@ } }, { - "accuracy": 0.9964529052958824, - "total_bits": 181067776, + "accuracy": 0.991623389403685, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -18403,8 +18403,8 @@ } }, { - "accuracy": 0.9971372256550239, - "total_bits": 219944960, + "accuracy": 0.9948117253661621, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -18464,8 +18464,8 @@ } }, { - "accuracy": 0.9972103480395162, - "total_bits": 223010816, + "accuracy": 0.995495641749585, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -18525,8 +18525,8 @@ } }, { - "accuracy": 0.9974507319420809, - "total_bits": 252975104, + "accuracy": 0.9956566263790592, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -18577,8 +18577,8 @@ } }, { - "accuracy": 0.9976210370805347, - "total_bits": 265314304, + "accuracy": 0.9967000259312044, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -18629,8 +18629,8 @@ } }, { - "accuracy": 0.9984747780981706, - "total_bits": 336861184, + "accuracy": 0.9987194463028572, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -18683,8 +18683,8 @@ ], "model.layers.9.block_sparse_moe": [ { - "accuracy": 0.9820934003801085, - "total_bits": 3157926400, + "accuracy": 0.9633762969169766, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -18735,8 +18735,8 @@ } }, { - "accuracy": 0.98268577887211, - "total_bits": 3268026880, + "accuracy": 0.9645368231285829, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -18787,8 +18787,8 @@ } }, { - "accuracy": 0.9847539598704316, - "total_bits": 3652411392, + "accuracy": 0.9697590936266351, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -18836,8 +18836,8 @@ } }, { - "accuracy": 0.9852285768720321, - "total_bits": 4098056192, + "accuracy": 0.9714154465764295, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -18885,8 +18885,8 @@ } }, { - "accuracy": 0.9916500639519654, - "total_bits": 4621411072, + "accuracy": 0.9817765061598038, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -18937,8 +18937,8 @@ } }, { - "accuracy": 0.9923229153209832, - "total_bits": 4737212416, + "accuracy": 0.983244254544843, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -18989,8 +18989,8 @@ } }, { - "accuracy": 0.9929136104765348, - "total_bits": 5093868288, + "accuracy": 0.9854704850149574, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -19038,8 +19038,8 @@ } }, { - "accuracy": 0.9957746317086276, - "total_bits": 5824164608, + "accuracy": 0.9903233571676537, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -19081,8 +19081,8 @@ } }, { - "accuracy": 0.9960611048445571, - "total_bits": 5910044672, + "accuracy": 0.9911865668254904, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -19124,8 +19124,8 @@ } }, { - "accuracy": 0.9957884931354783, - "total_bits": 6006579968, + "accuracy": 0.9906698129052529, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -19176,8 +19176,8 @@ } }, { - "accuracy": 0.9963358733948553, - "total_bits": 6122381312, + "accuracy": 0.9918353626417229, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -19228,8 +19228,8 @@ } }, { - "accuracy": 0.9978746475972002, - "total_bits": 7391748864, + "accuracy": 0.9952269838395296, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -19280,8 +19280,8 @@ } }, { - "accuracy": 0.9980394233498373, - "total_bits": 7507550208, + "accuracy": 0.9959103314795357, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -19332,8 +19332,8 @@ } }, { - "accuracy": 0.9987561759517121, - "total_bits": 8550425344, + "accuracy": 0.9973359878058545, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -19375,8 +19375,8 @@ } }, { - "accuracy": 0.9988204771470919, - "total_bits": 8877312000, + "accuracy": 0.997540746066079, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -19424,8 +19424,8 @@ } }, { - "accuracy": 0.9989088426336821, - "total_bits": 9674229760, + "accuracy": 0.9980464665077307, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -19470,8 +19470,8 @@ } }, { - "accuracy": 0.999330209382606, - "total_bits": 11318396928, + "accuracy": 0.9992403864898733, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -19512,8 +19512,8 @@ ], "model.layers.10.self_attn": [ { - "accuracy": 0.9764574829023331, - "total_bits": 89141248, + "accuracy": 0.9507078821770847, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -19576,8 +19576,8 @@ } }, { - "accuracy": 0.9770286994753405, - "total_bits": 91697152, + "accuracy": 0.9515647518564947, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -19640,8 +19640,8 @@ } }, { - "accuracy": 0.9782601614715531, - "total_bits": 95234560, + "accuracy": 0.9547730306512676, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -19704,8 +19704,8 @@ } }, { - "accuracy": 0.9811628537718207, - "total_bits": 111748096, + "accuracy": 0.959696608770173, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -19768,8 +19768,8 @@ } }, { - "accuracy": 0.9855241021723486, - "total_bits": 132388864, + "accuracy": 0.9717026404687203, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -19832,8 +19832,8 @@ } }, { - "accuracy": 0.986181124986615, - "total_bits": 132455936, + "accuracy": 0.9729299140744843, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -19896,8 +19896,8 @@ } }, { - "accuracy": 0.9874349442543462, - "total_bits": 169089024, + "accuracy": 0.9750564605055843, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -19948,8 +19948,8 @@ } }, { - "accuracy": 0.988070152583532, - "total_bits": 169221632, + "accuracy": 0.9765761711169034, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -20000,8 +20000,8 @@ } }, { - "accuracy": 0.9912404706701636, - "total_bits": 170671104, + "accuracy": 0.9835318427212769, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -20052,8 +20052,8 @@ } }, { - "accuracy": 0.9918636633956339, - "total_bits": 173039616, + "accuracy": 0.9846575588162523, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -20104,8 +20104,8 @@ } }, { - "accuracy": 0.9927899185277056, - "total_bits": 174398976, + "accuracy": 0.9860518514615251, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -20168,8 +20168,8 @@ } }, { - "accuracy": 0.9931252904061694, - "total_bits": 175225856, + "accuracy": 0.9877015383681282, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -20232,8 +20232,8 @@ } }, { - "accuracy": 0.9931396291649435, - "total_bits": 178728960, + "accuracy": 0.987736590555869, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -20293,8 +20293,8 @@ } }, { - "accuracy": 0.9933806757326238, - "total_bits": 181067776, + "accuracy": 0.9898313195153605, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -20354,8 +20354,8 @@ } }, { - "accuracy": 0.9947485265147407, - "total_bits": 219944960, + "accuracy": 0.9924223404668737, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -20415,8 +20415,8 @@ } }, { - "accuracy": 0.99490431684535, - "total_bits": 223010816, + "accuracy": 0.9954973050407716, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -20476,8 +20476,8 @@ } }, { - "accuracy": 0.9949823204660788, - "total_bits": 252975104, + "accuracy": 0.9931076619541273, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -20528,8 +20528,8 @@ } }, { - "accuracy": 0.9950790845323354, - "total_bits": 265314304, + "accuracy": 0.996783760812832, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -20580,8 +20580,8 @@ } }, { - "accuracy": 0.9964772042440018, - "total_bits": 336861184, + "accuracy": 0.9984850724722492, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -20634,8 +20634,8 @@ ], "model.layers.10.block_sparse_moe": [ { - "accuracy": 0.9797901315614581, - "total_bits": 3157926400, + "accuracy": 0.9600282781175338, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -20686,8 +20686,8 @@ } }, { - "accuracy": 0.9805119219236076, - "total_bits": 3268026880, + "accuracy": 0.9613443473353982, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -20738,8 +20738,8 @@ } }, { - "accuracy": 0.9830116357188672, - "total_bits": 3652411392, + "accuracy": 0.967422500019893, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -20787,8 +20787,8 @@ } }, { - "accuracy": 0.9835851672105491, - "total_bits": 4098056192, + "accuracy": 0.9693920565769076, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -20836,8 +20836,8 @@ } }, { - "accuracy": 0.9905841624713503, - "total_bits": 4621411072, + "accuracy": 0.9799457186891232, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -20888,8 +20888,8 @@ } }, { - "accuracy": 0.9913550375204068, - "total_bits": 4737212416, + "accuracy": 0.9815951659111306, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -20940,8 +20940,8 @@ } }, { - "accuracy": 0.9920729999430478, - "total_bits": 5093868288, + "accuracy": 0.9841899939783616, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -20989,8 +20989,8 @@ } }, { - "accuracy": 0.9952271341171581, - "total_bits": 5824164608, + "accuracy": 0.9892366192216286, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -21032,8 +21032,8 @@ } }, { - "accuracy": 0.9956401876115706, - "total_bits": 5910044672, + "accuracy": 0.9902913318219362, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -21075,8 +21075,8 @@ } }, { - "accuracy": 0.9952422082715202, - "total_bits": 6006579968, + "accuracy": 0.9897105138079496, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -21127,8 +21127,8 @@ } }, { - "accuracy": 0.9958630765322596, - "total_bits": 6122381312, + "accuracy": 0.9910190774025978, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -21179,8 +21179,8 @@ } }, { - "accuracy": 0.9976025679206941, - "total_bits": 7391748864, + "accuracy": 0.9947312080912525, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -21231,8 +21231,8 @@ } }, { - "accuracy": 0.9977857030025916, - "total_bits": 7507550208, + "accuracy": 0.9955035212879011, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -21283,8 +21283,8 @@ } }, { - "accuracy": 0.9986054354449152, - "total_bits": 8550425344, + "accuracy": 0.9970415145435254, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -21326,8 +21326,8 @@ } }, { - "accuracy": 0.998663479447714, - "total_bits": 8877312000, + "accuracy": 0.9972939756917185, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -21375,8 +21375,8 @@ } }, { - "accuracy": 0.9987716376417666, - "total_bits": 9674229760, + "accuracy": 0.9979028383731929, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -21421,8 +21421,8 @@ } }, { - "accuracy": 0.9992389820363314, - "total_bits": 11318396928, + "accuracy": 0.9991707487906751, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -21463,8 +21463,8 @@ ], "model.layers.11.self_attn": [ { - "accuracy": 0.9733715525362641, - "total_bits": 89141248, + "accuracy": 0.9480585553101264, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -21527,8 +21527,8 @@ } }, { - "accuracy": 0.9738407410914078, - "total_bits": 91697152, + "accuracy": 0.9492768661584705, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -21591,8 +21591,8 @@ } }, { - "accuracy": 0.9747383556095883, - "total_bits": 95234560, + "accuracy": 0.9494522007298656, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -21655,8 +21655,8 @@ } }, { - "accuracy": 0.9781599895795807, - "total_bits": 111748096, + "accuracy": 0.9558493265649304, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -21719,8 +21719,8 @@ } }, { - "accuracy": 0.9860172977205366, - "total_bits": 132388864, + "accuracy": 0.9697391738300212, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -21783,8 +21783,8 @@ } }, { - "accuracy": 0.9862418688717298, - "total_bits": 132455936, + "accuracy": 0.9706478255102411, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -21847,8 +21847,8 @@ } }, { - "accuracy": 0.988617202907335, - "total_bits": 169089024, + "accuracy": 0.9739242826180998, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -21899,8 +21899,8 @@ } }, { - "accuracy": 0.9888195842504501, - "total_bits": 169221632, + "accuracy": 0.9750338272715453, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -21951,8 +21951,8 @@ } }, { - "accuracy": 0.9897305784979835, - "total_bits": 170671104, + "accuracy": 0.9748053156363312, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -22003,8 +22003,8 @@ } }, { - "accuracy": 0.9905224278336391, - "total_bits": 173039616, + "accuracy": 0.9809897498053033, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -22055,8 +22055,8 @@ } }, { - "accuracy": 0.9928549988544546, - "total_bits": 174398976, + "accuracy": 0.9847488868399523, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -22119,8 +22119,8 @@ } }, { - "accuracy": 0.9931158214749303, - "total_bits": 175225856, + "accuracy": 0.9877576902508736, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -22183,8 +22183,8 @@ } }, { - "accuracy": 0.9932530181540642, - "total_bits": 178728960, + "accuracy": 0.9879636259720428, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -22244,8 +22244,8 @@ } }, { - "accuracy": 0.9935169582313392, - "total_bits": 181067776, + "accuracy": 0.9876856952760136, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -22305,8 +22305,8 @@ } }, { - "accuracy": 0.994473487167852, - "total_bits": 219944960, + "accuracy": 0.993241044554452, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -22366,8 +22366,8 @@ } }, { - "accuracy": 0.9947877723607235, - "total_bits": 223010816, + "accuracy": 0.9950156124541536, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -22427,8 +22427,8 @@ } }, { - "accuracy": 0.9947857173101511, - "total_bits": 252975104, + "accuracy": 0.9941857220619568, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -22479,8 +22479,8 @@ } }, { - "accuracy": 0.9950033594213892, - "total_bits": 265314304, + "accuracy": 0.9966756289140903, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -22531,8 +22531,8 @@ } }, { - "accuracy": 0.9958009064284852, - "total_bits": 336861184, + "accuracy": 0.9983837441122887, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -22585,8 +22585,8 @@ ], "model.layers.11.block_sparse_moe": [ { - "accuracy": 0.9774733807425946, - "total_bits": 3157926400, + "accuracy": 0.9571995388832875, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -22637,8 +22637,8 @@ } }, { - "accuracy": 0.9782874267548323, - "total_bits": 3268026880, + "accuracy": 0.9586562415352091, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -22689,8 +22689,8 @@ } }, { - "accuracy": 0.9811127801658586, - "total_bits": 3652411392, + "accuracy": 0.9651775022794027, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -22738,8 +22738,8 @@ } }, { - "accuracy": 0.9817700608982705, - "total_bits": 4098056192, + "accuracy": 0.9673221578414086, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -22787,8 +22787,8 @@ } }, { - "accuracy": 0.9894503048853949, - "total_bits": 4621411072, + "accuracy": 0.9783728326437995, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -22839,8 +22839,8 @@ } }, { - "accuracy": 0.9903252185613383, - "total_bits": 4737212416, + "accuracy": 0.9801771265338175, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -22891,8 +22891,8 @@ } }, { - "accuracy": 0.9911608938127756, - "total_bits": 5093868288, + "accuracy": 0.9830119757825742, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -22940,8 +22940,8 @@ } }, { - "accuracy": 0.994622046360746, - "total_bits": 5824164608, + "accuracy": 0.9883486347680446, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -22983,8 +22983,8 @@ } }, { - "accuracy": 0.9950964349554852, - "total_bits": 5910044672, + "accuracy": 0.9895116694970056, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -23026,8 +23026,8 @@ } }, { - "accuracy": 0.9946644001465756, - "total_bits": 6006579968, + "accuracy": 0.9888882472296245, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -23078,8 +23078,8 @@ } }, { - "accuracy": 0.9953678465099074, - "total_bits": 6122381312, + "accuracy": 0.9903137713408796, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -23130,8 +23130,8 @@ } }, { - "accuracy": 0.9973084576195106, - "total_bits": 7391748864, + "accuracy": 0.9943096857678029, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -23182,8 +23182,8 @@ } }, { - "accuracy": 0.9975077644194243, - "total_bits": 7507550208, + "accuracy": 0.9951471788881463, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -23234,8 +23234,8 @@ } }, { - "accuracy": 0.9984237869575736, - "total_bits": 8550425344, + "accuracy": 0.9967950813334028, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -23277,8 +23277,8 @@ } }, { - "accuracy": 0.9984905631499714, - "total_bits": 8877312000, + "accuracy": 0.9970782165037235, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -23326,8 +23326,8 @@ } }, { - "accuracy": 0.9986156240120181, - "total_bits": 9674229760, + "accuracy": 0.9977483126604056, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -23372,8 +23372,8 @@ } }, { - "accuracy": 0.9991250445018522, - "total_bits": 11318396928, + "accuracy": 0.9990916172873767, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -23414,8 +23414,8 @@ ], "model.layers.12.self_attn": [ { - "accuracy": 0.9708089563064277, - "total_bits": 89141248, + "accuracy": 0.9407800055923872, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -23478,8 +23478,8 @@ } }, { - "accuracy": 0.9723074191715568, - "total_bits": 91697152, + "accuracy": 0.9422117535723373, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -23542,8 +23542,8 @@ } }, { - "accuracy": 0.9737873944686726, - "total_bits": 95234560, + "accuracy": 0.945788890356198, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -23606,8 +23606,8 @@ } }, { - "accuracy": 0.9774536238983274, - "total_bits": 111748096, + "accuracy": 0.9518155162222683, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -23670,8 +23670,8 @@ } }, { - "accuracy": 0.9839307117508724, - "total_bits": 132388864, + "accuracy": 0.9639137858757749, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -23734,8 +23734,8 @@ } }, { - "accuracy": 0.9843092535738833, - "total_bits": 132455936, + "accuracy": 0.9640667054336518, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -23798,8 +23798,8 @@ } }, { - "accuracy": 0.9868137129815295, - "total_bits": 169089024, + "accuracy": 0.9678705315163825, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -23850,8 +23850,8 @@ } }, { - "accuracy": 0.9872112587909214, - "total_bits": 169221632, + "accuracy": 0.9681359695096035, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -23902,8 +23902,8 @@ } }, { - "accuracy": 0.988186648930423, - "total_bits": 170671104, + "accuracy": 0.9707617083040532, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -23954,8 +23954,8 @@ } }, { - "accuracy": 0.9895867514424026, - "total_bits": 173039616, + "accuracy": 0.9710851081763394, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -24006,8 +24006,8 @@ } }, { - "accuracy": 0.9910167078778613, - "total_bits": 174398976, + "accuracy": 0.9831977170251776, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -24070,8 +24070,8 @@ } }, { - "accuracy": 0.9915765909245238, - "total_bits": 175225856, + "accuracy": 0.985465218371246, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -24134,8 +24134,8 @@ } }, { - "accuracy": 0.9916286769148428, - "total_bits": 178728960, + "accuracy": 0.9852282966894563, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -24195,8 +24195,8 @@ } }, { - "accuracy": 0.9923267834819853, - "total_bits": 181067776, + "accuracy": 0.9863889662083238, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -24256,8 +24256,8 @@ } }, { - "accuracy": 0.9936101045750547, - "total_bits": 219944960, + "accuracy": 0.9919001725429553, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -24317,8 +24317,8 @@ } }, { - "accuracy": 0.9938472589128651, - "total_bits": 223010816, + "accuracy": 0.9927827261271887, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -24378,8 +24378,8 @@ } }, { - "accuracy": 0.9939423508185428, - "total_bits": 252975104, + "accuracy": 0.9928575994854327, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -24430,8 +24430,8 @@ } }, { - "accuracy": 0.9942719180544373, - "total_bits": 265314304, + "accuracy": 0.9945695431088097, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -24482,8 +24482,8 @@ } }, { - "accuracy": 0.9941800113883801, - "total_bits": 336861184, + "accuracy": 0.9978717267968022, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -24536,8 +24536,8 @@ ], "model.layers.12.block_sparse_moe": [ { - "accuracy": 0.9757613015826792, - "total_bits": 3157926400, + "accuracy": 0.9556598619674332, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -24588,8 +24588,8 @@ } }, { - "accuracy": 0.9766764892265201, - "total_bits": 3268026880, + "accuracy": 0.9572261371649802, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -24640,8 +24640,8 @@ } }, { - "accuracy": 0.979647456551902, - "total_bits": 3652411392, + "accuracy": 0.9637906584539451, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -24689,8 +24689,8 @@ } }, { - "accuracy": 0.9803361588856205, - "total_bits": 4098056192, + "accuracy": 0.965922221832443, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -24738,8 +24738,8 @@ } }, { - "accuracy": 0.988618680567015, - "total_bits": 4621411072, + "accuracy": 0.9776607557141688, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -24790,8 +24790,8 @@ } }, { - "accuracy": 0.9895875527290627, - "total_bits": 4737212416, + "accuracy": 0.9795251938339788, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -24842,8 +24842,8 @@ } }, { - "accuracy": 0.9904635529965162, - "total_bits": 5093868288, + "accuracy": 0.9823558143398259, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -24891,8 +24891,8 @@ } }, { - "accuracy": 0.9942064118513372, - "total_bits": 5824164608, + "accuracy": 0.9880022228026064, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -24934,8 +24934,8 @@ } }, { - "accuracy": 0.9947184650809504, - "total_bits": 5910044672, + "accuracy": 0.9891640619607642, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -24977,8 +24977,8 @@ } }, { - "accuracy": 0.9942244723206386, - "total_bits": 6006579968, + "accuracy": 0.988511429328355, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -25029,8 +25029,8 @@ } }, { - "accuracy": 0.9949943932588212, - "total_bits": 6122381312, + "accuracy": 0.9899759060499491, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -25081,8 +25081,8 @@ } }, { - "accuracy": 0.9970723112928681, - "total_bits": 7391748864, + "accuracy": 0.9941097390910727, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -25133,8 +25133,8 @@ } }, { - "accuracy": 0.9972957612917526, - "total_bits": 7507550208, + "accuracy": 0.9949719366923091, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -25185,8 +25185,8 @@ } }, { - "accuracy": 0.9982628588550142, - "total_bits": 8550425344, + "accuracy": 0.9966843631300435, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -25228,8 +25228,8 @@ } }, { - "accuracy": 0.9983614889279124, - "total_bits": 8877312000, + "accuracy": 0.9969654065207578, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -25277,8 +25277,8 @@ } }, { - "accuracy": 0.9984910816856427, - "total_bits": 9674229760, + "accuracy": 0.9976198102085618, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -25323,8 +25323,8 @@ } }, { - "accuracy": 0.9990406542492565, - "total_bits": 11318396928, + "accuracy": 0.9990634609075641, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -25365,8 +25365,8 @@ ], "model.layers.13.self_attn": [ { - "accuracy": 0.9707584965508431, - "total_bits": 89141248, + "accuracy": 0.9459845565143041, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -25429,8 +25429,8 @@ } }, { - "accuracy": 0.9718851272482425, - "total_bits": 91697152, + "accuracy": 0.948091006197501, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -25493,8 +25493,8 @@ } }, { - "accuracy": 0.9739969543879852, - "total_bits": 95234560, + "accuracy": 0.951265208888799, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -25557,8 +25557,8 @@ } }, { - "accuracy": 0.9790830040583387, - "total_bits": 111748096, + "accuracy": 0.9596501993364654, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -25621,8 +25621,8 @@ } }, { - "accuracy": 0.9849753058515489, - "total_bits": 132388864, + "accuracy": 0.9724868709745351, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -25685,8 +25685,8 @@ } }, { - "accuracy": 0.9850553942960687, - "total_bits": 132455936, + "accuracy": 0.9729858353093732, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -25749,8 +25749,8 @@ } }, { - "accuracy": 0.98919587029377, - "total_bits": 169089024, + "accuracy": 0.979647349944571, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -25801,8 +25801,8 @@ } }, { - "accuracy": 0.9892737554037012, - "total_bits": 169221632, + "accuracy": 0.9802964030823205, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -25853,8 +25853,8 @@ } }, { - "accuracy": 0.9920541802130174, - "total_bits": 170671104, + "accuracy": 0.9844908807135653, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -25905,8 +25905,8 @@ } }, { - "accuracy": 0.9924534329620656, - "total_bits": 173039616, + "accuracy": 0.981905620341422, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -25957,8 +25957,8 @@ } }, { - "accuracy": 0.9928897241479717, - "total_bits": 174398976, + "accuracy": 0.9862219468341209, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -26021,8 +26021,8 @@ } }, { - "accuracy": 0.9933642201940529, - "total_bits": 175225856, + "accuracy": 0.9863935713801766, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -26085,8 +26085,8 @@ } }, { - "accuracy": 0.9936873932019807, - "total_bits": 178728960, + "accuracy": 0.9867382444354007, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -26146,8 +26146,8 @@ } }, { - "accuracy": 0.9941226172668394, - "total_bits": 181067776, + "accuracy": 0.9858805107505759, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -26207,8 +26207,8 @@ } }, { - "accuracy": 0.995746691187378, - "total_bits": 219944960, + "accuracy": 0.9919062872140785, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -26268,8 +26268,8 @@ } }, { - "accuracy": 0.9959273100685095, - "total_bits": 223010816, + "accuracy": 0.9923584465577733, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -26329,8 +26329,8 @@ } }, { - "accuracy": 0.9964019657491008, - "total_bits": 252975104, + "accuracy": 0.9930100588317146, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -26381,8 +26381,8 @@ } }, { - "accuracy": 0.9965225748310331, - "total_bits": 265314304, + "accuracy": 0.9934999739780324, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -26433,8 +26433,8 @@ } }, { - "accuracy": 0.9971891410241369, - "total_bits": 336861184, + "accuracy": 0.9982385287094075, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -26487,8 +26487,8 @@ ], "model.layers.13.block_sparse_moe": [ { - "accuracy": 0.9721719652879983, - "total_bits": 3157926400, + "accuracy": 0.950018395204097, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -26539,8 +26539,8 @@ } }, { - "accuracy": 0.9732124981237575, - "total_bits": 3268026880, + "accuracy": 0.9517145716235973, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -26591,8 +26591,8 @@ } }, { - "accuracy": 0.9765201359987259, - "total_bits": 3652411392, + "accuracy": 0.9592050902429037, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -26640,8 +26640,8 @@ } }, { - "accuracy": 0.9772818417986855, - "total_bits": 4098056192, + "accuracy": 0.9617277451907285, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -26689,8 +26689,8 @@ } }, { - "accuracy": 0.9869125465629622, - "total_bits": 4621411072, + "accuracy": 0.9745810469903518, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -26741,8 +26741,8 @@ } }, { - "accuracy": 0.9880268336855806, - "total_bits": 4737212416, + "accuracy": 0.9767066062486265, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -26793,8 +26793,8 @@ } }, { - "accuracy": 0.9889790539164096, - "total_bits": 5093868288, + "accuracy": 0.980006101570325, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -26842,8 +26842,8 @@ } }, { - "accuracy": 0.9933497479651123, - "total_bits": 5824164608, + "accuracy": 0.9863103156239958, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -26885,8 +26885,8 @@ } }, { - "accuracy": 0.993949091876857, - "total_bits": 5910044672, + "accuracy": 0.9876835544419009, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -26928,8 +26928,8 @@ } }, { - "accuracy": 0.9933805078617297, - "total_bits": 6006579968, + "accuracy": 0.9869514503952814, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -26980,8 +26980,8 @@ } }, { - "accuracy": 0.9942750816699117, - "total_bits": 6122381312, + "accuracy": 0.9886369838495739, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -27032,8 +27032,8 @@ } }, { - "accuracy": 0.9966737050999654, - "total_bits": 7391748864, + "accuracy": 0.9933367443882162, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -27084,8 +27084,8 @@ } }, { - "accuracy": 0.9969529492664151, - "total_bits": 7507550208, + "accuracy": 0.9943158307214617, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -27136,8 +27136,8 @@ } }, { - "accuracy": 0.9980733398988377, - "total_bits": 8550425344, + "accuracy": 0.9962551453572814, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -27179,8 +27179,8 @@ } }, { - "accuracy": 0.998153586697299, - "total_bits": 8877312000, + "accuracy": 0.9965825055114692, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -27228,8 +27228,8 @@ } }, { - "accuracy": 0.9982957493411959, - "total_bits": 9674229760, + "accuracy": 0.9973369745457603, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -27274,8 +27274,8 @@ } }, { - "accuracy": 0.9989663626547554, - "total_bits": 11318396928, + "accuracy": 0.9989629722786049, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -27316,8 +27316,8 @@ ], "model.layers.14.self_attn": [ { - "accuracy": 0.9641065050382167, - "total_bits": 89141248, + "accuracy": 0.9401806449168362, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -27380,8 +27380,8 @@ } }, { - "accuracy": 0.9657321753911674, - "total_bits": 91697152, + "accuracy": 0.9425502150552347, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -27444,8 +27444,8 @@ } }, { - "accuracy": 0.968136184499599, - "total_bits": 95234560, + "accuracy": 0.9461621883674525, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -27508,8 +27508,8 @@ } }, { - "accuracy": 0.9741643202723935, - "total_bits": 111748096, + "accuracy": 0.9560625343583524, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -27572,8 +27572,8 @@ } }, { - "accuracy": 0.9829264268628322, - "total_bits": 132388864, + "accuracy": 0.9673867629608139, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -27636,8 +27636,8 @@ } }, { - "accuracy": 0.9831782493856736, - "total_bits": 132455936, + "accuracy": 0.9697604862158187, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -27700,8 +27700,8 @@ } }, { - "accuracy": 0.9882475630729459, - "total_bits": 169089024, + "accuracy": 0.9752689408196602, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -27752,8 +27752,8 @@ } }, { - "accuracy": 0.988559122721199, - "total_bits": 169221632, + "accuracy": 0.9782839452091139, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -27804,8 +27804,8 @@ } }, { - "accuracy": 0.989835862768814, - "total_bits": 170671104, + "accuracy": 0.979090646491386, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -27856,8 +27856,8 @@ } }, { - "accuracy": 0.9902668939903378, - "total_bits": 173039616, + "accuracy": 0.9773875969403889, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -27908,8 +27908,8 @@ } }, { - "accuracy": 0.9911461090086959, - "total_bits": 174398976, + "accuracy": 0.9848465789691545, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -27972,8 +27972,8 @@ } }, { - "accuracy": 0.9917607354291249, - "total_bits": 175225856, + "accuracy": 0.9863923994271317, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -28036,8 +28036,8 @@ } }, { - "accuracy": 0.992074178357143, - "total_bits": 178728960, + "accuracy": 0.9863438932516146, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -28097,8 +28097,8 @@ } }, { - "accuracy": 0.9927609959268011, - "total_bits": 181067776, + "accuracy": 0.9881843874900369, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -28158,8 +28158,8 @@ } }, { - "accuracy": 0.9951277840009425, - "total_bits": 219944960, + "accuracy": 0.9918412281767814, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -28219,8 +28219,8 @@ } }, { - "accuracy": 0.9955333441030234, - "total_bits": 223010816, + "accuracy": 0.9929877627364476, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -28280,8 +28280,8 @@ } }, { - "accuracy": 0.9959065288421698, - "total_bits": 252975104, + "accuracy": 0.9933236322831362, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -28332,8 +28332,8 @@ } }, { - "accuracy": 0.9962801556539489, - "total_bits": 265314304, + "accuracy": 0.9948037892972934, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -28384,8 +28384,8 @@ } }, { - "accuracy": 0.996871530849603, - "total_bits": 336861184, + "accuracy": 0.9979686091755866, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -28438,8 +28438,8 @@ ], "model.layers.14.block_sparse_moe": [ { - "accuracy": 0.9698167688911781, - "total_bits": 3157926400, + "accuracy": 0.9483937787590548, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -28490,8 +28490,8 @@ } }, { - "accuracy": 0.9709328467724845, - "total_bits": 3268026880, + "accuracy": 0.9501248945016414, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -28542,8 +28542,8 @@ } }, { - "accuracy": 0.9745238845935091, - "total_bits": 3652411392, + "accuracy": 0.9577048811479472, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -28591,8 +28591,8 @@ } }, { - "accuracy": 0.9753873583395034, - "total_bits": 4098056192, + "accuracy": 0.9602909557870589, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -28640,8 +28640,8 @@ } }, { - "accuracy": 0.9857487074332312, - "total_bits": 4621411072, + "accuracy": 0.9738168045005295, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -28692,8 +28692,8 @@ } }, { - "accuracy": 0.9869400708703324, - "total_bits": 4737212416, + "accuracy": 0.9760183882026467, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -28744,8 +28744,8 @@ } }, { - "accuracy": 0.9880210132687353, - "total_bits": 5093868288, + "accuracy": 0.9793616587412544, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -28793,8 +28793,8 @@ } }, { - "accuracy": 0.9926821021945216, - "total_bits": 5824164608, + "accuracy": 0.9858145917096408, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -28836,8 +28836,8 @@ } }, { - "accuracy": 0.993338937551016, - "total_bits": 5910044672, + "accuracy": 0.987245007432648, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -28879,8 +28879,8 @@ } }, { - "accuracy": 0.9927881031180732, - "total_bits": 6006579968, + "accuracy": 0.9865464899048675, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -28931,8 +28931,8 @@ } }, { - "accuracy": 0.9937305047933478, - "total_bits": 6122381312, + "accuracy": 0.9882844709354686, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -28983,8 +28983,8 @@ } }, { - "accuracy": 0.9963549681851873, - "total_bits": 7391748864, + "accuracy": 0.9931153879224439, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -29035,8 +29035,8 @@ } }, { - "accuracy": 0.9966320907551562, - "total_bits": 7507550208, + "accuracy": 0.9941280739076319, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -29087,8 +29087,8 @@ } }, { - "accuracy": 0.9978540273878025, - "total_bits": 8550425344, + "accuracy": 0.996097138915502, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -29130,8 +29130,8 @@ } }, { - "accuracy": 0.9979705966761685, - "total_bits": 8877312000, + "accuracy": 0.9964659420293174, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -29179,8 +29179,8 @@ } }, { - "accuracy": 0.9981346285057953, - "total_bits": 9674229760, + "accuracy": 0.9972379904356785, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -29225,8 +29225,8 @@ } }, { - "accuracy": 0.9988347834732849, - "total_bits": 11318396928, + "accuracy": 0.9988783098888234, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -29267,8 +29267,8 @@ ], "model.layers.15.self_attn": [ { - "accuracy": 0.9590973767917603, - "total_bits": 89141248, + "accuracy": 0.9315137156518176, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -29331,8 +29331,8 @@ } }, { - "accuracy": 0.9599927307572216, - "total_bits": 91697152, + "accuracy": 0.9329985165968537, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -29395,8 +29395,8 @@ } }, { - "accuracy": 0.9621528154239058, - "total_bits": 95234560, + "accuracy": 0.9391444061184302, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -29459,8 +29459,8 @@ } }, { - "accuracy": 0.967536962358281, - "total_bits": 111748096, + "accuracy": 0.948023259465117, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -29523,8 +29523,8 @@ } }, { - "accuracy": 0.980201457045041, - "total_bits": 132388864, + "accuracy": 0.9637441590311937, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -29587,8 +29587,8 @@ } }, { - "accuracy": 0.9808398804161698, - "total_bits": 132455936, + "accuracy": 0.9653778614592738, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -29651,8 +29651,8 @@ } }, { - "accuracy": 0.9851523487595841, - "total_bits": 169089024, + "accuracy": 0.9709783159196377, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -29703,8 +29703,8 @@ } }, { - "accuracy": 0.9858836574712768, - "total_bits": 169221632, + "accuracy": 0.972968077956466, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -29755,8 +29755,8 @@ } }, { - "accuracy": 0.9872574509936385, - "total_bits": 170671104, + "accuracy": 0.9761303568084259, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -29807,8 +29807,8 @@ } }, { - "accuracy": 0.987837110820692, - "total_bits": 173039616, + "accuracy": 0.9743171202426311, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -29859,8 +29859,8 @@ } }, { - "accuracy": 0.989590612181928, - "total_bits": 174398976, + "accuracy": 0.9815413581090979, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -29923,8 +29923,8 @@ } }, { - "accuracy": 0.9897958611836657, - "total_bits": 175225856, + "accuracy": 0.9833644789323444, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -29987,8 +29987,8 @@ } }, { - "accuracy": 0.990158248343505, - "total_bits": 178728960, + "accuracy": 0.9830658198043238, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -30048,8 +30048,8 @@ } }, { - "accuracy": 0.9905650251312181, - "total_bits": 181067776, + "accuracy": 0.9850413865642622, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -30109,8 +30109,8 @@ } }, { - "accuracy": 0.9948125910595991, - "total_bits": 219944960, + "accuracy": 0.988824480533367, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -30170,8 +30170,8 @@ } }, { - "accuracy": 0.9953576484695077, - "total_bits": 223010816, + "accuracy": 0.9914607986356714, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -30231,8 +30231,8 @@ } }, { - "accuracy": 0.9956032854970545, - "total_bits": 252975104, + "accuracy": 0.9899360687413719, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -30283,8 +30283,8 @@ } }, { - "accuracy": 0.9962465860444354, - "total_bits": 265314304, + "accuracy": 0.9933738394465763, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -30335,8 +30335,8 @@ } }, { - "accuracy": 0.9977214289683616, - "total_bits": 336861184, + "accuracy": 0.9975221217137005, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -30389,8 +30389,8 @@ ], "model.layers.15.block_sparse_moe": [ { - "accuracy": 0.9678959377342835, - "total_bits": 3157926400, + "accuracy": 0.9478678296436556, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -30441,8 +30441,8 @@ } }, { - "accuracy": 0.9690284366952255, - "total_bits": 3268026880, + "accuracy": 0.9495850859675556, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -30493,8 +30493,8 @@ } }, { - "accuracy": 0.9726049138698727, - "total_bits": 3652411392, + "accuracy": 0.9569320738082752, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -30542,8 +30542,8 @@ } }, { - "accuracy": 0.9734748881310225, - "total_bits": 4098056192, + "accuracy": 0.9594856215408072, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -30591,8 +30591,8 @@ } }, { - "accuracy": 0.984790644957684, - "total_bits": 4621411072, + "accuracy": 0.97352615967975, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -30643,8 +30643,8 @@ } }, { - "accuracy": 0.9860330135561526, - "total_bits": 4737212416, + "accuracy": 0.9756834985455498, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -30695,8 +30695,8 @@ } }, { - "accuracy": 0.9871029739151709, - "total_bits": 5093868288, + "accuracy": 0.9789416713756509, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -30744,8 +30744,8 @@ } }, { - "accuracy": 0.9922356500755996, - "total_bits": 5824164608, + "accuracy": 0.9857172227057163, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -30787,8 +30787,8 @@ } }, { - "accuracy": 0.992918349133106, - "total_bits": 5910044672, + "accuracy": 0.9871110374951968, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -30830,8 +30830,8 @@ } }, { - "accuracy": 0.9923026785545517, - "total_bits": 6006579968, + "accuracy": 0.9864328918338288, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -30882,8 +30882,8 @@ } }, { - "accuracy": 0.9933102391951252, - "total_bits": 6122381312, + "accuracy": 0.9881537759210914, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -30934,8 +30934,8 @@ } }, { - "accuracy": 0.9961258379626088, - "total_bits": 7391748864, + "accuracy": 0.9930758495247574, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -30986,8 +30986,8 @@ } }, { - "accuracy": 0.9964437023299979, - "total_bits": 7507550208, + "accuracy": 0.9940747889195336, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -31038,8 +31038,8 @@ } }, { - "accuracy": 0.99775944029534, - "total_bits": 8550425344, + "accuracy": 0.9960961487595341, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -31081,8 +31081,8 @@ } }, { - "accuracy": 0.9978638121319818, - "total_bits": 8877312000, + "accuracy": 0.9964593769218482, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -31130,8 +31130,8 @@ } }, { - "accuracy": 0.9980280658637639, - "total_bits": 9674229760, + "accuracy": 0.9971955397631973, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -31176,8 +31176,8 @@ } }, { - "accuracy": 0.9988173715973971, - "total_bits": 11318396928, + "accuracy": 0.9989019536442356, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -31218,8 +31218,8 @@ ], "model.layers.16.self_attn": [ { - "accuracy": 0.959370452677831, - "total_bits": 89141248, + "accuracy": 0.9397711423225701, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -31282,8 +31282,8 @@ } }, { - "accuracy": 0.9603458575438708, - "total_bits": 91697152, + "accuracy": 0.9413204359007068, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -31346,8 +31346,8 @@ } }, { - "accuracy": 0.9630648691672832, - "total_bits": 95234560, + "accuracy": 0.9457903093425557, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -31410,8 +31410,8 @@ } }, { - "accuracy": 0.9686298384331167, - "total_bits": 111748096, + "accuracy": 0.9541207190486602, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -31474,8 +31474,8 @@ } }, { - "accuracy": 0.9781083543784916, - "total_bits": 132388864, + "accuracy": 0.9638152428669855, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -31538,8 +31538,8 @@ } }, { - "accuracy": 0.9779296274064109, - "total_bits": 132455936, + "accuracy": 0.9640122182318009, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -31602,8 +31602,8 @@ } }, { - "accuracy": 0.9828085996559821, - "total_bits": 169089024, + "accuracy": 0.9704888662090525, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -31654,8 +31654,8 @@ } }, { - "accuracy": 0.982520624413155, - "total_bits": 169221632, + "accuracy": 0.9706140501948539, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -31706,8 +31706,8 @@ } }, { - "accuracy": 0.9854462618823163, - "total_bits": 170671104, + "accuracy": 0.9779586265503895, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -31758,8 +31758,8 @@ } }, { - "accuracy": 0.9857024035300128, - "total_bits": 173039616, + "accuracy": 0.9775831491569988, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -31810,8 +31810,8 @@ } }, { - "accuracy": 0.9897344874334522, - "total_bits": 174398976, + "accuracy": 0.9842410907440353, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -31874,8 +31874,8 @@ } }, { - "accuracy": 0.9904425577260554, - "total_bits": 175225856, + "accuracy": 0.9858191250095842, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -31938,8 +31938,8 @@ } }, { - "accuracy": 0.990795384044759, - "total_bits": 178728960, + "accuracy": 0.9862943066982552, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -31999,8 +31999,8 @@ } }, { - "accuracy": 0.9913682635815348, - "total_bits": 181067776, + "accuracy": 0.987902037857566, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -32060,8 +32060,8 @@ } }, { - "accuracy": 0.9937779428146314, - "total_bits": 219944960, + "accuracy": 0.991395413322607, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -32121,8 +32121,8 @@ } }, { - "accuracy": 0.9938872661732603, - "total_bits": 223010816, + "accuracy": 0.993933234240103, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -32182,8 +32182,8 @@ } }, { - "accuracy": 0.9944491329661105, - "total_bits": 252975104, + "accuracy": 0.992712245308212, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -32234,8 +32234,8 @@ } }, { - "accuracy": 0.9945927519293036, - "total_bits": 265314304, + "accuracy": 0.9958388847080641, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -32286,8 +32286,8 @@ } }, { - "accuracy": 0.9968691545363981, - "total_bits": 336861184, + "accuracy": 0.9980905061456724, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -32340,8 +32340,8 @@ ], "model.layers.16.block_sparse_moe": [ { - "accuracy": 0.9670369853265584, - "total_bits": 3157926400, + "accuracy": 0.9493119687540457, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -32392,8 +32392,8 @@ } }, { - "accuracy": 0.9681932952953503, - "total_bits": 3268026880, + "accuracy": 0.9509472267236561, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -32444,8 +32444,8 @@ } }, { - "accuracy": 0.9717505836160854, - "total_bits": 3652411392, + "accuracy": 0.9579482565168291, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -32493,8 +32493,8 @@ } }, { - "accuracy": 0.9726418619975448, - "total_bits": 4098056192, + "accuracy": 0.9604926454485394, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -32542,8 +32542,8 @@ } }, { - "accuracy": 0.9843301583314314, - "total_bits": 4621411072, + "accuracy": 0.9742433395877015, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -32594,8 +32594,8 @@ } }, { - "accuracy": 0.9856621689395979, - "total_bits": 4737212416, + "accuracy": 0.9763855305500329, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -32646,8 +32646,8 @@ } }, { - "accuracy": 0.9867399072390981, - "total_bits": 5093868288, + "accuracy": 0.9795200815133285, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -32695,8 +32695,8 @@ } }, { - "accuracy": 0.9919831329316366, - "total_bits": 5824164608, + "accuracy": 0.9860216007364215, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -32738,8 +32738,8 @@ } }, { - "accuracy": 0.9926942909660283, - "total_bits": 5910044672, + "accuracy": 0.9874117685976671, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -32781,8 +32781,8 @@ } }, { - "accuracy": 0.992066587874433, - "total_bits": 6006579968, + "accuracy": 0.9867868451110553, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -32833,8 +32833,8 @@ } }, { - "accuracy": 0.9931204621680081, - "total_bits": 6122381312, + "accuracy": 0.9884927677485393, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -32885,8 +32885,8 @@ } }, { - "accuracy": 0.9959960502601461, - "total_bits": 7391748864, + "accuracy": 0.9932484008822939, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -32937,8 +32937,8 @@ } }, { - "accuracy": 0.9963182286010124, - "total_bits": 7507550208, + "accuracy": 0.9942399785795715, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -32989,8 +32989,8 @@ } }, { - "accuracy": 0.9976596086489735, - "total_bits": 8550425344, + "accuracy": 0.9961670425982447, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -33032,8 +33032,8 @@ } }, { - "accuracy": 0.9977803544097696, - "total_bits": 8877312000, + "accuracy": 0.9965491881121125, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -33081,8 +33081,8 @@ } }, { - "accuracy": 0.9979445637436584, - "total_bits": 9674229760, + "accuracy": 0.9972608376228891, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -33127,8 +33127,8 @@ } }, { - "accuracy": 0.9987378744554007, - "total_bits": 11318396928, + "accuracy": 0.9989062223030487, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -33169,8 +33169,8 @@ ], "model.layers.17.self_attn": [ { - "accuracy": 0.9640702531905845, - "total_bits": 89141248, + "accuracy": 0.9441127792233601, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -33233,8 +33233,8 @@ } }, { - "accuracy": 0.9653855575015768, - "total_bits": 91697152, + "accuracy": 0.9459331755060703, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -33297,8 +33297,8 @@ } }, { - "accuracy": 0.9677549366606399, - "total_bits": 95234560, + "accuracy": 0.9503993256948888, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -33361,8 +33361,8 @@ } }, { - "accuracy": 0.9745324800023809, - "total_bits": 111748096, + "accuracy": 0.9591194891836494, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -33425,8 +33425,8 @@ } }, { - "accuracy": 0.9807347662281245, - "total_bits": 132388864, + "accuracy": 0.9686797726317309, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -33489,8 +33489,8 @@ } }, { - "accuracy": 0.9810525577049702, - "total_bits": 132455936, + "accuracy": 0.9708043848804664, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -33553,8 +33553,8 @@ } }, { - "accuracy": 0.9854455384192988, - "total_bits": 169089024, + "accuracy": 0.9748455978115089, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -33605,8 +33605,8 @@ } }, { - "accuracy": 0.9857985890703276, - "total_bits": 169221632, + "accuracy": 0.9774760155705735, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -33657,8 +33657,8 @@ } }, { - "accuracy": 0.9872057716711424, - "total_bits": 170671104, + "accuracy": 0.9809882376866881, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -33709,8 +33709,8 @@ } }, { - "accuracy": 0.9874689207063057, - "total_bits": 173039616, + "accuracy": 0.9769386594125535, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -33761,8 +33761,8 @@ } }, { - "accuracy": 0.9895567100029439, - "total_bits": 174398976, + "accuracy": 0.9848150178877404, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -33825,8 +33825,8 @@ } }, { - "accuracy": 0.9903636685921811, - "total_bits": 175225856, + "accuracy": 0.9864302433707053, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -33889,8 +33889,8 @@ } }, { - "accuracy": 0.9904211115208454, - "total_bits": 178728960, + "accuracy": 0.9860561677924125, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -33950,8 +33950,8 @@ } }, { - "accuracy": 0.9910918730311096, - "total_bits": 181067776, + "accuracy": 0.9885645840695361, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -34011,8 +34011,8 @@ } }, { - "accuracy": 0.9934179077972658, - "total_bits": 219944960, + "accuracy": 0.9910164866159903, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -34072,8 +34072,8 @@ } }, { - "accuracy": 0.9937908199499361, - "total_bits": 223010816, + "accuracy": 0.9939018072909676, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -34133,8 +34133,8 @@ } }, { - "accuracy": 0.9940386263479013, - "total_bits": 252975104, + "accuracy": 0.9921027920863708, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -34185,8 +34185,8 @@ } }, { - "accuracy": 0.994434541236842, - "total_bits": 265314304, + "accuracy": 0.995693151067826, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -34237,8 +34237,8 @@ } }, { - "accuracy": 0.9942275465873536, - "total_bits": 336861184, + "accuracy": 0.9979479199464549, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -34291,8 +34291,8 @@ ], "model.layers.17.block_sparse_moe": [ { - "accuracy": 0.9619532555807382, - "total_bits": 3157926400, + "accuracy": 0.9433453799574636, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -34343,8 +34343,8 @@ } }, { - "accuracy": 0.9632865062449127, - "total_bits": 3268026880, + "accuracy": 0.9451717849588022, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -34395,8 +34395,8 @@ } }, { - "accuracy": 0.9673623585840687, - "total_bits": 3652411392, + "accuracy": 0.9531102606561035, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -34444,8 +34444,8 @@ } }, { - "accuracy": 0.9684132559923455, - "total_bits": 4098056192, + "accuracy": 0.9561459923861548, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -34493,8 +34493,8 @@ } }, { - "accuracy": 0.9819445156026632, - "total_bits": 4621411072, + "accuracy": 0.9710059444187209, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -34545,8 +34545,8 @@ } }, { - "accuracy": 0.9834352825419046, - "total_bits": 4737212416, + "accuracy": 0.9733943465107586, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -34597,8 +34597,8 @@ } }, { - "accuracy": 0.9847124894731678, - "total_bits": 5093868288, + "accuracy": 0.9771110385772772, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -34646,8 +34646,8 @@ } }, { - "accuracy": 0.9907269149553031, - "total_bits": 5824164608, + "accuracy": 0.9841811259539099, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -34689,8 +34689,8 @@ } }, { - "accuracy": 0.9915567447023932, - "total_bits": 5910044672, + "accuracy": 0.9858026889560279, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -34732,8 +34732,8 @@ } }, { - "accuracy": 0.9908680923981592, - "total_bits": 6006579968, + "accuracy": 0.985133044217946, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -34784,8 +34784,8 @@ } }, { - "accuracy": 0.9920561438775621, - "total_bits": 6122381312, + "accuracy": 0.9870425224653445, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -34836,8 +34836,8 @@ } }, { - "accuracy": 0.9953877500374801, - "total_bits": 7391748864, + "accuracy": 0.9924097861949122, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -34888,8 +34888,8 @@ } }, { - "accuracy": 0.9957579407491721, - "total_bits": 7507550208, + "accuracy": 0.9935185532667674, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -34940,8 +34940,8 @@ } }, { - "accuracy": 0.997298397240229, - "total_bits": 8550425344, + "accuracy": 0.9956838821562997, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -34983,8 +34983,8 @@ } }, { - "accuracy": 0.9974382257641992, - "total_bits": 8877312000, + "accuracy": 0.9961252271314152, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -35032,8 +35032,8 @@ } }, { - "accuracy": 0.9976339394488605, - "total_bits": 9674229760, + "accuracy": 0.9969792739866534, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -35078,8 +35078,8 @@ } }, { - "accuracy": 0.9985394349350827, - "total_bits": 11318396928, + "accuracy": 0.998768920704606, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -35120,8 +35120,8 @@ ], "model.layers.18.self_attn": [ { - "accuracy": 0.9619958242401481, - "total_bits": 89141248, + "accuracy": 0.944100035878364, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -35184,8 +35184,8 @@ } }, { - "accuracy": 0.9636114530730993, - "total_bits": 91697152, + "accuracy": 0.9462692486122251, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -35248,8 +35248,8 @@ } }, { - "accuracy": 0.9660536989104003, - "total_bits": 95234560, + "accuracy": 0.9492746348842047, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -35312,8 +35312,8 @@ } }, { - "accuracy": 0.9713986091082916, - "total_bits": 111748096, + "accuracy": 0.9558806358254515, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -35376,8 +35376,8 @@ } }, { - "accuracy": 0.9802581529365852, - "total_bits": 132388864, + "accuracy": 0.964816223597154, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -35440,8 +35440,8 @@ } }, { - "accuracy": 0.980929841985926, - "total_bits": 132455936, + "accuracy": 0.9708674799767323, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -35504,8 +35504,8 @@ } }, { - "accuracy": 0.9848515004268847, - "total_bits": 169089024, + "accuracy": 0.9695597412064672, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -35556,8 +35556,8 @@ } }, { - "accuracy": 0.9857066384865902, - "total_bits": 169221632, + "accuracy": 0.9768148829753045, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -35608,8 +35608,8 @@ } }, { - "accuracy": 0.9871395940426737, - "total_bits": 170671104, + "accuracy": 0.9817102105880622, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -35660,8 +35660,8 @@ } }, { - "accuracy": 0.987566783616785, - "total_bits": 173039616, + "accuracy": 0.98296342206595, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -35712,8 +35712,8 @@ } }, { - "accuracy": 0.989837403758429, - "total_bits": 174398976, + "accuracy": 0.9855629270023201, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -35776,8 +35776,8 @@ } }, { - "accuracy": 0.9906529374420643, - "total_bits": 175225856, + "accuracy": 0.9868671474832809, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -35840,8 +35840,8 @@ } }, { - "accuracy": 0.9906610392499715, - "total_bits": 178728960, + "accuracy": 0.9864950178307481, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -35901,8 +35901,8 @@ } }, { - "accuracy": 0.9916324679215904, - "total_bits": 181067776, + "accuracy": 0.9878727095638169, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -35962,8 +35962,8 @@ } }, { - "accuracy": 0.9936991619761102, - "total_bits": 219944960, + "accuracy": 0.9910703614878003, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -36023,8 +36023,8 @@ } }, { - "accuracy": 0.9946673862286843, - "total_bits": 223010816, + "accuracy": 0.9943521275708918, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -36084,8 +36084,8 @@ } }, { - "accuracy": 0.9942939005850349, - "total_bits": 252975104, + "accuracy": 0.9919499395182356, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -36136,8 +36136,8 @@ } }, { - "accuracy": 0.995337316126097, - "total_bits": 265314304, + "accuracy": 0.996120992465876, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -36188,8 +36188,8 @@ } }, { - "accuracy": 0.9961533050664002, - "total_bits": 336861184, + "accuracy": 0.997838843450154, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -36242,8 +36242,8 @@ ], "model.layers.18.block_sparse_moe": [ { - "accuracy": 0.9608455330599099, - "total_bits": 3157926400, + "accuracy": 0.9435433966573328, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -36294,8 +36294,8 @@ } }, { - "accuracy": 0.962269222130999, - "total_bits": 3268026880, + "accuracy": 0.9453884628601372, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -36346,8 +36346,8 @@ } }, { - "accuracy": 0.9661699881544337, - "total_bits": 3652411392, + "accuracy": 0.9531949894153513, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -36395,8 +36395,8 @@ } }, { - "accuracy": 0.9671995616517961, - "total_bits": 4098056192, + "accuracy": 0.9562870243680663, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -36444,8 +36444,8 @@ } }, { - "accuracy": 0.9813781370176002, - "total_bits": 4621411072, + "accuracy": 0.9710167328012176, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -36496,8 +36496,8 @@ } }, { - "accuracy": 0.9829166517592967, - "total_bits": 4737212416, + "accuracy": 0.9734021785843652, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -36548,8 +36548,8 @@ } }, { - "accuracy": 0.9841475901775993, - "total_bits": 5093868288, + "accuracy": 0.9771070145943668, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -36597,8 +36597,8 @@ } }, { - "accuracy": 0.9904084240552038, - "total_bits": 5824164608, + "accuracy": 0.9841118264157558, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -36640,8 +36640,8 @@ } }, { - "accuracy": 0.9912707750336267, - "total_bits": 5910044672, + "accuracy": 0.985742881661281, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -36683,8 +36683,8 @@ } }, { - "accuracy": 0.9905794380465522, - "total_bits": 6006579968, + "accuracy": 0.9851467484404566, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -36735,8 +36735,8 @@ } }, { - "accuracy": 0.9918109069403727, - "total_bits": 6122381312, + "accuracy": 0.9870551651983988, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -36787,8 +36787,8 @@ } }, { - "accuracy": 0.9952514531032648, - "total_bits": 7391748864, + "accuracy": 0.992422969553445, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -36839,8 +36839,8 @@ } }, { - "accuracy": 0.9956486481823958, - "total_bits": 7507550208, + "accuracy": 0.993527192309557, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -36891,8 +36891,8 @@ } }, { - "accuracy": 0.9972340460808482, - "total_bits": 8550425344, + "accuracy": 0.9956765620154329, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -36934,8 +36934,8 @@ } }, { - "accuracy": 0.9973869249806739, - "total_bits": 8877312000, + "accuracy": 0.996143257143558, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -36983,8 +36983,8 @@ } }, { - "accuracy": 0.9975777000800008, - "total_bits": 9674229760, + "accuracy": 0.9969888347659435, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -37029,8 +37029,8 @@ } }, { - "accuracy": 0.9985425291088177, - "total_bits": 11318396928, + "accuracy": 0.998764097399544, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -37071,8 +37071,8 @@ ], "model.layers.19.self_attn": [ { - "accuracy": 0.9618697152473032, - "total_bits": 89141248, + "accuracy": 0.9485322367399931, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -37135,8 +37135,8 @@ } }, { - "accuracy": 0.9629219453781843, - "total_bits": 91697152, + "accuracy": 0.9500857577077113, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -37199,8 +37199,8 @@ } }, { - "accuracy": 0.9648101897910237, - "total_bits": 95234560, + "accuracy": 0.9527639365987852, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -37263,8 +37263,8 @@ } }, { - "accuracy": 0.9697224091505632, - "total_bits": 111748096, + "accuracy": 0.9597272773971781, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -37327,8 +37327,8 @@ } }, { - "accuracy": 0.9802168817259371, - "total_bits": 132388864, + "accuracy": 0.9724234773311764, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -37391,8 +37391,8 @@ } }, { - "accuracy": 0.9805285421898589, - "total_bits": 132455936, + "accuracy": 0.9728889220277779, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -37455,8 +37455,8 @@ } }, { - "accuracy": 0.9848017314798199, - "total_bits": 169089024, + "accuracy": 0.9783574150351342, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -37507,8 +37507,8 @@ } }, { - "accuracy": 0.9851405883091502, - "total_bits": 169221632, + "accuracy": 0.9789633155451156, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -37559,8 +37559,8 @@ } }, { - "accuracy": 0.9869623391423374, - "total_bits": 170671104, + "accuracy": 0.9831256423785817, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -37611,8 +37611,8 @@ } }, { - "accuracy": 0.9874746386776678, - "total_bits": 173039616, + "accuracy": 0.9846008321765112, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -37663,8 +37663,8 @@ } }, { - "accuracy": 0.9902264169068076, - "total_bits": 174398976, + "accuracy": 0.9853332217608113, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -37727,8 +37727,8 @@ } }, { - "accuracy": 0.9907891191542149, - "total_bits": 175225856, + "accuracy": 0.986055432484136, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -37791,8 +37791,8 @@ } }, { - "accuracy": 0.991070969204884, - "total_bits": 178728960, + "accuracy": 0.986793791555101, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -37852,8 +37852,8 @@ } }, { - "accuracy": 0.9915635046490934, - "total_bits": 181067776, + "accuracy": 0.9884449641394895, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -37913,8 +37913,8 @@ } }, { - "accuracy": 0.9941413525375538, - "total_bits": 219944960, + "accuracy": 0.9924045145962737, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -37974,8 +37974,8 @@ } }, { - "accuracy": 0.9945205125550274, - "total_bits": 223010816, + "accuracy": 0.994551501236856, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -38035,8 +38035,8 @@ } }, { - "accuracy": 0.9948092977574561, - "total_bits": 252975104, + "accuracy": 0.9934489832521649, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -38087,8 +38087,8 @@ } }, { - "accuracy": 0.9951526508666575, - "total_bits": 265314304, + "accuracy": 0.9961645602088538, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -38139,8 +38139,8 @@ } }, { - "accuracy": 0.9965047651348868, - "total_bits": 336861184, + "accuracy": 0.9983886633017391, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -38193,8 +38193,8 @@ ], "model.layers.19.block_sparse_moe": [ { - "accuracy": 0.9605060436297208, - "total_bits": 3157926400, + "accuracy": 0.9458390633226372, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -38245,8 +38245,8 @@ } }, { - "accuracy": 0.9619000393431634, - "total_bits": 3268026880, + "accuracy": 0.9476022410672158, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -38297,8 +38297,8 @@ } }, { - "accuracy": 0.965509470552206, - "total_bits": 3652411392, + "accuracy": 0.9545397699112073, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -38346,8 +38346,8 @@ } }, { - "accuracy": 0.9665207583457232, - "total_bits": 4098056192, + "accuracy": 0.9574298035586253, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -38395,8 +38395,8 @@ } }, { - "accuracy": 0.9811376298312098, - "total_bits": 4621411072, + "accuracy": 0.9722289530036505, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -38447,8 +38447,8 @@ } }, { - "accuracy": 0.9826658011879772, - "total_bits": 4737212416, + "accuracy": 0.9744843047810718, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -38499,8 +38499,8 @@ } }, { - "accuracy": 0.9838444605702534, - "total_bits": 5093868288, + "accuracy": 0.9778556946548633, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -38548,8 +38548,8 @@ } }, { - "accuracy": 0.9902370001072995, - "total_bits": 5824164608, + "accuracy": 0.9847209021099843, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -38591,8 +38591,8 @@ } }, { - "accuracy": 0.9911079448647797, - "total_bits": 5910044672, + "accuracy": 0.9862794500950258, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -38634,8 +38634,8 @@ } }, { - "accuracy": 0.9904438515077345, - "total_bits": 6006579968, + "accuracy": 0.9857712129014544, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -38686,8 +38686,8 @@ } }, { - "accuracy": 0.9916651400271803, - "total_bits": 6122381312, + "accuracy": 0.9875724982994143, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -38738,8 +38738,8 @@ } }, { - "accuracy": 0.995155921584228, - "total_bits": 7391748864, + "accuracy": 0.9927372878082679, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -38790,8 +38790,8 @@ } }, { - "accuracy": 0.9955167253501713, - "total_bits": 7507550208, + "accuracy": 0.9937820014092722, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -38842,8 +38842,8 @@ } }, { - "accuracy": 0.9971365985838929, - "total_bits": 8550425344, + "accuracy": 0.9958304779102036, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -38885,8 +38885,8 @@ } }, { - "accuracy": 0.9973052763089072, - "total_bits": 8877312000, + "accuracy": 0.9962991468200926, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -38934,8 +38934,8 @@ } }, { - "accuracy": 0.997486461448716, - "total_bits": 9674229760, + "accuracy": 0.9970697336102603, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -38980,8 +38980,8 @@ } }, { - "accuracy": 0.9984404019560316, - "total_bits": 11318396928, + "accuracy": 0.9987910888248734, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -39022,8 +39022,8 @@ ], "model.layers.20.self_attn": [ { - "accuracy": 0.9680223814211786, - "total_bits": 89141248, + "accuracy": 0.9571388598415069, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -39086,8 +39086,8 @@ } }, { - "accuracy": 0.968970934394747, - "total_bits": 91697152, + "accuracy": 0.9585761747439392, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -39150,8 +39150,8 @@ } }, { - "accuracy": 0.97148372605443, - "total_bits": 95234560, + "accuracy": 0.9620799219119363, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -39214,8 +39214,8 @@ } }, { - "accuracy": 0.976756701245904, - "total_bits": 111748096, + "accuracy": 0.968682430771878, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -39278,8 +39278,8 @@ } }, { - "accuracy": 0.9834967607166618, - "total_bits": 132388864, + "accuracy": 0.9772915548819583, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -39342,8 +39342,8 @@ } }, { - "accuracy": 0.983747111982666, - "total_bits": 132455936, + "accuracy": 0.9779594209976494, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -39406,8 +39406,8 @@ } }, { - "accuracy": 0.9878225040156394, - "total_bits": 169089024, + "accuracy": 0.9827937356458278, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -39458,8 +39458,8 @@ } }, { - "accuracy": 0.9881299724802375, - "total_bits": 169221632, + "accuracy": 0.9834765909618, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -39510,8 +39510,8 @@ } }, { - "accuracy": 0.9905845007160679, - "total_bits": 170671104, + "accuracy": 0.9875709253101377, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -39562,8 +39562,8 @@ } }, { - "accuracy": 0.9911281993845478, - "total_bits": 173039616, + "accuracy": 0.9879511782346526, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -39614,8 +39614,8 @@ } }, { - "accuracy": 0.9918190269963816, - "total_bits": 174398976, + "accuracy": 0.9893064059870085, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -39678,8 +39678,8 @@ } }, { - "accuracy": 0.9924588934518397, - "total_bits": 175225856, + "accuracy": 0.9898616429418325, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -39742,8 +39742,8 @@ } }, { - "accuracy": 0.992661271680845, - "total_bits": 178728960, + "accuracy": 0.9901805944682565, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -39803,8 +39803,8 @@ } }, { - "accuracy": 0.9933676616929006, - "total_bits": 181067776, + "accuracy": 0.9903349073138088, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -39864,8 +39864,8 @@ } }, { - "accuracy": 0.9942209921136964, - "total_bits": 219944960, + "accuracy": 0.9940110469178762, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -39925,8 +39925,8 @@ } }, { - "accuracy": 0.9947423288540449, - "total_bits": 223010816, + "accuracy": 0.9956042855228588, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -39986,8 +39986,8 @@ } }, { - "accuracy": 0.9947923153231386, - "total_bits": 252975104, + "accuracy": 0.9949965392515878, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -40038,8 +40038,8 @@ } }, { - "accuracy": 0.9953411461610813, - "total_bits": 265314304, + "accuracy": 0.997376124960283, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -40090,8 +40090,8 @@ } }, { - "accuracy": 0.9952344309131149, - "total_bits": 336861184, + "accuracy": 0.9986645225562825, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -40144,8 +40144,8 @@ ], "model.layers.20.block_sparse_moe": [ { - "accuracy": 0.962718888418749, - "total_bits": 3157926400, + "accuracy": 0.951097471290268, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -40196,8 +40196,8 @@ } }, { - "accuracy": 0.9639781415462494, - "total_bits": 3268026880, + "accuracy": 0.9526104652322829, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -40248,8 +40248,8 @@ } }, { - "accuracy": 0.9670027187094092, - "total_bits": 3652411392, + "accuracy": 0.9582708954694681, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -40297,8 +40297,8 @@ } }, { - "accuracy": 0.967860420467332, - "total_bits": 4098056192, + "accuracy": 0.9606669471831992, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -40346,8 +40346,8 @@ } }, { - "accuracy": 0.982133450685069, - "total_bits": 4621411072, + "accuracy": 0.9749162701191381, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -40398,8 +40398,8 @@ } }, { - "accuracy": 0.9835580709041096, - "total_bits": 4737212416, + "accuracy": 0.9769000987289473, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -40450,8 +40450,8 @@ } }, { - "accuracy": 0.9845191793865524, - "total_bits": 5093868288, + "accuracy": 0.9796456600306556, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -40499,8 +40499,8 @@ } }, { - "accuracy": 0.9908427226473577, - "total_bits": 5824164608, + "accuracy": 0.98632077210641, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -40542,8 +40542,8 @@ } }, { - "accuracy": 0.991645262867678, - "total_bits": 5910044672, + "accuracy": 0.9876580036216183, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -40585,8 +40585,8 @@ } }, { - "accuracy": 0.9909580756211653, - "total_bits": 6006579968, + "accuracy": 0.9871875237731729, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -40637,8 +40637,8 @@ } }, { - "accuracy": 0.9921257598907687, - "total_bits": 6122381312, + "accuracy": 0.9887832585664, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -40689,8 +40689,8 @@ } }, { - "accuracy": 0.9954459748696536, - "total_bits": 7391748864, + "accuracy": 0.9934780390904052, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -40741,8 +40741,8 @@ } }, { - "accuracy": 0.9958449788537109, - "total_bits": 7507550208, + "accuracy": 0.9943943501421018, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -40793,8 +40793,8 @@ } }, { - "accuracy": 0.9973745016613975, - "total_bits": 8550425344, + "accuracy": 0.9962841690066853, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -40836,8 +40836,8 @@ } }, { - "accuracy": 0.9975133398693288, - "total_bits": 8877312000, + "accuracy": 0.9966729487059638, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -40885,8 +40885,8 @@ } }, { - "accuracy": 0.9976645504211774, - "total_bits": 9674229760, + "accuracy": 0.9972836173401447, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -40931,8 +40931,8 @@ } }, { - "accuracy": 0.9986586710365373, - "total_bits": 11318396928, + "accuracy": 0.9989181938863112, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -40973,8 +40973,8 @@ ], "model.layers.21.self_attn": [ { - "accuracy": 0.973712076433003, - "total_bits": 89141248, + "accuracy": 0.9614716713549569, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -41037,8 +41037,8 @@ } }, { - "accuracy": 0.9748337833443657, - "total_bits": 91697152, + "accuracy": 0.963086214964278, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -41101,8 +41101,8 @@ } }, { - "accuracy": 0.976894712774083, - "total_bits": 95234560, + "accuracy": 0.9637787014944479, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -41165,8 +41165,8 @@ } }, { - "accuracy": 0.9810931807151064, - "total_bits": 111748096, + "accuracy": 0.9681923598400317, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -41229,8 +41229,8 @@ } }, { - "accuracy": 0.9860269532655366, - "total_bits": 132388864, + "accuracy": 0.9816436593537219, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -41293,8 +41293,8 @@ } }, { - "accuracy": 0.9856525801005773, - "total_bits": 132455936, + "accuracy": 0.9820080667268485, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -41357,8 +41357,8 @@ } }, { - "accuracy": 0.9894815822481178, - "total_bits": 169089024, + "accuracy": 0.9861928703903686, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -41409,8 +41409,8 @@ } }, { - "accuracy": 0.9888410058338195, - "total_bits": 169221632, + "accuracy": 0.9866449932305841, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -41461,8 +41461,8 @@ } }, { - "accuracy": 0.9910960726556368, - "total_bits": 170671104, + "accuracy": 0.9880408434109995, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -41513,8 +41513,8 @@ } }, { - "accuracy": 0.9915940199571196, - "total_bits": 173039616, + "accuracy": 0.9871747837023577, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -41565,8 +41565,8 @@ } }, { - "accuracy": 0.9932493119849823, - "total_bits": 174398976, + "accuracy": 0.9911120076139923, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -41629,8 +41629,8 @@ } }, { - "accuracy": 0.9938530832296237, - "total_bits": 175225856, + "accuracy": 0.9914777223602869, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -41693,8 +41693,8 @@ } }, { - "accuracy": 0.9939716806984507, - "total_bits": 178728960, + "accuracy": 0.9917892163211945, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -41754,8 +41754,8 @@ } }, { - "accuracy": 0.9943377963209059, - "total_bits": 181067776, + "accuracy": 0.9922467299256823, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -41815,8 +41815,8 @@ } }, { - "accuracy": 0.9961623651761329, - "total_bits": 219944960, + "accuracy": 0.9952219360347954, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -41876,8 +41876,8 @@ } }, { - "accuracy": 0.9966895936377114, - "total_bits": 223010816, + "accuracy": 0.9961221647790808, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -41937,8 +41937,8 @@ } }, { - "accuracy": 0.9966860678687226, - "total_bits": 252975104, + "accuracy": 0.996071398643835, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -41989,8 +41989,8 @@ } }, { - "accuracy": 0.9974010805162834, - "total_bits": 265314304, + "accuracy": 0.99745149084265, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -42041,8 +42041,8 @@ } }, { - "accuracy": 0.9981015480734641, - "total_bits": 336861184, + "accuracy": 0.9989069738749095, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -42095,8 +42095,8 @@ ], "model.layers.21.block_sparse_moe": [ { - "accuracy": 0.9679888725513592, - "total_bits": 3157926400, + "accuracy": 0.9585679813171737, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -42147,8 +42147,8 @@ } }, { - "accuracy": 0.9690974837867543, - "total_bits": 3268026880, + "accuracy": 0.9598965388722718, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -42199,8 +42199,8 @@ } }, { - "accuracy": 0.971617178991437, - "total_bits": 3652411392, + "accuracy": 0.9643607013276778, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -42248,8 +42248,8 @@ } }, { - "accuracy": 0.9723064342979342, - "total_bits": 4098056192, + "accuracy": 0.9662402466055937, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -42297,8 +42297,8 @@ } }, { - "accuracy": 0.9846780726802535, - "total_bits": 4621411072, + "accuracy": 0.9788769351725932, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -42349,8 +42349,8 @@ } }, { - "accuracy": 0.9859261962701567, - "total_bits": 4737212416, + "accuracy": 0.9805424835940357, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -42401,8 +42401,8 @@ } }, { - "accuracy": 0.9866656510275789, - "total_bits": 5093868288, + "accuracy": 0.9826520873466507, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -42450,8 +42450,8 @@ } }, { - "accuracy": 0.9921755813411437, - "total_bits": 5824164608, + "accuracy": 0.988494412144064, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -42493,8 +42493,8 @@ } }, { - "accuracy": 0.9928646004409529, - "total_bits": 5910044672, + "accuracy": 0.9895966727781342, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -42536,8 +42536,8 @@ } }, { - "accuracy": 0.9922199362481479, - "total_bits": 6006579968, + "accuracy": 0.9891919067449635, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -42588,8 +42588,8 @@ } }, { - "accuracy": 0.993255367007805, - "total_bits": 6122381312, + "accuracy": 0.9905454216204816, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -42640,8 +42640,8 @@ } }, { - "accuracy": 0.9960741178074386, - "total_bits": 7391748864, + "accuracy": 0.9944881126721157, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -42692,8 +42692,8 @@ } }, { - "accuracy": 0.9964278877305333, - "total_bits": 7507550208, + "accuracy": 0.9952716810366837, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -42744,8 +42744,8 @@ } }, { - "accuracy": 0.9977186070464086, - "total_bits": 8550425344, + "accuracy": 0.9968497314948763, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -42787,8 +42787,8 @@ } }, { - "accuracy": 0.997828164807288, - "total_bits": 8877312000, + "accuracy": 0.997183458886866, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -42836,8 +42836,8 @@ } }, { - "accuracy": 0.9979401064119884, - "total_bits": 9674229760, + "accuracy": 0.9976429737362196, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -42882,8 +42882,8 @@ } }, { - "accuracy": 0.9987843541239272, - "total_bits": 11318396928, + "accuracy": 0.9990675145027126, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -42924,8 +42924,8 @@ ], "model.layers.22.self_attn": [ { - "accuracy": 0.9748783389804885, - "total_bits": 89141248, + "accuracy": 0.9658556540380232, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -42988,8 +42988,8 @@ } }, { - "accuracy": 0.9758972082054242, - "total_bits": 91697152, + "accuracy": 0.9671570854843594, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -43052,8 +43052,8 @@ } }, { - "accuracy": 0.9775230638915673, - "total_bits": 95234560, + "accuracy": 0.9696609929087572, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -43116,8 +43116,8 @@ } }, { - "accuracy": 0.9811748015927151, - "total_bits": 111748096, + "accuracy": 0.9741346929222345, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -43180,8 +43180,8 @@ } }, { - "accuracy": 0.9868949815281667, - "total_bits": 132388864, + "accuracy": 0.9828697993652895, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -43244,8 +43244,8 @@ } }, { - "accuracy": 0.9874337196815759, - "total_bits": 132455936, + "accuracy": 0.9834088937786873, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -43308,8 +43308,8 @@ } }, { - "accuracy": 0.9895889365579933, - "total_bits": 169089024, + "accuracy": 0.9865410050697392, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -43360,8 +43360,8 @@ } }, { - "accuracy": 0.9901814742479473, - "total_bits": 169221632, + "accuracy": 0.987132969967206, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -43412,8 +43412,8 @@ } }, { - "accuracy": 0.9912732605007477, - "total_bits": 170671104, + "accuracy": 0.9894154179055477, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -43464,8 +43464,8 @@ } }, { - "accuracy": 0.991943085740786, - "total_bits": 173039616, + "accuracy": 0.9899448634096188, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -43516,8 +43516,8 @@ } }, { - "accuracy": 0.9936841668677516, - "total_bits": 174398976, + "accuracy": 0.9917863208102062, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -43580,8 +43580,8 @@ } }, { - "accuracy": 0.9942693437624257, - "total_bits": 175225856, + "accuracy": 0.9924155626795255, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -43644,8 +43644,8 @@ } }, { - "accuracy": 0.994240858592093, - "total_bits": 178728960, + "accuracy": 0.992160980036715, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -43705,8 +43705,8 @@ } }, { - "accuracy": 0.9948541059275158, - "total_bits": 181067776, + "accuracy": 0.9931687896241783, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -43766,8 +43766,8 @@ } }, { - "accuracy": 0.9954292678157799, - "total_bits": 219944960, + "accuracy": 0.9955981879174942, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -43827,8 +43827,8 @@ } }, { - "accuracy": 0.9960733750194777, - "total_bits": 223010816, + "accuracy": 0.9967137904714036, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -43888,8 +43888,8 @@ } }, { - "accuracy": 0.9958125102857593, - "total_bits": 252975104, + "accuracy": 0.9963487146997068, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -43940,8 +43940,8 @@ } }, { - "accuracy": 0.9964599051163532, - "total_bits": 265314304, + "accuracy": 0.9979091617969971, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -43992,8 +43992,8 @@ } }, { - "accuracy": 0.9968976144882618, - "total_bits": 336861184, + "accuracy": 0.9989409253003032, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -44046,8 +44046,8 @@ ], "model.layers.22.block_sparse_moe": [ { - "accuracy": 0.9689381477655843, - "total_bits": 3157926400, + "accuracy": 0.9606032093288377, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -44098,8 +44098,8 @@ } }, { - "accuracy": 0.9699937690747902, - "total_bits": 3268026880, + "accuracy": 0.9618584033451043, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -44150,8 +44150,8 @@ } }, { - "accuracy": 0.9723186002811417, - "total_bits": 3652411392, + "accuracy": 0.9658069988654461, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -44199,8 +44199,8 @@ } }, { - "accuracy": 0.9729601240251213, - "total_bits": 4098056192, + "accuracy": 0.9674937780073378, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -44248,8 +44248,8 @@ } }, { - "accuracy": 0.9850894927512854, - "total_bits": 4621411072, + "accuracy": 0.9798952678684145, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -44300,8 +44300,8 @@ } }, { - "accuracy": 0.9862935291021131, - "total_bits": 4737212416, + "accuracy": 0.9814559902151814, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -44352,8 +44352,8 @@ } }, { - "accuracy": 0.9869725609896705, - "total_bits": 5093868288, + "accuracy": 0.9833232552045956, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -44401,8 +44401,8 @@ } }, { - "accuracy": 0.9924011828843504, - "total_bits": 5824164608, + "accuracy": 0.9890725023724372, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -44444,8 +44444,8 @@ } }, { - "accuracy": 0.9930563777743373, - "total_bits": 5910044672, + "accuracy": 0.9900932982418453, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -44487,8 +44487,8 @@ } }, { - "accuracy": 0.9924545378889889, - "total_bits": 6006579968, + "accuracy": 0.9897368808306055, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -44539,8 +44539,8 @@ } }, { - "accuracy": 0.9934407482505776, - "total_bits": 6122381312, + "accuracy": 0.9909964375401614, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -44591,8 +44591,8 @@ } }, { - "accuracy": 0.9962082956626546, - "total_bits": 7391748864, + "accuracy": 0.9947803463292075, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -44643,8 +44643,8 @@ } }, { - "accuracy": 0.996566523463116, - "total_bits": 7507550208, + "accuracy": 0.9955046800823766, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -44695,8 +44695,8 @@ } }, { - "accuracy": 0.9978337517386535, - "total_bits": 8550425344, + "accuracy": 0.9970297187646793, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -44738,8 +44738,8 @@ } }, { - "accuracy": 0.9979386955164955, - "total_bits": 8877312000, + "accuracy": 0.9973342259181663, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -44787,8 +44787,8 @@ } }, { - "accuracy": 0.9980445033579599, - "total_bits": 9674229760, + "accuracy": 0.9977369857024314, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -44833,8 +44833,8 @@ } }, { - "accuracy": 0.9989151611662237, - "total_bits": 11318396928, + "accuracy": 0.9991186083934736, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -44875,8 +44875,8 @@ ], "model.layers.23.self_attn": [ { - "accuracy": 0.9742569550871849, - "total_bits": 89141248, + "accuracy": 0.9685092685685959, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -44939,8 +44939,8 @@ } }, { - "accuracy": 0.9753613315988332, - "total_bits": 91697152, + "accuracy": 0.9699112821544986, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -45003,8 +45003,8 @@ } }, { - "accuracy": 0.9776590454857796, - "total_bits": 95234560, + "accuracy": 0.9722835852880962, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -45067,8 +45067,8 @@ } }, { - "accuracy": 0.9813938762526959, - "total_bits": 111748096, + "accuracy": 0.9769911869370844, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -45131,8 +45131,8 @@ } }, { - "accuracy": 0.9871659032651223, - "total_bits": 132388864, + "accuracy": 0.9829571839072742, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -45195,8 +45195,8 @@ } }, { - "accuracy": 0.9875873733544722, - "total_bits": 132455936, + "accuracy": 0.983315801320714, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -45259,8 +45259,8 @@ } }, { - "accuracy": 0.9904178305296227, - "total_bits": 169089024, + "accuracy": 0.9868680951476563, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -45311,8 +45311,8 @@ } }, { - "accuracy": 0.9909435475128703, - "total_bits": 169221632, + "accuracy": 0.9872727024485357, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -45363,8 +45363,8 @@ } }, { - "accuracy": 0.9919433863833547, - "total_bits": 170671104, + "accuracy": 0.9891475647891639, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -45415,8 +45415,8 @@ } }, { - "accuracy": 0.992396839952562, - "total_bits": 173039616, + "accuracy": 0.9906569576996844, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -45467,8 +45467,8 @@ } }, { - "accuracy": 0.9935010971094016, - "total_bits": 174398976, + "accuracy": 0.9919595860192203, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -45531,8 +45531,8 @@ } }, { - "accuracy": 0.9940103182743769, - "total_bits": 175225856, + "accuracy": 0.9927812288879068, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -45595,8 +45595,8 @@ } }, { - "accuracy": 0.994139974412974, - "total_bits": 178728960, + "accuracy": 0.9929468157642987, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -45656,8 +45656,8 @@ } }, { - "accuracy": 0.9946768460213207, - "total_bits": 181067776, + "accuracy": 0.9934534622225328, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -45717,8 +45717,8 @@ } }, { - "accuracy": 0.9963386028684909, - "total_bits": 219944960, + "accuracy": 0.9957751470756193, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -45778,8 +45778,8 @@ } }, { - "accuracy": 0.9966852312645642, - "total_bits": 223010816, + "accuracy": 0.9966476757144846, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -45839,8 +45839,8 @@ } }, { - "accuracy": 0.9967921290954109, - "total_bits": 252975104, + "accuracy": 0.9964959309108963, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -45891,8 +45891,8 @@ } }, { - "accuracy": 0.9972438357508508, - "total_bits": 265314304, + "accuracy": 0.9978829896790558, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -45943,8 +45943,8 @@ } }, { - "accuracy": 0.9980888139325543, - "total_bits": 336861184, + "accuracy": 0.9990285984331422, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -45997,8 +45997,8 @@ ], "model.layers.23.block_sparse_moe": [ { - "accuracy": 0.9694182423409075, - "total_bits": 3157926400, + "accuracy": 0.9611967644304968, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -46049,8 +46049,8 @@ } }, { - "accuracy": 0.9704542552353814, - "total_bits": 3268026880, + "accuracy": 0.9624502428341657, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -46101,8 +46101,8 @@ } }, { - "accuracy": 0.9726278051966801, - "total_bits": 3652411392, + "accuracy": 0.9660994482401293, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -46150,8 +46150,8 @@ } }, { - "accuracy": 0.9732641597511247, - "total_bits": 4098056192, + "accuracy": 0.9677195202675648, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -46199,8 +46199,8 @@ } }, { - "accuracy": 0.9852849282906391, - "total_bits": 4621411072, + "accuracy": 0.9801789390621707, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -46251,8 +46251,8 @@ } }, { - "accuracy": 0.9864621026208624, - "total_bits": 4737212416, + "accuracy": 0.981716951195267, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -46303,8 +46303,8 @@ } }, { - "accuracy": 0.9871148550300859, - "total_bits": 5093868288, + "accuracy": 0.9834444262523903, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -46352,8 +46352,8 @@ } }, { - "accuracy": 0.9924563854001462, - "total_bits": 5824164608, + "accuracy": 0.9891805180668598, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -46395,8 +46395,8 @@ } }, { - "accuracy": 0.99311517883325, - "total_bits": 5910044672, + "accuracy": 0.9901989057834726, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -46438,8 +46438,8 @@ } }, { - "accuracy": 0.9925458891375456, - "total_bits": 6006579968, + "accuracy": 0.9898750726424623, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -46490,8 +46490,8 @@ } }, { - "accuracy": 0.9935092757805251, - "total_bits": 6122381312, + "accuracy": 0.9911205033640726, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -46542,8 +46542,8 @@ } }, { - "accuracy": 0.9962445042037871, - "total_bits": 7391748864, + "accuracy": 0.9948472670657793, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -46594,8 +46594,8 @@ } }, { - "accuracy": 0.9965910957253072, - "total_bits": 7507550208, + "accuracy": 0.9955630879703676, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -46646,8 +46646,8 @@ } }, { - "accuracy": 0.9978387226583436, - "total_bits": 8550425344, + "accuracy": 0.997055670570262, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -46689,8 +46689,8 @@ } }, { - "accuracy": 0.9979497042368166, - "total_bits": 8877312000, + "accuracy": 0.9973680251569021, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -46738,8 +46738,8 @@ } }, { - "accuracy": 0.9980517537042033, - "total_bits": 9674229760, + "accuracy": 0.9977425871202286, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -46784,8 +46784,8 @@ } }, { - "accuracy": 0.9988967076315021, - "total_bits": 11318396928, + "accuracy": 0.9991185466215029, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -46826,8 +46826,8 @@ ], "model.layers.24.self_attn": [ { - "accuracy": 0.9740914977155626, - "total_bits": 89141248, + "accuracy": 0.9678602940985002, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -46890,8 +46890,8 @@ } }, { - "accuracy": 0.9748175790300593, - "total_bits": 91697152, + "accuracy": 0.9690271494328044, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -46954,8 +46954,8 @@ } }, { - "accuracy": 0.9771252854261547, - "total_bits": 95234560, + "accuracy": 0.9723096738161985, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -47018,8 +47018,8 @@ } }, { - "accuracy": 0.9803449434693903, - "total_bits": 111748096, + "accuracy": 0.976385011425009, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -47082,8 +47082,8 @@ } }, { - "accuracy": 0.9867288576788269, - "total_bits": 132388864, + "accuracy": 0.9836261290765833, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -47146,8 +47146,8 @@ } }, { - "accuracy": 0.9871685982798226, - "total_bits": 132455936, + "accuracy": 0.9833707114303252, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -47210,8 +47210,8 @@ } }, { - "accuracy": 0.9898095043608919, - "total_bits": 169089024, + "accuracy": 0.9875793449318735, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -47262,8 +47262,8 @@ } }, { - "accuracy": 0.990306748310104, - "total_bits": 169221632, + "accuracy": 0.9874281481752405, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -47314,8 +47314,8 @@ } }, { - "accuracy": 0.9920920132135507, - "total_bits": 170671104, + "accuracy": 0.9896163154189708, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -47366,8 +47366,8 @@ } }, { - "accuracy": 0.9929028827755246, - "total_bits": 173039616, + "accuracy": 0.9913152364970301, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -47418,8 +47418,8 @@ } }, { - "accuracy": 0.9932358057412785, - "total_bits": 174398976, + "accuracy": 0.9919906977956998, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -47482,8 +47482,8 @@ } }, { - "accuracy": 0.9936602996021975, - "total_bits": 175225856, + "accuracy": 0.992558812169591, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -47546,8 +47546,8 @@ } }, { - "accuracy": 0.9940266808262095, - "total_bits": 178728960, + "accuracy": 0.9928934029594529, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -47607,8 +47607,8 @@ } }, { - "accuracy": 0.9952896337781567, - "total_bits": 181067776, + "accuracy": 0.9939502080669627, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -47668,8 +47668,8 @@ } }, { - "accuracy": 0.9951775002118666, - "total_bits": 219944960, + "accuracy": 0.9961327695309592, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -47729,8 +47729,8 @@ } }, { - "accuracy": 0.9970928482507588, - "total_bits": 223010816, + "accuracy": 0.9967975805375318, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -47790,8 +47790,8 @@ } }, { - "accuracy": 0.9954914510308299, - "total_bits": 252975104, + "accuracy": 0.996871327082772, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -47842,8 +47842,8 @@ } }, { - "accuracy": 0.9977710084640421, - "total_bits": 265314304, + "accuracy": 0.9976253974855354, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -47894,8 +47894,8 @@ } }, { - "accuracy": 0.9965078795066802, - "total_bits": 336861184, + "accuracy": 0.9991063233583191, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -47948,8 +47948,8 @@ ], "model.layers.24.block_sparse_moe": [ { - "accuracy": 0.9695710528176278, - "total_bits": 3157926400, + "accuracy": 0.9617564277723432, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -48000,8 +48000,8 @@ } }, { - "accuracy": 0.9706222326494753, - "total_bits": 3268026880, + "accuracy": 0.9630127949058078, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -48052,8 +48052,8 @@ } }, { - "accuracy": 0.9726516192313284, - "total_bits": 3652411392, + "accuracy": 0.9663749410246965, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -48101,8 +48101,8 @@ } }, { - "accuracy": 0.9732508810702711, - "total_bits": 4098056192, + "accuracy": 0.9679116605839226, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -48150,8 +48150,8 @@ } }, { - "accuracy": 0.9853506950312294, - "total_bits": 4621411072, + "accuracy": 0.9805133761256002, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -48202,8 +48202,8 @@ } }, { - "accuracy": 0.9865381749114022, - "total_bits": 4737212416, + "accuracy": 0.9820475122833159, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -48254,8 +48254,8 @@ } }, { - "accuracy": 0.9871378486859612, - "total_bits": 5093868288, + "accuracy": 0.9836307984805899, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -48303,8 +48303,8 @@ } }, { - "accuracy": 0.9924627496220637, - "total_bits": 5824164608, + "accuracy": 0.9893371923681116, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -48346,8 +48346,8 @@ } }, { - "accuracy": 0.9931269118969794, - "total_bits": 5910044672, + "accuracy": 0.9903444714727812, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -48389,8 +48389,8 @@ } }, { - "accuracy": 0.9925702369073406, - "total_bits": 6006579968, + "accuracy": 0.9900465708924457, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -48441,8 +48441,8 @@ } }, { - "accuracy": 0.9935367518046405, - "total_bits": 6122381312, + "accuracy": 0.9912856264927541, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -48493,8 +48493,8 @@ } }, { - "accuracy": 0.9962355686730007, - "total_bits": 7391748864, + "accuracy": 0.9949317736682133, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -48545,8 +48545,8 @@ } }, { - "accuracy": 0.9965827519627055, - "total_bits": 7507550208, + "accuracy": 0.995644605565758, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -48597,8 +48597,8 @@ } }, { - "accuracy": 0.9978012071587727, - "total_bits": 8550425344, + "accuracy": 0.9970862021000357, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -48640,8 +48640,8 @@ } }, { - "accuracy": 0.9979250974211027, - "total_bits": 8877312000, + "accuracy": 0.9974045859380567, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -48689,8 +48689,8 @@ } }, { - "accuracy": 0.9980178959958721, - "total_bits": 9674229760, + "accuracy": 0.9977457999229955, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -48735,8 +48735,8 @@ } }, { - "accuracy": 0.9988403065581224, - "total_bits": 11318396928, + "accuracy": 0.9990999239835219, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -48777,8 +48777,8 @@ ], "model.layers.25.self_attn": [ { - "accuracy": 0.975635024253279, - "total_bits": 89141248, + "accuracy": 0.9708985030010808, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -48841,8 +48841,8 @@ } }, { - "accuracy": 0.9766089981421828, - "total_bits": 91697152, + "accuracy": 0.972206968581304, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -48905,8 +48905,8 @@ } }, { - "accuracy": 0.9786366783082485, - "total_bits": 95234560, + "accuracy": 0.9744501184904948, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -48969,8 +48969,8 @@ } }, { - "accuracy": 0.9832760692224838, - "total_bits": 111748096, + "accuracy": 0.9795461723406333, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -49033,8 +49033,8 @@ } }, { - "accuracy": 0.9874225473613478, - "total_bits": 132388864, + "accuracy": 0.9848889632994542, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -49097,8 +49097,8 @@ } }, { - "accuracy": 0.9877796964719892, - "total_bits": 132455936, + "accuracy": 0.9852601309103193, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -49161,8 +49161,8 @@ } }, { - "accuracy": 0.9909104927210137, - "total_bits": 169089024, + "accuracy": 0.9891446505644126, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -49213,8 +49213,8 @@ } }, { - "accuracy": 0.9914923442993313, - "total_bits": 169221632, + "accuracy": 0.9896625918627251, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -49265,8 +49265,8 @@ } }, { - "accuracy": 0.9919015273044351, - "total_bits": 170671104, + "accuracy": 0.9898654494463699, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -49317,8 +49317,8 @@ } }, { - "accuracy": 0.9921668485330883, - "total_bits": 173039616, + "accuracy": 0.9914886290280265, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -49369,8 +49369,8 @@ } }, { - "accuracy": 0.9937642703298479, - "total_bits": 174398976, + "accuracy": 0.9925200783836772, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -49433,8 +49433,8 @@ } }, { - "accuracy": 0.9942245111160446, - "total_bits": 175225856, + "accuracy": 0.9929078810673673, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -49497,8 +49497,8 @@ } }, { - "accuracy": 0.9944022337440401, - "total_bits": 178728960, + "accuracy": 0.9932044373053941, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -49558,8 +49558,8 @@ } }, { - "accuracy": 0.9948370722704567, - "total_bits": 181067776, + "accuracy": 0.9938755617331481, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -49619,8 +49619,8 @@ } }, { - "accuracy": 0.9967017735325499, - "total_bits": 219944960, + "accuracy": 0.9961820873941178, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -49680,8 +49680,8 @@ } }, { - "accuracy": 0.9969872905348893, - "total_bits": 223010816, + "accuracy": 0.9969081127601385, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -49741,8 +49741,8 @@ } }, { - "accuracy": 0.9972726964188041, - "total_bits": 252975104, + "accuracy": 0.9969699422181293, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -49793,8 +49793,8 @@ } }, { - "accuracy": 0.9975797830702504, - "total_bits": 265314304, + "accuracy": 0.998099936712606, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -49845,8 +49845,8 @@ } }, { - "accuracy": 0.9982512140923063, - "total_bits": 336861184, + "accuracy": 0.9991208893306975, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -49899,8 +49899,8 @@ ], "model.layers.25.block_sparse_moe": [ { - "accuracy": 0.969007869483903, - "total_bits": 3157926400, + "accuracy": 0.9608771585626528, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -49951,8 +49951,8 @@ } }, { - "accuracy": 0.9700913252308965, - "total_bits": 3268026880, + "accuracy": 0.9621818309824448, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -50003,8 +50003,8 @@ } }, { - "accuracy": 0.9721200618660077, - "total_bits": 3652411392, + "accuracy": 0.9656426642613951, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -50052,8 +50052,8 @@ } }, { - "accuracy": 0.9727274455362931, - "total_bits": 4098056192, + "accuracy": 0.9672192028956488, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -50101,8 +50101,8 @@ } }, { - "accuracy": 0.9850613007438369, - "total_bits": 4621411072, + "accuracy": 0.9800265031226445, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -50153,8 +50153,8 @@ } }, { - "accuracy": 0.9863119252840988, - "total_bits": 4737212416, + "accuracy": 0.9816338271775749, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -50205,8 +50205,8 @@ } }, { - "accuracy": 0.9869226065347902, - "total_bits": 5093868288, + "accuracy": 0.9832767940824851, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -50254,8 +50254,8 @@ } }, { - "accuracy": 0.9923209655680694, - "total_bits": 5824164608, + "accuracy": 0.9890697244118201, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -50297,8 +50297,8 @@ } }, { - "accuracy": 0.9930098742770497, - "total_bits": 5910044672, + "accuracy": 0.9901265691005392, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -50340,8 +50340,8 @@ } }, { - "accuracy": 0.9924367456114851, - "total_bits": 6006579968, + "accuracy": 0.989802484458778, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -50392,8 +50392,8 @@ } }, { - "accuracy": 0.9934251722879708, - "total_bits": 6122381312, + "accuracy": 0.9910829363579978, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -50444,8 +50444,8 @@ } }, { - "accuracy": 0.9961792923568282, - "total_bits": 7391748864, + "accuracy": 0.9948068855446763, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -50496,8 +50496,8 @@ } }, { - "accuracy": 0.9965334588196129, - "total_bits": 7507550208, + "accuracy": 0.995543447072123, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -50548,8 +50548,8 @@ } }, { - "accuracy": 0.9977794888545759, - "total_bits": 8550425344, + "accuracy": 0.9970156851813954, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -50591,8 +50591,8 @@ } }, { - "accuracy": 0.997903248113289, - "total_bits": 8877312000, + "accuracy": 0.9973408240184654, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -50640,8 +50640,8 @@ } }, { - "accuracy": 0.9979982225049753, - "total_bits": 9674229760, + "accuracy": 0.9976942881567084, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -50686,8 +50686,8 @@ } }, { - "accuracy": 0.9988456519713509, - "total_bits": 11318396928, + "accuracy": 0.9990787384076611, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -50728,8 +50728,8 @@ ], "model.layers.26.self_attn": [ { - "accuracy": 0.9757101300638169, - "total_bits": 89141248, + "accuracy": 0.9712514829880092, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -50792,8 +50792,8 @@ } }, { - "accuracy": 0.9766908466117457, - "total_bits": 91697152, + "accuracy": 0.9726009742007591, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -50856,8 +50856,8 @@ } }, { - "accuracy": 0.9790366943925619, - "total_bits": 95234560, + "accuracy": 0.9749679694650695, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -50920,8 +50920,8 @@ } }, { - "accuracy": 0.9833608352928422, - "total_bits": 111748096, + "accuracy": 0.9800952247751411, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -50984,8 +50984,8 @@ } }, { - "accuracy": 0.9877208736143075, - "total_bits": 132388864, + "accuracy": 0.9846268180990592, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -51048,8 +51048,8 @@ } }, { - "accuracy": 0.9880152547848411, - "total_bits": 132455936, + "accuracy": 0.9851246081816498, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -51112,8 +51112,8 @@ } }, { - "accuracy": 0.991233472363092, - "total_bits": 169089024, + "accuracy": 0.9886959431751166, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -51164,8 +51164,8 @@ } }, { - "accuracy": 0.9915979636425618, - "total_bits": 169221632, + "accuracy": 0.9893573532026494, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -51216,8 +51216,8 @@ } }, { - "accuracy": 0.9924220569955651, - "total_bits": 170671104, + "accuracy": 0.990293762733927, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -51268,8 +51268,8 @@ } }, { - "accuracy": 0.9933688686869573, - "total_bits": 173039616, + "accuracy": 0.9906472531729378, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -51320,8 +51320,8 @@ } }, { - "accuracy": 0.9937991050246637, - "total_bits": 174398976, + "accuracy": 0.9924779808497988, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -51384,8 +51384,8 @@ } }, { - "accuracy": 0.994322852260666, - "total_bits": 175225856, + "accuracy": 0.993175953910395, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -51448,8 +51448,8 @@ } }, { - "accuracy": 0.994469574856339, - "total_bits": 178728960, + "accuracy": 0.9932232893406763, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -51509,8 +51509,8 @@ } }, { - "accuracy": 0.9952107277349569, - "total_bits": 181067776, + "accuracy": 0.9940604012226686, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -51570,8 +51570,8 @@ } }, { - "accuracy": 0.9964439557370497, - "total_bits": 219944960, + "accuracy": 0.9960990392719395, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -51631,8 +51631,8 @@ } }, { - "accuracy": 0.997316460343427, - "total_bits": 223010816, + "accuracy": 0.9969739962980384, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -51692,8 +51692,8 @@ } }, { - "accuracy": 0.996952786546899, - "total_bits": 252975104, + "accuracy": 0.9968800586575526, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -51744,8 +51744,8 @@ } }, { - "accuracy": 0.9979918636527145, - "total_bits": 265314304, + "accuracy": 0.9980785678053508, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -51796,8 +51796,8 @@ } }, { - "accuracy": 0.9979405645208317, - "total_bits": 336861184, + "accuracy": 0.9991170226767281, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -51850,8 +51850,8 @@ ], "model.layers.26.block_sparse_moe": [ { - "accuracy": 0.9688440517056733, - "total_bits": 3157926400, + "accuracy": 0.9609163351997267, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -51902,8 +51902,8 @@ } }, { - "accuracy": 0.969919809489511, - "total_bits": 3268026880, + "accuracy": 0.9621957196213771, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -51954,8 +51954,8 @@ } }, { - "accuracy": 0.9719193944474682, - "total_bits": 3652411392, + "accuracy": 0.9656279655464459, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -52003,8 +52003,8 @@ } }, { - "accuracy": 0.9725231698248535, - "total_bits": 4098056192, + "accuracy": 0.9671895041828975, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -52052,8 +52052,8 @@ } }, { - "accuracy": 0.9849821657990105, - "total_bits": 4621411072, + "accuracy": 0.9800438512465917, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -52104,8 +52104,8 @@ } }, { - "accuracy": 0.986206456553191, - "total_bits": 4737212416, + "accuracy": 0.9816233195306268, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -52156,8 +52156,8 @@ } }, { - "accuracy": 0.9868080813321285, - "total_bits": 5093868288, + "accuracy": 0.9832675329380436, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -52205,8 +52205,8 @@ } }, { - "accuracy": 0.9922645826591179, - "total_bits": 5824164608, + "accuracy": 0.9890836578706512, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -52248,8 +52248,8 @@ } }, { - "accuracy": 0.9929646781820338, - "total_bits": 5910044672, + "accuracy": 0.9901303600927349, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -52291,8 +52291,8 @@ } }, { - "accuracy": 0.992394350701943, - "total_bits": 6006579968, + "accuracy": 0.9898150818480644, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -52343,8 +52343,8 @@ } }, { - "accuracy": 0.993379261897644, - "total_bits": 6122381312, + "accuracy": 0.9910817972559016, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -52395,8 +52395,8 @@ } }, { - "accuracy": 0.9961563112156, - "total_bits": 7391748864, + "accuracy": 0.9948202593222959, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -52447,8 +52447,8 @@ } }, { - "accuracy": 0.9965067493903916, - "total_bits": 7507550208, + "accuracy": 0.9955441953607078, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -52499,8 +52499,8 @@ } }, { - "accuracy": 0.9977713252083049, - "total_bits": 8550425344, + "accuracy": 0.9970282937174488, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -52542,8 +52542,8 @@ } }, { - "accuracy": 0.9979054052091669, - "total_bits": 8877312000, + "accuracy": 0.9973521569809236, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -52591,8 +52591,8 @@ } }, { - "accuracy": 0.9980004595417995, - "total_bits": 9674229760, + "accuracy": 0.9977095829362952, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -52637,8 +52637,8 @@ } }, { - "accuracy": 0.9988718555978267, - "total_bits": 11318396928, + "accuracy": 0.9990974362963243, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -52679,8 +52679,8 @@ ], "model.layers.27.self_attn": [ { - "accuracy": 0.9725009835092351, - "total_bits": 89141248, + "accuracy": 0.9696724039386027, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -52743,8 +52743,8 @@ } }, { - "accuracy": 0.9730375582585111, - "total_bits": 91697152, + "accuracy": 0.9710927317501046, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -52807,8 +52807,8 @@ } }, { - "accuracy": 0.9783742883009836, - "total_bits": 95234560, + "accuracy": 0.9745434671349358, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -52871,8 +52871,8 @@ } }, { - "accuracy": 0.9819762164261192, - "total_bits": 111748096, + "accuracy": 0.9785372794431169, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -52935,8 +52935,8 @@ } }, { - "accuracy": 0.987598208419513, - "total_bits": 132388864, + "accuracy": 0.984625406563282, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -52999,8 +52999,8 @@ } }, { - "accuracy": 0.9879429566208273, - "total_bits": 132455936, + "accuracy": 0.9851422458741581, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -53063,8 +53063,8 @@ } }, { - "accuracy": 0.9908011484076269, - "total_bits": 169089024, + "accuracy": 0.9883124567568302, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -53115,8 +53115,8 @@ } }, { - "accuracy": 0.9913011809112504, - "total_bits": 169221632, + "accuracy": 0.9889232778659789, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -53167,8 +53167,8 @@ } }, { - "accuracy": 0.9914118485176004, - "total_bits": 170671104, + "accuracy": 0.989853358551045, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -53219,8 +53219,8 @@ } }, { - "accuracy": 0.9922518230741844, - "total_bits": 173039616, + "accuracy": 0.9912076629989315, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -53271,8 +53271,8 @@ } }, { - "accuracy": 0.9935984785261098, - "total_bits": 174398976, + "accuracy": 0.9923049864519271, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -53335,8 +53335,8 @@ } }, { - "accuracy": 0.9939869531372096, - "total_bits": 175225856, + "accuracy": 0.9930695742878015, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -53399,8 +53399,8 @@ } }, { - "accuracy": 0.994155131658772, - "total_bits": 178728960, + "accuracy": 0.9931208349516965, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -53460,8 +53460,8 @@ } }, { - "accuracy": 0.9949826712836511, - "total_bits": 181067776, + "accuracy": 0.9929904946111492, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -53521,8 +53521,8 @@ } }, { - "accuracy": 0.9966216761094984, - "total_bits": 219944960, + "accuracy": 0.9960446418881475, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -53582,8 +53582,8 @@ } }, { - "accuracy": 0.9971121547860093, - "total_bits": 223010816, + "accuracy": 0.9967940555143286, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -53643,8 +53643,8 @@ } }, { - "accuracy": 0.997134191042278, - "total_bits": 252975104, + "accuracy": 0.9967583258221566, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -53695,8 +53695,8 @@ } }, { - "accuracy": 0.9979563828528626, - "total_bits": 265314304, + "accuracy": 0.9980719853883784, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -53747,8 +53747,8 @@ } }, { - "accuracy": 0.9985125597304432, - "total_bits": 336861184, + "accuracy": 0.9991200245767686, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -53801,8 +53801,8 @@ ], "model.layers.27.block_sparse_moe": [ { - "accuracy": 0.9666476508136839, - "total_bits": 3157926400, + "accuracy": 0.9583663901430555, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -53853,8 +53853,8 @@ } }, { - "accuracy": 0.9677922299597412, - "total_bits": 3268026880, + "accuracy": 0.9597476501949131, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -53905,8 +53905,8 @@ } }, { - "accuracy": 0.9698727611685172, - "total_bits": 3652411392, + "accuracy": 0.9633681625418831, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -53954,8 +53954,8 @@ } }, { - "accuracy": 0.9705174380214885, - "total_bits": 4098056192, + "accuracy": 0.9650208015809767, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -54003,8 +54003,8 @@ } }, { - "accuracy": 0.9838481965707615, - "total_bits": 4621411072, + "accuracy": 0.9786635297932662, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -54055,8 +54055,8 @@ } }, { - "accuracy": 0.9851878265617415, - "total_bits": 4737212416, + "accuracy": 0.9803789716825122, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -54107,8 +54107,8 @@ } }, { - "accuracy": 0.985828154196497, - "total_bits": 5093868288, + "accuracy": 0.9821334987937007, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -54156,8 +54156,8 @@ } }, { - "accuracy": 0.9916401625669096, - "total_bits": 5824164608, + "accuracy": 0.9882934561173897, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -54199,8 +54199,8 @@ } }, { - "accuracy": 0.9924063989019487, - "total_bits": 5910044672, + "accuracy": 0.9894293021789053, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -54242,8 +54242,8 @@ } }, { - "accuracy": 0.9918072349391878, - "total_bits": 6006579968, + "accuracy": 0.9891005825484172, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -54294,8 +54294,8 @@ } }, { - "accuracy": 0.9928845343529247, - "total_bits": 6122381312, + "accuracy": 0.990461702545872, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -54346,8 +54346,8 @@ } }, { - "accuracy": 0.9958604652347276, - "total_bits": 7391748864, + "accuracy": 0.9944460228434764, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -54398,8 +54398,8 @@ } }, { - "accuracy": 0.9962322873907397, - "total_bits": 7507550208, + "accuracy": 0.9952261450598598, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -54450,8 +54450,8 @@ } }, { - "accuracy": 0.9975779563683318, - "total_bits": 8550425344, + "accuracy": 0.996797082505509, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -54493,8 +54493,8 @@ } }, { - "accuracy": 0.9977254044715664, - "total_bits": 8877312000, + "accuracy": 0.9971702168950287, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -54542,8 +54542,8 @@ } }, { - "accuracy": 0.9978272604421363, - "total_bits": 9674229760, + "accuracy": 0.9975547504072892, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -54588,8 +54588,8 @@ } }, { - "accuracy": 0.9987383494517417, - "total_bits": 11318396928, + "accuracy": 0.9990390755256158, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -54630,8 +54630,8 @@ ], "model.layers.28.self_attn": [ { - "accuracy": 0.968386668828316, - "total_bits": 89141248, + "accuracy": 0.964356071286602, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -54694,8 +54694,8 @@ } }, { - "accuracy": 0.9699094152310863, - "total_bits": 91697152, + "accuracy": 0.9659521095454693, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -54758,8 +54758,8 @@ } }, { - "accuracy": 0.9738133638165891, - "total_bits": 95234560, + "accuracy": 0.9696340323425829, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -54822,8 +54822,8 @@ } }, { - "accuracy": 0.9786806276533753, - "total_bits": 111748096, + "accuracy": 0.9752059754973743, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -54886,8 +54886,8 @@ } }, { - "accuracy": 0.9834078575950116, - "total_bits": 132388864, + "accuracy": 0.9811968024587259, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -54950,8 +54950,8 @@ } }, { - "accuracy": 0.984735572594218, - "total_bits": 132455936, + "accuracy": 0.9823216560325818, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -55014,8 +55014,8 @@ } }, { - "accuracy": 0.9868350697797723, - "total_bits": 169089024, + "accuracy": 0.985807766977814, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -55066,8 +55066,8 @@ } }, { - "accuracy": 0.9883866619784385, - "total_bits": 169221632, + "accuracy": 0.9871083967591403, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -55118,8 +55118,8 @@ } }, { - "accuracy": 0.9912850014516152, - "total_bits": 170671104, + "accuracy": 0.989371751260478, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -55170,8 +55170,8 @@ } }, { - "accuracy": 0.9917223777156323, - "total_bits": 173039616, + "accuracy": 0.9894701458542841, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -55222,8 +55222,8 @@ } }, { - "accuracy": 0.9921401064202655, - "total_bits": 174398976, + "accuracy": 0.9913793814193923, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -55286,8 +55286,8 @@ } }, { - "accuracy": 0.9927344562020153, - "total_bits": 175225856, + "accuracy": 0.9921445137297269, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -55350,8 +55350,8 @@ } }, { - "accuracy": 0.9927482484490611, - "total_bits": 178728960, + "accuracy": 0.9925117603997933, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -55411,8 +55411,8 @@ } }, { - "accuracy": 0.993530078267213, - "total_bits": 181067776, + "accuracy": 0.9932125890554744, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -55472,8 +55472,8 @@ } }, { - "accuracy": 0.9948926795623265, - "total_bits": 219944960, + "accuracy": 0.9957904105867783, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -55533,8 +55533,8 @@ } }, { - "accuracy": 0.9952299927244894, - "total_bits": 223010816, + "accuracy": 0.9965570663807739, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -55594,8 +55594,8 @@ } }, { - "accuracy": 0.9952933359018061, - "total_bits": 252975104, + "accuracy": 0.9967241092890617, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -55646,8 +55646,8 @@ } }, { - "accuracy": 0.995679962070426, - "total_bits": 265314304, + "accuracy": 0.9978031752634706, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -55698,8 +55698,8 @@ } }, { - "accuracy": 0.9966582462511724, - "total_bits": 336861184, + "accuracy": 0.9991185926210164, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -55752,8 +55752,8 @@ ], "model.layers.28.block_sparse_moe": [ { - "accuracy": 0.9642029053065926, - "total_bits": 3157926400, + "accuracy": 0.9553704623831436, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -55804,8 +55804,8 @@ } }, { - "accuracy": 0.9654948977986351, - "total_bits": 3268026880, + "accuracy": 0.9568884387845173, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -55856,8 +55856,8 @@ } }, { - "accuracy": 0.9676485249074176, - "total_bits": 3652411392, + "accuracy": 0.9608359276608098, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -55905,8 +55905,8 @@ } }, { - "accuracy": 0.9683901380049065, - "total_bits": 4098056192, + "accuracy": 0.9626975421560928, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -55954,8 +55954,8 @@ } }, { - "accuracy": 0.9826172718312591, - "total_bits": 4621411072, + "accuracy": 0.9770455712568946, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -56006,8 +56006,8 @@ } }, { - "accuracy": 0.9840587000362575, - "total_bits": 4737212416, + "accuracy": 0.9789364513708279, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -56058,8 +56058,8 @@ } }, { - "accuracy": 0.9847759025869891, - "total_bits": 5093868288, + "accuracy": 0.980882446790929, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -56107,8 +56107,8 @@ } }, { - "accuracy": 0.9908995085861534, - "total_bits": 5824164608, + "accuracy": 0.9873030422459124, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -56150,8 +56150,8 @@ } }, { - "accuracy": 0.9917332165059634, - "total_bits": 5910044672, + "accuracy": 0.9885337238083594, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -56193,8 +56193,8 @@ } }, { - "accuracy": 0.9911614380544052, - "total_bits": 6006579968, + "accuracy": 0.9882496923091821, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -56245,8 +56245,8 @@ } }, { - "accuracy": 0.9923151141556446, - "total_bits": 6122381312, + "accuracy": 0.9897335465211654, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -56297,8 +56297,8 @@ } }, { - "accuracy": 0.9955114201729884, - "total_bits": 7391748864, + "accuracy": 0.9940038786298828, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -56349,8 +56349,8 @@ } }, { - "accuracy": 0.9958766292984365, - "total_bits": 7507550208, + "accuracy": 0.9948603856610134, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -56401,8 +56401,8 @@ } }, { - "accuracy": 0.9973166022682562, - "total_bits": 8550425344, + "accuracy": 0.9965157303304295, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -56444,8 +56444,8 @@ } }, { - "accuracy": 0.9975079808355076, - "total_bits": 8877312000, + "accuracy": 0.9969408901670249, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -56493,8 +56493,8 @@ } }, { - "accuracy": 0.9976214789348887, - "total_bits": 9674229760, + "accuracy": 0.9973778746170865, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -56539,8 +56539,8 @@ } }, { - "accuracy": 0.9985512509592809, - "total_bits": 11318396928, + "accuracy": 0.9989481774064188, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -56581,8 +56581,8 @@ ], "model.layers.29.self_attn": [ { - "accuracy": 0.9608701199758798, - "total_bits": 89141248, + "accuracy": 0.9558693342260085, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -56645,8 +56645,8 @@ } }, { - "accuracy": 0.962438702583313, - "total_bits": 91697152, + "accuracy": 0.9572235742816702, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -56709,8 +56709,8 @@ } }, { - "accuracy": 0.9726416683988646, - "total_bits": 95234560, + "accuracy": 0.9663578089966904, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -56773,8 +56773,8 @@ } }, { - "accuracy": 0.9780280129052699, - "total_bits": 111748096, + "accuracy": 0.9729988987091929, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -56837,8 +56837,8 @@ } }, { - "accuracy": 0.9820066256215796, - "total_bits": 132388864, + "accuracy": 0.9763167930941563, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -56901,8 +56901,8 @@ } }, { - "accuracy": 0.9820802011527121, - "total_bits": 132455936, + "accuracy": 0.9776681170624215, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -56965,8 +56965,8 @@ } }, { - "accuracy": 0.9866309635108337, - "total_bits": 169089024, + "accuracy": 0.9815346100949682, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -57017,8 +57017,8 @@ } }, { - "accuracy": 0.9865399926784448, - "total_bits": 169221632, + "accuracy": 0.9830296769941924, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -57069,8 +57069,8 @@ } }, { - "accuracy": 0.9898942282306962, - "total_bits": 170671104, + "accuracy": 0.9866015218867688, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -57121,8 +57121,8 @@ } }, { - "accuracy": 0.9900045917602256, - "total_bits": 173039616, + "accuracy": 0.9874341624090448, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -57173,8 +57173,8 @@ } }, { - "accuracy": 0.9914423301524948, - "total_bits": 174398976, + "accuracy": 0.9895926214230713, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -57237,8 +57237,8 @@ } }, { - "accuracy": 0.9921081878128462, - "total_bits": 175225856, + "accuracy": 0.9898429020831827, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -57301,8 +57301,8 @@ } }, { - "accuracy": 0.992435004125582, - "total_bits": 178728960, + "accuracy": 0.9907817719358718, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -57362,8 +57362,8 @@ } }, { - "accuracy": 0.9933971756836399, - "total_bits": 181067776, + "accuracy": 0.9905227487470256, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -57423,8 +57423,8 @@ } }, { - "accuracy": 0.9958027148386464, - "total_bits": 219944960, + "accuracy": 0.9946968102303799, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -57484,8 +57484,8 @@ } }, { - "accuracy": 0.9964105687104166, - "total_bits": 223010816, + "accuracy": 0.9955913964004139, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -57545,8 +57545,8 @@ } }, { - "accuracy": 0.9965360744972713, - "total_bits": 252975104, + "accuracy": 0.995705336194078, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -57597,8 +57597,8 @@ } }, { - "accuracy": 0.9972677519108402, - "total_bits": 265314304, + "accuracy": 0.9970646558904264, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -57649,8 +57649,8 @@ } }, { - "accuracy": 0.9983605410816381, - "total_bits": 336861184, + "accuracy": 0.9985642530427867, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -57703,8 +57703,8 @@ ], "model.layers.29.block_sparse_moe": [ { - "accuracy": 0.9632776044309139, - "total_bits": 3157926400, + "accuracy": 0.9538796185515821, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -57755,8 +57755,8 @@ } }, { - "accuracy": 0.9647262704093009, - "total_bits": 3268026880, + "accuracy": 0.9556196986231953, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -57807,8 +57807,8 @@ } }, { - "accuracy": 0.9670966673875228, - "total_bits": 3652411392, + "accuracy": 0.9600857462501153, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -57856,8 +57856,8 @@ } }, { - "accuracy": 0.967880874988623, - "total_bits": 4098056192, + "accuracy": 0.9620151665585581, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -57905,8 +57905,8 @@ } }, { - "accuracy": 0.9822011215146631, - "total_bits": 4621411072, + "accuracy": 0.9764004955068231, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -57957,8 +57957,8 @@ } }, { - "accuracy": 0.9836757756420411, - "total_bits": 4737212416, + "accuracy": 0.9783504685328808, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -58009,8 +58009,8 @@ } }, { - "accuracy": 0.9844985797535628, - "total_bits": 5093868288, + "accuracy": 0.9805611932824831, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -58058,8 +58058,8 @@ } }, { - "accuracy": 0.9906410349067301, - "total_bits": 5824164608, + "accuracy": 0.9869954852329101, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -58101,8 +58101,8 @@ } }, { - "accuracy": 0.9914555510622449, - "total_bits": 5910044672, + "accuracy": 0.9882198954583146, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -58144,8 +58144,8 @@ } }, { - "accuracy": 0.9909641515114345, - "total_bits": 6006579968, + "accuracy": 0.9879281478642952, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -58196,8 +58196,8 @@ } }, { - "accuracy": 0.992130204977002, - "total_bits": 6122381312, + "accuracy": 0.9894562861591112, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -58248,8 +58248,8 @@ } }, { - "accuracy": 0.9954220654908568, - "total_bits": 7391748864, + "accuracy": 0.9938438470853725, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -58300,8 +58300,8 @@ } }, { - "accuracy": 0.9958002669445705, - "total_bits": 7507550208, + "accuracy": 0.9947257286767126, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -58352,8 +58352,8 @@ } }, { - "accuracy": 0.9972590599354589, - "total_bits": 8550425344, + "accuracy": 0.9964215103827883, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -58395,8 +58395,8 @@ } }, { - "accuracy": 0.9974472786707338, - "total_bits": 8877312000, + "accuracy": 0.9968673409020994, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -58444,8 +58444,8 @@ } }, { - "accuracy": 0.997577567701228, - "total_bits": 9674229760, + "accuracy": 0.9973569583926292, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -58490,8 +58490,8 @@ } }, { - "accuracy": 0.998522600159049, - "total_bits": 11318396928, + "accuracy": 0.9989476686905618, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -58532,8 +58532,8 @@ ], "model.layers.30.self_attn": [ { - "accuracy": 0.9694207918364555, - "total_bits": 89141248, + "accuracy": 0.9634038866497576, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -58596,8 +58596,8 @@ } }, { - "accuracy": 0.9706075268331915, - "total_bits": 91697152, + "accuracy": 0.965070200356422, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -58660,8 +58660,8 @@ } }, { - "accuracy": 0.9744983097771183, - "total_bits": 95234560, + "accuracy": 0.9681082468305249, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -58724,8 +58724,8 @@ } }, { - "accuracy": 0.9803562588058412, - "total_bits": 111748096, + "accuracy": 0.9745180726167746, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -58788,8 +58788,8 @@ } }, { - "accuracy": 0.9843372566392645, - "total_bits": 132388864, + "accuracy": 0.9816155265871203, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -58852,8 +58852,8 @@ } }, { - "accuracy": 0.9855422916007228, - "total_bits": 132455936, + "accuracy": 0.9821725684596458, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -58916,8 +58916,8 @@ } }, { - "accuracy": 0.9890948435640894, - "total_bits": 169089024, + "accuracy": 0.987356522178743, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -58968,8 +58968,8 @@ } }, { - "accuracy": 0.99057325249305, - "total_bits": 169221632, + "accuracy": 0.9880507232592208, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -59020,8 +59020,8 @@ } }, { - "accuracy": 0.9910624071198981, - "total_bits": 170671104, + "accuracy": 0.9870903741102666, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -59072,8 +59072,8 @@ } }, { - "accuracy": 0.991522089811042, - "total_bits": 173039616, + "accuracy": 0.9892559279105626, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -59124,8 +59124,8 @@ } }, { - "accuracy": 0.9925551398482639, - "total_bits": 174398976, + "accuracy": 0.9907923655991908, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -59188,8 +59188,8 @@ } }, { - "accuracy": 0.9930649745801929, - "total_bits": 175225856, + "accuracy": 0.9913412455134676, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -59252,8 +59252,8 @@ } }, { - "accuracy": 0.9936155014729593, - "total_bits": 178728960, + "accuracy": 0.9920958295697346, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -59313,8 +59313,8 @@ } }, { - "accuracy": 0.9938916285173036, - "total_bits": 181067776, + "accuracy": 0.9926775795902358, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -59374,8 +59374,8 @@ } }, { - "accuracy": 0.9964086984109599, - "total_bits": 219944960, + "accuracy": 0.995497214684292, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -59435,8 +59435,8 @@ } }, { - "accuracy": 0.9962329395057168, - "total_bits": 223010816, + "accuracy": 0.9963571085681906, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -59496,8 +59496,8 @@ } }, { - "accuracy": 0.9971578061813489, - "total_bits": 252975104, + "accuracy": 0.9964870917465305, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -59548,8 +59548,8 @@ } }, { - "accuracy": 0.9969299465155927, - "total_bits": 265314304, + "accuracy": 0.9977427639532834, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -59600,8 +59600,8 @@ } }, { - "accuracy": 0.9984630753897363, - "total_bits": 336861184, + "accuracy": 0.9990213116116138, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -59654,8 +59654,8 @@ ], "model.layers.30.block_sparse_moe": [ { - "accuracy": 0.9637796608731151, - "total_bits": 3157926400, + "accuracy": 0.956871971313376, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -59706,8 +59706,8 @@ } }, { - "accuracy": 0.965128994663246, - "total_bits": 3268026880, + "accuracy": 0.9583895955001935, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -59758,8 +59758,8 @@ } }, { - "accuracy": 0.9673422039486468, - "total_bits": 3652411392, + "accuracy": 0.962382945639547, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -59807,8 +59807,8 @@ } }, { - "accuracy": 0.9680863645626232, - "total_bits": 4098056192, + "accuracy": 0.9640619686688296, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -59856,8 +59856,8 @@ } }, { - "accuracy": 0.9825394411454909, - "total_bits": 4621411072, + "accuracy": 0.9778909022279549, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -59908,8 +59908,8 @@ } }, { - "accuracy": 0.9840861347038299, - "total_bits": 4737212416, + "accuracy": 0.979745312826708, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -59960,8 +59960,8 @@ } }, { - "accuracy": 0.9848618414252996, - "total_bits": 5093868288, + "accuracy": 0.9816655949543929, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -60009,8 +60009,8 @@ } }, { - "accuracy": 0.990729100536555, - "total_bits": 5824164608, + "accuracy": 0.9878102341317572, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -60052,8 +60052,8 @@ } }, { - "accuracy": 0.9916374326567166, - "total_bits": 5910044672, + "accuracy": 0.9889602991752326, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -60095,8 +60095,8 @@ } }, { - "accuracy": 0.9911164715013001, - "total_bits": 6006579968, + "accuracy": 0.9886521848820848, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -60147,8 +60147,8 @@ } }, { - "accuracy": 0.9922299902245868, - "total_bits": 6122381312, + "accuracy": 0.9900498497954686, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -60199,8 +60199,8 @@ } }, { - "accuracy": 0.9954629788117018, - "total_bits": 7391748864, + "accuracy": 0.9941774144754163, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -60251,8 +60251,8 @@ } }, { - "accuracy": 0.9957174436858622, - "total_bits": 7507550208, + "accuracy": 0.9950011447508587, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -60303,8 +60303,8 @@ } }, { - "accuracy": 0.9971771511627594, - "total_bits": 8550425344, + "accuracy": 0.9965910611426807, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -60346,8 +60346,8 @@ } }, { - "accuracy": 0.9974351794080576, - "total_bits": 8877312000, + "accuracy": 0.9970110861650028, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -60395,8 +60395,8 @@ } }, { - "accuracy": 0.9975584884959972, - "total_bits": 9674229760, + "accuracy": 0.9974619781605725, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -60441,8 +60441,8 @@ } }, { - "accuracy": 0.9984119053842733, - "total_bits": 11318396928, + "accuracy": 0.9989289466348055, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128 @@ -60483,8 +60483,8 @@ ], "model.layers.31.self_attn": [ { - "accuracy": 0.9698078738292679, - "total_bits": 89141248, + "accuracy": 0.9662979132845066, + "total_bits": 89665536, "q_proj": { "group_size": { "3": 64, @@ -60547,8 +60547,8 @@ } }, { - "accuracy": 0.9716533664613962, - "total_bits": 91697152, + "accuracy": 0.9680890653689858, + "total_bits": 92221440, "q_proj": { "group_size": { "3": 64, @@ -60611,8 +60611,8 @@ } }, { - "accuracy": 0.9759100443916395, - "total_bits": 95234560, + "accuracy": 0.9717282597266603, + "total_bits": 95758848, "q_proj": { "group_size": { "3": 64, @@ -60675,8 +60675,8 @@ } }, { - "accuracy": 0.9812318295007572, - "total_bits": 111748096, + "accuracy": 0.9778634191607125, + "total_bits": 112272384, "q_proj": { "group_size": { "3": 64, @@ -60739,8 +60739,8 @@ } }, { - "accuracy": 0.985558450978715, - "total_bits": 132388864, + "accuracy": 0.9824691925750813, + "total_bits": 132913152, "q_proj": { "group_size": { "4": 128, @@ -60803,8 +60803,8 @@ } }, { - "accuracy": 0.9860909873968922, - "total_bits": 132455936, + "accuracy": 0.9830566641903715, + "total_bits": 132980224, "q_proj": { "group_size": { "4": 128, @@ -60867,8 +60867,8 @@ } }, { - "accuracy": 0.9901088335900567, - "total_bits": 169089024, + "accuracy": 0.9875637218065094, + "total_bits": 169613312, "q_proj": { "group_size": { "4": 128 @@ -60919,8 +60919,8 @@ } }, { - "accuracy": 0.9906528096180409, - "total_bits": 169221632, + "accuracy": 0.9884238858503522, + "total_bits": 169745920, "q_proj": { "group_size": { "4": 128 @@ -60971,8 +60971,8 @@ } }, { - "accuracy": 0.9914724960108288, - "total_bits": 170671104, + "accuracy": 0.9898434544884367, + "total_bits": 171195392, "q_proj": { "group_size": { "4": 64 @@ -61023,8 +61023,8 @@ } }, { - "accuracy": 0.9920686493569519, - "total_bits": 173039616, + "accuracy": 0.9906905299285427, + "total_bits": 173563904, "q_proj": { "group_size": { "4": 32 @@ -61075,8 +61075,8 @@ } }, { - "accuracy": 0.9929109421209432, - "total_bits": 174398976, + "accuracy": 0.9906248478000634, + "total_bits": 174923264, "q_proj": { "group_size": { "5": 128, @@ -61139,8 +61139,8 @@ } }, { - "accuracy": 0.9935312595334835, - "total_bits": 175225856, + "accuracy": 0.9914314899724559, + "total_bits": 175750144, "q_proj": { "group_size": { "5": 64, @@ -61203,8 +61203,8 @@ } }, { - "accuracy": 0.9941065498569515, - "total_bits": 178728960, + "accuracy": 0.9919101549166953, + "total_bits": 179253248, "q_proj": { "group_size": { "5": 64, @@ -61264,8 +61264,8 @@ } }, { - "accuracy": 0.9943883052328601, - "total_bits": 181067776, + "accuracy": 0.9927582318923669, + "total_bits": 181592064, "q_proj": { "group_size": { "5": 32, @@ -61325,8 +61325,8 @@ } }, { - "accuracy": 0.996171072096331, - "total_bits": 219944960, + "accuracy": 0.996015546417766, + "total_bits": 220469248, "q_proj": { "group_size": { "6": 128, @@ -61386,8 +61386,8 @@ } }, { - "accuracy": 0.9962222876492888, - "total_bits": 223010816, + "accuracy": 0.9964834913625964, + "total_bits": 223535104, "q_proj": { "group_size": { "6": 32, @@ -61447,8 +61447,8 @@ } }, { - "accuracy": 0.9967191656760406, - "total_bits": 252975104, + "accuracy": 0.9969142353183997, + "total_bits": 253499392, "q_proj": { "group_size": { "6": 128 @@ -61499,8 +61499,8 @@ } }, { - "accuracy": 0.9968131867353804, - "total_bits": 265314304, + "accuracy": 0.9977868342430156, + "total_bits": 265838592, "q_proj": { "group_size": { "6": 32 @@ -61551,8 +61551,8 @@ } }, { - "accuracy": 0.9981138767907396, - "total_bits": 336861184, + "accuracy": 0.9988692441011153, + "total_bits": 337385472, "q_proj": { "group_size": { "8": 128 @@ -61605,8 +61605,8 @@ ], "model.layers.31.block_sparse_moe": [ { - "accuracy": 0.976185682346113, - "total_bits": 3157926400, + "accuracy": 0.969541786093032, + "total_bits": 3163693568, "w1": { "group_size": { "3": 64, @@ -61657,8 +61657,8 @@ } }, { - "accuracy": 0.9768299876013771, - "total_bits": 3268026880, + "accuracy": 0.9704707066703122, + "total_bits": 3273794048, "w1": { "group_size": { "3": 64, @@ -61709,8 +61709,8 @@ } }, { - "accuracy": 0.978169146226719, - "total_bits": 3652411392, + "accuracy": 0.9729160451388452, + "total_bits": 3658178560, "w1": { "group_size": { "3": 64, @@ -61758,8 +61758,8 @@ } }, { - "accuracy": 0.9786341701401398, - "total_bits": 4098056192, + "accuracy": 0.9739532782405149, + "total_bits": 4103823360, "w1": { "group_size": { "3": 64, @@ -61807,8 +61807,8 @@ } }, { - "accuracy": 0.9886293447925709, - "total_bits": 4621411072, + "accuracy": 0.984875336434925, + "total_bits": 4627178240, "w1": { "group_size": { "4": 128, @@ -61859,8 +61859,8 @@ } }, { - "accuracy": 0.9895238541066647, - "total_bits": 4737212416, + "accuracy": 0.9860867593961302, + "total_bits": 4742979584, "w1": { "group_size": { "4": 32, @@ -61911,8 +61911,8 @@ } }, { - "accuracy": 0.9900188003084622, - "total_bits": 5093868288, + "accuracy": 0.987232305778889, + "total_bits": 5099635456, "w1": { "group_size": { "4": 32, @@ -61960,8 +61960,8 @@ } }, { - "accuracy": 0.993969805072993, - "total_bits": 5824164608, + "accuracy": 0.9916669441518025, + "total_bits": 5829931776, "w1": { "group_size": { "4": 128 @@ -62003,8 +62003,8 @@ } }, { - "accuracy": 0.994550551782595, - "total_bits": 5910044672, + "accuracy": 0.9925135922676418, + "total_bits": 5915811840, "w1": { "group_size": { "4": 32 @@ -62046,8 +62046,8 @@ } }, { - "accuracy": 0.9942000653827563, - "total_bits": 6006579968, + "accuracy": 0.9922451756065129, + "total_bits": 6012347136, "w1": { "group_size": { "5": 128, @@ -62098,8 +62098,8 @@ } }, { - "accuracy": 0.9948955126747023, - "total_bits": 6122381312, + "accuracy": 0.9932162363693351, + "total_bits": 6128148480, "w1": { "group_size": { "5": 32, @@ -62150,8 +62150,8 @@ } }, { - "accuracy": 0.9969982496404555, - "total_bits": 7391748864, + "accuracy": 0.9959935083788878, + "total_bits": 7397516032, "w1": { "group_size": { "6": 128, @@ -62202,8 +62202,8 @@ } }, { - "accuracy": 0.9971515763463685, - "total_bits": 7507550208, + "accuracy": 0.9965529800720105, + "total_bits": 7513317376, "w1": { "group_size": { "6": 32, @@ -62254,8 +62254,8 @@ } }, { - "accuracy": 0.9981220058252802, - "total_bits": 8550425344, + "accuracy": 0.9976389382518391, + "total_bits": 8556192512, "w1": { "group_size": { "6": 128 @@ -62297,8 +62297,8 @@ } }, { - "accuracy": 0.9982746534442413, - "total_bits": 8877312000, + "accuracy": 0.9979207405613124, + "total_bits": 8883079168, "w1": { "group_size": { "8": 128, @@ -62346,8 +62346,8 @@ } }, { - "accuracy": 0.9983555800208705, - "total_bits": 9674229760, + "accuracy": 0.9982078294542589, + "total_bits": 9679996928, "w1": { "group_size": { "8": 128, @@ -62392,8 +62392,8 @@ } }, { - "accuracy": 0.9988854439070565, - "total_bits": 11318396928, + "accuracy": 0.9991795637934047, + "total_bits": 11324164096, "w1": { "group_size": { "8": 128