File size: 11,667 Bytes
8e85085
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,damp,time
0,self_attn.k_proj,0.0000040634,0.05000,0.608
0,self_attn.v_proj,0.0000006062,0.05000,0.366
0,self_attn.q_proj,0.0000193926,0.05000,0.367
0,self_attn.o_proj,0.0000022809,0.05000,0.358
0,mlp.up_proj,0.0000099378,0.05000,0.373
0,mlp.gate_proj,0.0000124556,0.05000,0.374
0,mlp.down_proj,0.0000023232,0.05000,1.931
1,self_attn.k_proj,0.0000009198,0.05000,0.368
1,self_attn.v_proj,0.0000001869,0.05000,0.351
1,self_attn.q_proj,0.0000033967,0.05000,0.344
1,self_attn.o_proj,0.0000015133,0.05000,0.369
1,mlp.up_proj,0.0022965061,0.05000,0.366
1,mlp.gate_proj,0.0031997402,0.05000,0.360
1,mlp.down_proj,0.0000001408,0.05000,2.025
2,self_attn.k_proj,0.0000010904,0.05000,0.330
2,self_attn.v_proj,0.0000002759,0.05000,0.340
2,self_attn.q_proj,0.0000049015,0.05000,0.336
2,self_attn.o_proj,0.0000011300,0.05000,0.328
2,mlp.up_proj,0.0006311594,0.05000,0.325
2,mlp.gate_proj,0.0004826943,0.05000,0.342
2,mlp.down_proj,0.0004366701,0.05000,2.115
3,self_attn.k_proj,0.0000032384,0.05000,0.366
3,self_attn.v_proj,0.0000007111,0.05000,0.368
3,self_attn.q_proj,0.0000148615,0.05000,0.361
3,self_attn.o_proj,0.0000014529,0.05000,0.340
3,mlp.up_proj,0.0005290429,0.05000,0.360
3,mlp.gate_proj,0.0004906502,0.05000,0.352
3,mlp.down_proj,0.0000038434,0.05000,1.924
4,self_attn.k_proj,0.0000025431,0.05000,0.368
4,self_attn.v_proj,0.0000008757,0.05000,0.362
4,self_attn.q_proj,0.0000126745,0.05000,0.379
4,self_attn.o_proj,0.0000020869,0.05000,0.355
4,mlp.up_proj,0.0003978110,0.05000,0.367
4,mlp.gate_proj,0.0004516964,0.05000,0.361
4,mlp.down_proj,0.0000016207,0.05000,1.978
5,self_attn.k_proj,0.0000033112,0.05000,0.347
5,self_attn.v_proj,0.0000014565,0.05000,0.341
5,self_attn.q_proj,0.0000184254,0.05000,0.334
5,self_attn.o_proj,0.0000023437,0.05000,0.330
5,mlp.up_proj,0.0007071495,0.05000,0.334
5,mlp.gate_proj,0.0007961280,0.05000,0.340
5,mlp.down_proj,0.0000014939,0.05000,1.995
6,self_attn.k_proj,0.0000026044,0.05000,0.347
6,self_attn.v_proj,0.0000016148,0.05000,0.353
6,self_attn.q_proj,0.0000157694,0.05000,0.370
6,self_attn.o_proj,0.0000042285,0.05000,0.370
6,mlp.up_proj,0.0009018112,0.05000,0.368
6,mlp.gate_proj,0.0010366849,0.05000,0.368
6,mlp.down_proj,0.0000011726,0.05000,1.930
7,self_attn.k_proj,0.0000031160,0.05000,0.357
7,self_attn.v_proj,0.0000018465,0.05000,0.354
7,self_attn.q_proj,0.0000190155,0.05000,0.367
7,self_attn.o_proj,0.0000020445,0.05000,0.357
7,mlp.up_proj,0.0005896314,0.05000,0.369
7,mlp.gate_proj,0.0006801203,0.05000,0.364
7,mlp.down_proj,0.0000021789,0.05000,2.109
8,self_attn.k_proj,0.0000029602,0.05000,0.363
8,self_attn.v_proj,0.0000020435,0.05000,0.353
8,self_attn.q_proj,0.0000188390,0.05000,0.351
8,self_attn.o_proj,0.0000013217,0.05000,0.345
8,mlp.up_proj,0.0004880707,0.05000,0.341
8,mlp.gate_proj,0.0006587554,0.05000,0.345
8,mlp.down_proj,0.0000016654,0.05000,2.072
9,self_attn.k_proj,0.0000032877,0.05000,0.363
9,self_attn.v_proj,0.0000019524,0.05000,0.371
9,self_attn.q_proj,0.0000181619,0.05000,0.386
9,self_attn.o_proj,0.0000008039,0.05000,0.358
9,mlp.up_proj,0.0003569369,0.05000,0.371
9,mlp.gate_proj,0.0005121149,0.05000,0.371
9,mlp.down_proj,0.0000025109,0.05000,1.953
10,self_attn.k_proj,0.0000034298,0.05000,0.372
10,self_attn.v_proj,0.0000026901,0.05000,0.362
10,self_attn.q_proj,0.0000226561,0.05000,0.372
10,self_attn.o_proj,0.0000010749,0.05000,0.351
10,mlp.up_proj,0.0002255284,0.05000,0.377
10,mlp.gate_proj,0.0003353583,0.05000,0.368
10,mlp.down_proj,0.0000035347,0.05000,2.016
11,self_attn.k_proj,0.0000026725,0.05000,0.351
11,self_attn.v_proj,0.0000021309,0.05000,0.358
11,self_attn.q_proj,0.0000166953,0.05000,0.361
11,self_attn.o_proj,0.0000017854,0.05000,0.354
11,mlp.up_proj,0.0000811131,0.05000,0.349
11,mlp.gate_proj,0.0001103679,0.05000,0.350
11,mlp.down_proj,0.0000052722,0.05000,2.140
12,self_attn.k_proj,0.0000027185,0.05000,0.380
12,self_attn.v_proj,0.0000018781,0.05000,0.364
12,self_attn.q_proj,0.0000168562,0.05000,0.382
12,self_attn.o_proj,0.0000017540,0.05000,0.363
12,mlp.up_proj,0.0000913523,0.05000,0.373
12,mlp.gate_proj,0.0001290024,0.05000,0.370
12,mlp.down_proj,0.0000040652,0.05000,1.977
13,self_attn.k_proj,0.0000039847,0.05000,0.370
13,self_attn.v_proj,0.0000015442,0.05000,0.364
13,self_attn.q_proj,0.0000216264,0.05000,0.374
13,self_attn.o_proj,0.0000020568,0.05000,0.358
13,mlp.up_proj,0.0000465535,0.05000,0.371
13,mlp.gate_proj,0.0000485026,0.05000,0.366
13,mlp.down_proj,0.0000035773,0.05000,2.066
14,self_attn.k_proj,0.0000027242,0.05000,0.352
14,self_attn.v_proj,0.0000015621,0.05000,0.331
14,self_attn.q_proj,0.0000163233,0.05000,0.351
14,self_attn.o_proj,0.0000034186,0.05000,0.331
14,mlp.up_proj,0.0000473507,0.05000,0.340
14,mlp.gate_proj,0.0000497855,0.05000,0.337
14,mlp.down_proj,0.0000049650,0.05000,2.044
15,self_attn.k_proj,0.0000031355,0.05000,0.347
15,self_attn.v_proj,0.0000018360,0.05000,0.356
15,self_attn.q_proj,0.0000190232,0.05000,0.368
15,self_attn.o_proj,0.0000070935,0.05000,0.370
15,mlp.up_proj,0.0000492337,0.05000,0.371
15,mlp.gate_proj,0.0000472793,0.05000,0.372
15,mlp.down_proj,0.0000048209,0.05000,1.958
16,self_attn.k_proj,0.0000028816,0.05000,0.363
16,self_attn.v_proj,0.0000016655,0.05000,0.365
16,self_attn.q_proj,0.0000168683,0.05000,0.378
16,self_attn.o_proj,0.0000035855,0.05000,0.352
16,mlp.up_proj,0.0000498367,0.05000,0.374
16,mlp.gate_proj,0.0000511090,0.05000,0.368
16,mlp.down_proj,0.0000046699,0.05000,2.065
17,self_attn.k_proj,0.0000050119,0.05000,0.341
17,self_attn.v_proj,0.0000026293,0.05000,0.330
17,self_attn.q_proj,0.0000287738,0.05000,0.341
17,self_attn.o_proj,0.0000093749,0.05000,0.341
17,mlp.up_proj,0.0000494367,0.05000,0.343
17,mlp.gate_proj,0.0000486890,0.05000,0.341
17,mlp.down_proj,0.0000048504,0.05000,2.106
18,self_attn.k_proj,0.0000029729,0.05000,0.356
18,self_attn.v_proj,0.0000018751,0.05000,0.356
18,self_attn.q_proj,0.0000189247,0.05000,0.365
18,self_attn.o_proj,0.0000036817,0.05000,0.348
18,mlp.up_proj,0.0000509893,0.05000,0.371
18,mlp.gate_proj,0.0000527114,0.05000,0.366
18,mlp.down_proj,0.0000053501,0.05000,1.976
19,self_attn.k_proj,0.0000041355,0.05000,0.371
19,self_attn.v_proj,0.0000020078,0.05000,0.367
19,self_attn.q_proj,0.0000238353,0.05000,0.378
19,self_attn.o_proj,0.0000088396,0.05000,0.366
19,mlp.up_proj,0.0000449316,0.05000,0.375
19,mlp.gate_proj,0.0000431556,0.05000,0.374
19,mlp.down_proj,0.0000043508,0.05000,2.085
20,self_attn.k_proj,0.0000041288,0.05000,0.349
20,self_attn.v_proj,0.0000038416,0.05000,0.344
20,self_attn.q_proj,0.0000344315,0.05000,0.365
20,self_attn.o_proj,0.0000147554,0.05000,0.347
20,mlp.up_proj,0.0000535635,0.05000,0.349
20,mlp.gate_proj,0.0000523148,0.05000,0.363
20,mlp.down_proj,0.0000068330,0.05000,2.188
21,self_attn.k_proj,0.0000038298,0.05000,0.379
21,self_attn.v_proj,0.0000025224,0.05000,0.378
21,self_attn.q_proj,0.0000242552,0.05000,0.388
21,self_attn.o_proj,0.0000071513,0.05000,0.368
21,mlp.up_proj,0.0000517560,0.05000,0.385
21,mlp.gate_proj,0.0000548613,0.05000,0.380
21,mlp.down_proj,0.0000072140,0.05000,1.973
22,self_attn.k_proj,0.0000046511,0.05000,0.383
22,self_attn.v_proj,0.0000038298,0.05000,0.372
22,self_attn.q_proj,0.0000306858,0.05000,0.376
22,self_attn.o_proj,0.0000068249,0.05000,0.386
22,mlp.up_proj,0.0000545035,0.05000,0.385
22,mlp.gate_proj,0.0000554754,0.05000,0.363
22,mlp.down_proj,0.0000060427,0.05000,2.128
23,self_attn.k_proj,0.0000045517,0.05000,0.339
23,self_attn.v_proj,0.0000022649,0.05000,0.340
23,self_attn.q_proj,0.0000303075,0.05000,0.336
23,self_attn.o_proj,0.0000062696,0.05000,0.347
23,mlp.up_proj,0.0000542692,0.05000,0.367
23,mlp.gate_proj,0.0000519028,0.05000,0.380
23,mlp.down_proj,0.0000060388,0.05000,2.115
24,self_attn.k_proj,0.0000031391,0.05000,0.382
24,self_attn.v_proj,0.0000025389,0.05000,0.370
24,self_attn.q_proj,0.0000217306,0.05000,0.363
24,self_attn.o_proj,0.0000041164,0.05000,0.387
24,mlp.up_proj,0.0000561470,0.05000,0.385
24,mlp.gate_proj,0.0000542109,0.05000,0.362
24,mlp.down_proj,0.0000069144,0.05000,1.967
25,self_attn.k_proj,0.0000033413,0.05000,0.355
25,self_attn.v_proj,0.0000043154,0.05000,0.361
25,self_attn.q_proj,0.0000316823,0.05000,0.376
25,self_attn.o_proj,0.0000118628,0.05000,0.374
25,mlp.up_proj,0.0000677753,0.05000,0.368
25,mlp.gate_proj,0.0000669758,0.05000,0.383
25,mlp.down_proj,0.0000110951,0.05000,2.098
26,self_attn.k_proj,0.0000030871,0.05000,0.346
26,self_attn.v_proj,0.0000038372,0.05000,0.341
26,self_attn.q_proj,0.0000233898,0.05000,0.349
26,self_attn.o_proj,0.0000063974,0.05000,0.378
26,mlp.up_proj,0.0000735646,0.05000,0.403
26,mlp.gate_proj,0.0000696784,0.05000,0.387
26,mlp.down_proj,0.0000139417,0.05000,2.107
27,self_attn.k_proj,0.0000034423,0.05000,0.370
27,self_attn.v_proj,0.0000058276,0.05000,0.359
27,self_attn.q_proj,0.0000393344,0.05000,0.382
27,self_attn.o_proj,0.0000130343,0.05000,0.376
27,mlp.up_proj,0.0000744298,0.05000,0.377
27,mlp.gate_proj,0.0000736244,0.05000,0.356
27,mlp.down_proj,0.0000116098,0.05000,2.036
28,self_attn.k_proj,0.0000031231,0.05000,0.364
28,self_attn.v_proj,0.0000051522,0.05000,0.366
28,self_attn.q_proj,0.0000287182,0.05000,0.376
28,self_attn.o_proj,0.0000078141,0.05000,0.362
28,mlp.up_proj,0.0000809760,0.05000,0.489
28,mlp.gate_proj,0.0000809227,0.05000,0.362
28,mlp.down_proj,0.0000107104,0.05000,2.040
29,self_attn.k_proj,0.0000032159,0.05000,0.328
29,self_attn.v_proj,0.0000043822,0.05000,0.333
29,self_attn.q_proj,0.0000270912,0.05000,0.347
29,self_attn.o_proj,0.0000069960,0.05000,0.368
29,mlp.up_proj,0.0000897726,0.05000,0.388
29,mlp.gate_proj,0.0000888188,0.05000,0.408
29,mlp.down_proj,0.0000134719,0.05000,2.165
30,self_attn.k_proj,0.0000032228,0.05000,0.360
30,self_attn.v_proj,0.0000085865,0.05000,0.371
30,self_attn.q_proj,0.0000339776,0.05000,0.385
30,self_attn.o_proj,0.0000160412,0.05000,0.377
30,mlp.up_proj,0.0001201127,0.05000,0.354
30,mlp.gate_proj,0.0001130646,0.05000,0.345
30,mlp.down_proj,0.0008732517,0.05000,2.056
31,self_attn.k_proj,0.0000042993,0.05000,0.370
31,self_attn.v_proj,0.0000086031,0.05000,0.367
31,self_attn.q_proj,0.0000424028,0.05000,0.374
31,self_attn.o_proj,0.0000390922,0.05000,0.385
31,mlp.up_proj,0.0001399534,0.05000,0.379
31,mlp.gate_proj,0.0001253367,0.05000,0.370
31,mlp.down_proj,0.0000341888,0.05000,2.085
32,self_attn.k_proj,0.0000050244,0.05000,0.339
32,self_attn.v_proj,0.0000184226,0.05000,0.341
32,self_attn.q_proj,0.0000524240,0.05000,0.356
32,self_attn.o_proj,0.0000505486,0.05000,0.377
32,mlp.up_proj,0.0001564809,0.05000,0.380
32,mlp.gate_proj,0.0001395116,0.05000,0.347
32,mlp.down_proj,0.0000686840,0.05000,2.103
33,self_attn.k_proj,0.0000044370,0.05000,0.369
33,self_attn.v_proj,0.0000313598,0.05000,0.370
33,self_attn.q_proj,0.0000489970,0.05000,0.380
33,self_attn.o_proj,0.0000689787,0.05000,0.358
33,mlp.up_proj,0.0001696904,0.05000,0.345
33,mlp.gate_proj,0.0001370589,0.05000,0.341
33,mlp.down_proj,0.0001987096,0.05000,2.084
34,self_attn.k_proj,0.0000026818,0.05000,0.372
34,self_attn.v_proj,0.0000085758,0.05000,0.352
34,self_attn.q_proj,0.0000272798,0.05000,0.370
34,self_attn.o_proj,0.0000386386,0.05000,0.382
34,mlp.up_proj,0.0002085190,0.05000,0.373
34,mlp.gate_proj,0.0001788506,0.05000,0.355
34,mlp.down_proj,0.0002425168,0.05000,2.092
35,self_attn.k_proj,0.0000028524,0.05000,0.380
35,self_attn.v_proj,0.0000081620,0.05000,0.380
35,self_attn.q_proj,0.0000271286,0.05000,0.390
35,self_attn.o_proj,0.0000118175,0.05000,0.362
35,mlp.up_proj,0.0003510200,0.05000,0.383
35,mlp.gate_proj,0.0003188154,0.05000,0.374
35,mlp.down_proj,0.0010605976,0.05000,2.084