|
layer,module,loss,samples,damp,time
|
|
0,self_attn.k_proj,0.0000040634,0.05000,0.608
|
|
0,self_attn.v_proj,0.0000006062,0.05000,0.366
|
|
0,self_attn.q_proj,0.0000193926,0.05000,0.367
|
|
0,self_attn.o_proj,0.0000022809,0.05000,0.358
|
|
0,mlp.up_proj,0.0000099378,0.05000,0.373
|
|
0,mlp.gate_proj,0.0000124556,0.05000,0.374
|
|
0,mlp.down_proj,0.0000023232,0.05000,1.931
|
|
1,self_attn.k_proj,0.0000009198,0.05000,0.368
|
|
1,self_attn.v_proj,0.0000001869,0.05000,0.351
|
|
1,self_attn.q_proj,0.0000033967,0.05000,0.344
|
|
1,self_attn.o_proj,0.0000015133,0.05000,0.369
|
|
1,mlp.up_proj,0.0022965061,0.05000,0.366
|
|
1,mlp.gate_proj,0.0031997402,0.05000,0.360
|
|
1,mlp.down_proj,0.0000001408,0.05000,2.025
|
|
2,self_attn.k_proj,0.0000010904,0.05000,0.330
|
|
2,self_attn.v_proj,0.0000002759,0.05000,0.340
|
|
2,self_attn.q_proj,0.0000049015,0.05000,0.336
|
|
2,self_attn.o_proj,0.0000011300,0.05000,0.328
|
|
2,mlp.up_proj,0.0006311594,0.05000,0.325
|
|
2,mlp.gate_proj,0.0004826943,0.05000,0.342
|
|
2,mlp.down_proj,0.0004366701,0.05000,2.115
|
|
3,self_attn.k_proj,0.0000032384,0.05000,0.366
|
|
3,self_attn.v_proj,0.0000007111,0.05000,0.368
|
|
3,self_attn.q_proj,0.0000148615,0.05000,0.361
|
|
3,self_attn.o_proj,0.0000014529,0.05000,0.340
|
|
3,mlp.up_proj,0.0005290429,0.05000,0.360
|
|
3,mlp.gate_proj,0.0004906502,0.05000,0.352
|
|
3,mlp.down_proj,0.0000038434,0.05000,1.924
|
|
4,self_attn.k_proj,0.0000025431,0.05000,0.368
|
|
4,self_attn.v_proj,0.0000008757,0.05000,0.362
|
|
4,self_attn.q_proj,0.0000126745,0.05000,0.379
|
|
4,self_attn.o_proj,0.0000020869,0.05000,0.355
|
|
4,mlp.up_proj,0.0003978110,0.05000,0.367
|
|
4,mlp.gate_proj,0.0004516964,0.05000,0.361
|
|
4,mlp.down_proj,0.0000016207,0.05000,1.978
|
|
5,self_attn.k_proj,0.0000033112,0.05000,0.347
|
|
5,self_attn.v_proj,0.0000014565,0.05000,0.341
|
|
5,self_attn.q_proj,0.0000184254,0.05000,0.334
|
|
5,self_attn.o_proj,0.0000023437,0.05000,0.330
|
|
5,mlp.up_proj,0.0007071495,0.05000,0.334
|
|
5,mlp.gate_proj,0.0007961280,0.05000,0.340
|
|
5,mlp.down_proj,0.0000014939,0.05000,1.995
|
|
6,self_attn.k_proj,0.0000026044,0.05000,0.347
|
|
6,self_attn.v_proj,0.0000016148,0.05000,0.353
|
|
6,self_attn.q_proj,0.0000157694,0.05000,0.370
|
|
6,self_attn.o_proj,0.0000042285,0.05000,0.370
|
|
6,mlp.up_proj,0.0009018112,0.05000,0.368
|
|
6,mlp.gate_proj,0.0010366849,0.05000,0.368
|
|
6,mlp.down_proj,0.0000011726,0.05000,1.930
|
|
7,self_attn.k_proj,0.0000031160,0.05000,0.357
|
|
7,self_attn.v_proj,0.0000018465,0.05000,0.354
|
|
7,self_attn.q_proj,0.0000190155,0.05000,0.367
|
|
7,self_attn.o_proj,0.0000020445,0.05000,0.357
|
|
7,mlp.up_proj,0.0005896314,0.05000,0.369
|
|
7,mlp.gate_proj,0.0006801203,0.05000,0.364
|
|
7,mlp.down_proj,0.0000021789,0.05000,2.109
|
|
8,self_attn.k_proj,0.0000029602,0.05000,0.363
|
|
8,self_attn.v_proj,0.0000020435,0.05000,0.353
|
|
8,self_attn.q_proj,0.0000188390,0.05000,0.351
|
|
8,self_attn.o_proj,0.0000013217,0.05000,0.345
|
|
8,mlp.up_proj,0.0004880707,0.05000,0.341
|
|
8,mlp.gate_proj,0.0006587554,0.05000,0.345
|
|
8,mlp.down_proj,0.0000016654,0.05000,2.072
|
|
9,self_attn.k_proj,0.0000032877,0.05000,0.363
|
|
9,self_attn.v_proj,0.0000019524,0.05000,0.371
|
|
9,self_attn.q_proj,0.0000181619,0.05000,0.386
|
|
9,self_attn.o_proj,0.0000008039,0.05000,0.358
|
|
9,mlp.up_proj,0.0003569369,0.05000,0.371
|
|
9,mlp.gate_proj,0.0005121149,0.05000,0.371
|
|
9,mlp.down_proj,0.0000025109,0.05000,1.953
|
|
10,self_attn.k_proj,0.0000034298,0.05000,0.372
|
|
10,self_attn.v_proj,0.0000026901,0.05000,0.362
|
|
10,self_attn.q_proj,0.0000226561,0.05000,0.372
|
|
10,self_attn.o_proj,0.0000010749,0.05000,0.351
|
|
10,mlp.up_proj,0.0002255284,0.05000,0.377
|
|
10,mlp.gate_proj,0.0003353583,0.05000,0.368
|
|
10,mlp.down_proj,0.0000035347,0.05000,2.016
|
|
11,self_attn.k_proj,0.0000026725,0.05000,0.351
|
|
11,self_attn.v_proj,0.0000021309,0.05000,0.358
|
|
11,self_attn.q_proj,0.0000166953,0.05000,0.361
|
|
11,self_attn.o_proj,0.0000017854,0.05000,0.354
|
|
11,mlp.up_proj,0.0000811131,0.05000,0.349
|
|
11,mlp.gate_proj,0.0001103679,0.05000,0.350
|
|
11,mlp.down_proj,0.0000052722,0.05000,2.140
|
|
12,self_attn.k_proj,0.0000027185,0.05000,0.380
|
|
12,self_attn.v_proj,0.0000018781,0.05000,0.364
|
|
12,self_attn.q_proj,0.0000168562,0.05000,0.382
|
|
12,self_attn.o_proj,0.0000017540,0.05000,0.363
|
|
12,mlp.up_proj,0.0000913523,0.05000,0.373
|
|
12,mlp.gate_proj,0.0001290024,0.05000,0.370
|
|
12,mlp.down_proj,0.0000040652,0.05000,1.977
|
|
13,self_attn.k_proj,0.0000039847,0.05000,0.370
|
|
13,self_attn.v_proj,0.0000015442,0.05000,0.364
|
|
13,self_attn.q_proj,0.0000216264,0.05000,0.374
|
|
13,self_attn.o_proj,0.0000020568,0.05000,0.358
|
|
13,mlp.up_proj,0.0000465535,0.05000,0.371
|
|
13,mlp.gate_proj,0.0000485026,0.05000,0.366
|
|
13,mlp.down_proj,0.0000035773,0.05000,2.066
|
|
14,self_attn.k_proj,0.0000027242,0.05000,0.352
|
|
14,self_attn.v_proj,0.0000015621,0.05000,0.331
|
|
14,self_attn.q_proj,0.0000163233,0.05000,0.351
|
|
14,self_attn.o_proj,0.0000034186,0.05000,0.331
|
|
14,mlp.up_proj,0.0000473507,0.05000,0.340
|
|
14,mlp.gate_proj,0.0000497855,0.05000,0.337
|
|
14,mlp.down_proj,0.0000049650,0.05000,2.044
|
|
15,self_attn.k_proj,0.0000031355,0.05000,0.347
|
|
15,self_attn.v_proj,0.0000018360,0.05000,0.356
|
|
15,self_attn.q_proj,0.0000190232,0.05000,0.368
|
|
15,self_attn.o_proj,0.0000070935,0.05000,0.370
|
|
15,mlp.up_proj,0.0000492337,0.05000,0.371
|
|
15,mlp.gate_proj,0.0000472793,0.05000,0.372
|
|
15,mlp.down_proj,0.0000048209,0.05000,1.958
|
|
16,self_attn.k_proj,0.0000028816,0.05000,0.363
|
|
16,self_attn.v_proj,0.0000016655,0.05000,0.365
|
|
16,self_attn.q_proj,0.0000168683,0.05000,0.378
|
|
16,self_attn.o_proj,0.0000035855,0.05000,0.352
|
|
16,mlp.up_proj,0.0000498367,0.05000,0.374
|
|
16,mlp.gate_proj,0.0000511090,0.05000,0.368
|
|
16,mlp.down_proj,0.0000046699,0.05000,2.065
|
|
17,self_attn.k_proj,0.0000050119,0.05000,0.341
|
|
17,self_attn.v_proj,0.0000026293,0.05000,0.330
|
|
17,self_attn.q_proj,0.0000287738,0.05000,0.341
|
|
17,self_attn.o_proj,0.0000093749,0.05000,0.341
|
|
17,mlp.up_proj,0.0000494367,0.05000,0.343
|
|
17,mlp.gate_proj,0.0000486890,0.05000,0.341
|
|
17,mlp.down_proj,0.0000048504,0.05000,2.106
|
|
18,self_attn.k_proj,0.0000029729,0.05000,0.356
|
|
18,self_attn.v_proj,0.0000018751,0.05000,0.356
|
|
18,self_attn.q_proj,0.0000189247,0.05000,0.365
|
|
18,self_attn.o_proj,0.0000036817,0.05000,0.348
|
|
18,mlp.up_proj,0.0000509893,0.05000,0.371
|
|
18,mlp.gate_proj,0.0000527114,0.05000,0.366
|
|
18,mlp.down_proj,0.0000053501,0.05000,1.976
|
|
19,self_attn.k_proj,0.0000041355,0.05000,0.371
|
|
19,self_attn.v_proj,0.0000020078,0.05000,0.367
|
|
19,self_attn.q_proj,0.0000238353,0.05000,0.378
|
|
19,self_attn.o_proj,0.0000088396,0.05000,0.366
|
|
19,mlp.up_proj,0.0000449316,0.05000,0.375
|
|
19,mlp.gate_proj,0.0000431556,0.05000,0.374
|
|
19,mlp.down_proj,0.0000043508,0.05000,2.085
|
|
20,self_attn.k_proj,0.0000041288,0.05000,0.349
|
|
20,self_attn.v_proj,0.0000038416,0.05000,0.344
|
|
20,self_attn.q_proj,0.0000344315,0.05000,0.365
|
|
20,self_attn.o_proj,0.0000147554,0.05000,0.347
|
|
20,mlp.up_proj,0.0000535635,0.05000,0.349
|
|
20,mlp.gate_proj,0.0000523148,0.05000,0.363
|
|
20,mlp.down_proj,0.0000068330,0.05000,2.188
|
|
21,self_attn.k_proj,0.0000038298,0.05000,0.379
|
|
21,self_attn.v_proj,0.0000025224,0.05000,0.378
|
|
21,self_attn.q_proj,0.0000242552,0.05000,0.388
|
|
21,self_attn.o_proj,0.0000071513,0.05000,0.368
|
|
21,mlp.up_proj,0.0000517560,0.05000,0.385
|
|
21,mlp.gate_proj,0.0000548613,0.05000,0.380
|
|
21,mlp.down_proj,0.0000072140,0.05000,1.973
|
|
22,self_attn.k_proj,0.0000046511,0.05000,0.383
|
|
22,self_attn.v_proj,0.0000038298,0.05000,0.372
|
|
22,self_attn.q_proj,0.0000306858,0.05000,0.376
|
|
22,self_attn.o_proj,0.0000068249,0.05000,0.386
|
|
22,mlp.up_proj,0.0000545035,0.05000,0.385
|
|
22,mlp.gate_proj,0.0000554754,0.05000,0.363
|
|
22,mlp.down_proj,0.0000060427,0.05000,2.128
|
|
23,self_attn.k_proj,0.0000045517,0.05000,0.339
|
|
23,self_attn.v_proj,0.0000022649,0.05000,0.340
|
|
23,self_attn.q_proj,0.0000303075,0.05000,0.336
|
|
23,self_attn.o_proj,0.0000062696,0.05000,0.347
|
|
23,mlp.up_proj,0.0000542692,0.05000,0.367
|
|
23,mlp.gate_proj,0.0000519028,0.05000,0.380
|
|
23,mlp.down_proj,0.0000060388,0.05000,2.115
|
|
24,self_attn.k_proj,0.0000031391,0.05000,0.382
|
|
24,self_attn.v_proj,0.0000025389,0.05000,0.370
|
|
24,self_attn.q_proj,0.0000217306,0.05000,0.363
|
|
24,self_attn.o_proj,0.0000041164,0.05000,0.387
|
|
24,mlp.up_proj,0.0000561470,0.05000,0.385
|
|
24,mlp.gate_proj,0.0000542109,0.05000,0.362
|
|
24,mlp.down_proj,0.0000069144,0.05000,1.967
|
|
25,self_attn.k_proj,0.0000033413,0.05000,0.355
|
|
25,self_attn.v_proj,0.0000043154,0.05000,0.361
|
|
25,self_attn.q_proj,0.0000316823,0.05000,0.376
|
|
25,self_attn.o_proj,0.0000118628,0.05000,0.374
|
|
25,mlp.up_proj,0.0000677753,0.05000,0.368
|
|
25,mlp.gate_proj,0.0000669758,0.05000,0.383
|
|
25,mlp.down_proj,0.0000110951,0.05000,2.098
|
|
26,self_attn.k_proj,0.0000030871,0.05000,0.346
|
|
26,self_attn.v_proj,0.0000038372,0.05000,0.341
|
|
26,self_attn.q_proj,0.0000233898,0.05000,0.349
|
|
26,self_attn.o_proj,0.0000063974,0.05000,0.378
|
|
26,mlp.up_proj,0.0000735646,0.05000,0.403
|
|
26,mlp.gate_proj,0.0000696784,0.05000,0.387
|
|
26,mlp.down_proj,0.0000139417,0.05000,2.107
|
|
27,self_attn.k_proj,0.0000034423,0.05000,0.370
|
|
27,self_attn.v_proj,0.0000058276,0.05000,0.359
|
|
27,self_attn.q_proj,0.0000393344,0.05000,0.382
|
|
27,self_attn.o_proj,0.0000130343,0.05000,0.376
|
|
27,mlp.up_proj,0.0000744298,0.05000,0.377
|
|
27,mlp.gate_proj,0.0000736244,0.05000,0.356
|
|
27,mlp.down_proj,0.0000116098,0.05000,2.036
|
|
28,self_attn.k_proj,0.0000031231,0.05000,0.364
|
|
28,self_attn.v_proj,0.0000051522,0.05000,0.366
|
|
28,self_attn.q_proj,0.0000287182,0.05000,0.376
|
|
28,self_attn.o_proj,0.0000078141,0.05000,0.362
|
|
28,mlp.up_proj,0.0000809760,0.05000,0.489
|
|
28,mlp.gate_proj,0.0000809227,0.05000,0.362
|
|
28,mlp.down_proj,0.0000107104,0.05000,2.040
|
|
29,self_attn.k_proj,0.0000032159,0.05000,0.328
|
|
29,self_attn.v_proj,0.0000043822,0.05000,0.333
|
|
29,self_attn.q_proj,0.0000270912,0.05000,0.347
|
|
29,self_attn.o_proj,0.0000069960,0.05000,0.368
|
|
29,mlp.up_proj,0.0000897726,0.05000,0.388
|
|
29,mlp.gate_proj,0.0000888188,0.05000,0.408
|
|
29,mlp.down_proj,0.0000134719,0.05000,2.165
|
|
30,self_attn.k_proj,0.0000032228,0.05000,0.360
|
|
30,self_attn.v_proj,0.0000085865,0.05000,0.371
|
|
30,self_attn.q_proj,0.0000339776,0.05000,0.385
|
|
30,self_attn.o_proj,0.0000160412,0.05000,0.377
|
|
30,mlp.up_proj,0.0001201127,0.05000,0.354
|
|
30,mlp.gate_proj,0.0001130646,0.05000,0.345
|
|
30,mlp.down_proj,0.0008732517,0.05000,2.056
|
|
31,self_attn.k_proj,0.0000042993,0.05000,0.370
|
|
31,self_attn.v_proj,0.0000086031,0.05000,0.367
|
|
31,self_attn.q_proj,0.0000424028,0.05000,0.374
|
|
31,self_attn.o_proj,0.0000390922,0.05000,0.385
|
|
31,mlp.up_proj,0.0001399534,0.05000,0.379
|
|
31,mlp.gate_proj,0.0001253367,0.05000,0.370
|
|
31,mlp.down_proj,0.0000341888,0.05000,2.085
|
|
32,self_attn.k_proj,0.0000050244,0.05000,0.339
|
|
32,self_attn.v_proj,0.0000184226,0.05000,0.341
|
|
32,self_attn.q_proj,0.0000524240,0.05000,0.356
|
|
32,self_attn.o_proj,0.0000505486,0.05000,0.377
|
|
32,mlp.up_proj,0.0001564809,0.05000,0.380
|
|
32,mlp.gate_proj,0.0001395116,0.05000,0.347
|
|
32,mlp.down_proj,0.0000686840,0.05000,2.103
|
|
33,self_attn.k_proj,0.0000044370,0.05000,0.369
|
|
33,self_attn.v_proj,0.0000313598,0.05000,0.370
|
|
33,self_attn.q_proj,0.0000489970,0.05000,0.380
|
|
33,self_attn.o_proj,0.0000689787,0.05000,0.358
|
|
33,mlp.up_proj,0.0001696904,0.05000,0.345
|
|
33,mlp.gate_proj,0.0001370589,0.05000,0.341
|
|
33,mlp.down_proj,0.0001987096,0.05000,2.084
|
|
34,self_attn.k_proj,0.0000026818,0.05000,0.372
|
|
34,self_attn.v_proj,0.0000085758,0.05000,0.352
|
|
34,self_attn.q_proj,0.0000272798,0.05000,0.370
|
|
34,self_attn.o_proj,0.0000386386,0.05000,0.382
|
|
34,mlp.up_proj,0.0002085190,0.05000,0.373
|
|
34,mlp.gate_proj,0.0001788506,0.05000,0.355
|
|
34,mlp.down_proj,0.0002425168,0.05000,2.092
|
|
35,self_attn.k_proj,0.0000028524,0.05000,0.380
|
|
35,self_attn.v_proj,0.0000081620,0.05000,0.380
|
|
35,self_attn.q_proj,0.0000271286,0.05000,0.390
|
|
35,self_attn.o_proj,0.0000118175,0.05000,0.362
|
|
35,mlp.up_proj,0.0003510200,0.05000,0.383
|
|
35,mlp.gate_proj,0.0003188154,0.05000,0.374
|
|
35,mlp.down_proj,0.0010605976,0.05000,2.084
|
|
|