24-0830-wanda-llama3.1-8B / sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.7.csv
Vui Seng Chua
Add content
37aba61
row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.380859375,0.70458984375,0.94970703125,0.699951171875,0.0,0.9453125,1.0,0.699951171875,0.125,0.6875,1.0
1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.4150390625,0.70263671875,0.9375,0.699951171875,0.0,0.9296875,1.0,0.699951171875,0.1875,0.6875,1.0
2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.44287109375,0.701171875,0.90771484375,0.699951171875,0.0078125,0.7734375,1.0,0.699951171875,0.125,0.6875,1.0
3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.1572265625,0.775390625,1.0,0.699951171875,0.015625,0.78125,1.0,0.699951171875,0.0,0.8125,1.0
4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.607421875,0.701171875,0.76318359375,0.6999512314796448,0.125,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62646484375,0.7001953125,0.76513671875,0.6999512314796448,0.1171875,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5146484375,0.70166015625,0.81689453125,0.6999861001968384,0.0625,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.4599609375,0.703125,0.8779296875,0.699951171875,0.0,0.7890625,1.0,0.699951171875,0.125,0.6875,1.0
8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.4560546875,0.70263671875,0.86865234375,0.699951171875,0.0,0.796875,1.0,0.699951171875,0.1875,0.6875,1.0
9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.48046875,0.7021484375,0.83349609375,0.699951171875,0.03125,0.765625,1.0,0.699951171875,0.1875,0.6875,1.0
10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.39208984375,0.73291015625,0.9912109375,0.699951171875,0.0625,0.7109375,1.0,0.699951171875,0.0,0.75,1.0
11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.61865234375,0.70068359375,0.76806640625,0.6999512314796448,0.1953125,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62548828125,0.7001953125,0.77001953125,0.6999512314796448,0.15625,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.58740234375,0.7021484375,0.81005859375,0.6999861001968384,0.0078125,0.7421875,0.9609375,0.6999861001968384,0.125,0.6875,1.0
14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5400390625,0.703125,0.8369140625,0.699951171875,0.0078125,0.765625,1.0,0.699951171875,0.125,0.6875,1.0
15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.51904296875,0.7041015625,0.849609375,0.699951171875,0.015625,0.7734375,1.0,0.699951171875,0.125,0.6875,1.0
16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5751953125,0.703125,0.80615234375,0.699951171875,0.078125,0.75,1.0,0.699951171875,0.1875,0.6875,1.0
17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.37841796875,0.673828125,0.99755859375,0.699951171875,0.0234375,0.6875,1.0,0.699951171875,0.0,0.6875,1.0
18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.626953125,0.701171875,0.7607421875,0.6999512314796448,0.171875,0.7109375,1.0,0.6999512314796448,0.0625,0.6875,1.0
19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6318359375,0.70068359375,0.75439453125,0.6999512314796448,0.21875,0.703125,1.0,0.6999512314796448,0.125,0.6875,1.0
20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.560546875,0.70166015625,0.83056640625,0.6999861001968384,0.0078125,0.734375,0.984375,0.6999861001968384,0.0625,0.6875,1.0
21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57275390625,0.70166015625,0.8076171875,0.699951171875,0.0234375,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55615234375,0.70263671875,0.80712890625,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.125,0.6875,1.0
23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58837890625,0.70166015625,0.79443359375,0.699951171875,0.0859375,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.40673828125,0.673828125,0.9990234375,0.699951171875,0.0703125,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62353515625,0.701171875,0.763671875,0.6999512314796448,0.140625,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6416015625,0.7001953125,0.76708984375,0.6999512314796448,0.234375,0.7109375,1.0,0.6999512314796448,0.0625,0.6875,1.0
27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.576171875,0.701171875,0.81103515625,0.6999861001968384,0.0078125,0.7265625,0.9921875,0.6999861001968384,0.0625,0.6875,1.0
28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.55419921875,0.701171875,0.81884765625,0.699951171875,0.0078125,0.75,1.0,0.699951171875,0.125,0.6875,1.0
29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5556640625,0.701171875,0.8232421875,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.125,0.6875,1.0
30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58251953125,0.701171875,0.8134765625,0.699951171875,0.0703125,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.29248046875,0.6943359375,0.99755859375,0.699951171875,0.0234375,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6123046875,0.70068359375,0.7685546875,0.6999512314796448,0.1328125,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6259765625,0.70068359375,0.77099609375,0.6999512314796448,0.2109375,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53955078125,0.7021484375,0.8134765625,0.6999861001968384,0.0,0.71875,1.0,0.6999861001968384,0.125,0.6875,1.0
35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.55419921875,0.70166015625,0.82177734375,0.699951171875,0.015625,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55029296875,0.7021484375,0.8115234375,0.699951171875,0.015625,0.75,0.984375,0.699951171875,0.1875,0.6875,1.0
37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.583984375,0.701171875,0.81689453125,0.699951171875,0.09375,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.30322265625,0.7255859375,0.9775390625,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.0,0.75,1.0
39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59423828125,0.701171875,0.77978515625,0.6999512314796448,0.0703125,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59033203125,0.70068359375,0.77392578125,0.6999512314796448,0.2109375,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5673828125,0.701171875,0.8076171875,0.6999861001968384,0.0,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.56591796875,0.70166015625,0.82958984375,0.699951171875,0.03125,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.54443359375,0.7021484375,0.828125,0.699951171875,0.015625,0.7421875,0.984375,0.699951171875,0.125,0.6875,1.0
44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58642578125,0.701171875,0.802734375,0.699951171875,0.0703125,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.33203125,0.70703125,0.9892578125,0.699951171875,0.0390625,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60546875,0.70166015625,0.77734375,0.6999512314796448,0.0859375,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60693359375,0.701171875,0.77294921875,0.6999512314796448,0.1796875,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53564453125,0.70068359375,0.8017578125,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.58056640625,0.701171875,0.80908203125,0.699951171875,0.0234375,0.7265625,1.0,0.699951171875,0.0625,0.6875,1.0
50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58203125,0.701171875,0.82080078125,0.699951171875,0.015625,0.734375,0.984375,0.699951171875,0.1875,0.6875,1.0
51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.60400390625,0.69970703125,0.80615234375,0.699951171875,0.1015625,0.71875,0.9921875,0.699951171875,0.125,0.6875,1.0
52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.39306640625,0.70947265625,0.94873046875,0.699951171875,0.0234375,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60595703125,0.70068359375,0.76904296875,0.6999512314796448,0.078125,0.71875,0.9296875,0.6999512314796448,0.0,0.6875,1.0
54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62255859375,0.70068359375,0.77392578125,0.6999512314796448,0.140625,0.71875,0.9375,0.6999512314796448,0.125,0.6875,1.0
55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.54052734375,0.69970703125,0.83349609375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.56884765625,0.7021484375,0.79931640625,0.699951171875,0.0234375,0.7265625,0.9921875,0.699951171875,0.125,0.6875,1.0
57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.564453125,0.703125,0.79052734375,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58642578125,0.701171875,0.775390625,0.699951171875,0.078125,0.71875,1.0,0.699951171875,0.1875,0.6875,1.0
59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.306640625,0.7109375,0.9765625,0.699951171875,0.015625,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.7021484375,0.77978515625,0.6999512314796448,0.0390625,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60546875,0.701171875,0.77001953125,0.6999512314796448,0.1640625,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53955078125,0.70263671875,0.84619140625,0.6999861001968384,0.015625,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.583984375,0.70068359375,0.8125,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57763671875,0.7001953125,0.80322265625,0.699951171875,0.0234375,0.7265625,0.984375,0.699951171875,0.1875,0.6875,1.0
65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5927734375,0.69921875,0.78515625,0.699951171875,0.0859375,0.7109375,1.0,0.699951171875,0.1875,0.6875,1.0
66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.3671875,0.6826171875,0.97998046875,0.699951171875,0.078125,0.71875,1.0,0.699951171875,0.0,0.6875,1.0
67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.578125,0.70166015625,0.779296875,0.6999512314796448,0.015625,0.71875,0.9453125,0.6999512314796448,0.125,0.6875,1.0
68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.70068359375,0.7734375,0.6999512314796448,0.1484375,0.71875,0.9375,0.6999512314796448,0.0625,0.6875,1.0
69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.50244140625,0.7001953125,0.86474609375,0.6999861001968384,0.0078125,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57275390625,0.70263671875,0.80078125,0.699951171875,0.0234375,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5830078125,0.703125,0.79443359375,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.6005859375,0.7021484375,0.7900390625,0.699951171875,0.1015625,0.71875,0.9921875,0.699951171875,0.125,0.6875,1.0
73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.28076171875,0.75634765625,0.96875,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.0,0.75,1.0
74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6083984375,0.70166015625,0.791015625,0.6999512314796448,0.0234375,0.71875,0.9375,0.6999512314796448,0.0625,0.6875,1.0
75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.61376953125,0.70166015625,0.775390625,0.6999512314796448,0.125,0.71875,0.9296875,0.6999512314796448,0.0625,0.6875,1.0
76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.533203125,0.701171875,0.85888671875,0.6999861001968384,0.0078125,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57373046875,0.7021484375,0.79345703125,0.699951171875,0.015625,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57958984375,0.703125,0.7841796875,0.699951171875,0.03125,0.7265625,0.9921875,0.699951171875,0.125,0.6875,1.0
79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.59326171875,0.70166015625,0.78515625,0.699951171875,0.125,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.3916015625,0.7158203125,0.9892578125,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60205078125,0.7021484375,0.7890625,0.6999512314796448,0.015625,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5986328125,0.701171875,0.787109375,0.6999512314796448,0.09375,0.71875,0.953125,0.6999512314796448,0.125,0.6875,1.0
83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.51025390625,0.70068359375,0.865234375,0.6999861001968384,0.0,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54638671875,0.701171875,0.7958984375,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57421875,0.701171875,0.79345703125,0.699951171875,0.0234375,0.7265625,0.9921875,0.699951171875,0.125,0.6875,1.0
86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5751953125,0.7001953125,0.78564453125,0.699951171875,0.0859375,0.71875,1.0,0.699951171875,0.1875,0.6875,1.0
87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.384765625,0.7080078125,0.9140625,0.699951171875,0.109375,0.71875,1.0,0.699951171875,0.0,0.6875,1.0
88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58837890625,0.703125,0.7734375,0.6999512314796448,0.0390625,0.71875,0.9453125,0.6999512314796448,0.125,0.6875,1.0
89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5947265625,0.70263671875,0.76806640625,0.6999512314796448,0.109375,0.71875,0.9453125,0.6999512314796448,0.125,0.6875,1.0
90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.50439453125,0.70068359375,0.85498046875,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.587890625,0.69873046875,0.79638671875,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.59130859375,0.69970703125,0.79345703125,0.699951171875,0.0078125,0.7265625,0.9921875,0.699951171875,0.1875,0.6875,1.0
93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.61669921875,0.69921875,0.775390625,0.699951171875,0.1015625,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.32958984375,0.70068359375,0.99462890625,0.699951171875,0.046875,0.71875,1.0,0.699951171875,0.0,0.6875,1.0
95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5830078125,0.701171875,0.7900390625,0.6999512314796448,0.03125,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.591796875,0.701171875,0.7822265625,0.6999512314796448,0.109375,0.71875,0.9453125,0.6999512314796448,0.0625,0.6875,1.0
97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53564453125,0.69921875,0.84912109375,0.6999861001968384,0.0,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.576171875,0.7001953125,0.80078125,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57421875,0.70166015625,0.81005859375,0.699951171875,0.015625,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56787109375,0.7001953125,0.81494140625,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.1875,0.6875,1.0
101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.3203125,0.7138671875,0.9765625,0.699951171875,0.0390625,0.7265625,1.0,0.699951171875,0.0,0.75,1.0
102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59375,0.701171875,0.7822265625,0.6999512314796448,0.03125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59765625,0.701171875,0.7822265625,0.6999512314796448,0.109375,0.71875,0.953125,0.6999512314796448,0.0625,0.6875,1.0
104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.47802734375,0.701171875,0.8837890625,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.59521484375,0.7001953125,0.79931640625,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.59326171875,0.70068359375,0.7978515625,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.591796875,0.69970703125,0.7939453125,0.699951171875,0.0546875,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.279296875,0.71044921875,1.0,0.699951171875,0.0703125,0.7109375,1.0,0.699951171875,0.0,0.6875,1.0
109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.70166015625,0.810546875,0.6999512314796448,0.0234375,0.7265625,0.96875,0.6999512314796448,0.0625,0.6875,1.0
110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5927734375,0.70166015625,0.80126953125,0.6999512314796448,0.125,0.71875,0.9609375,0.6999512314796448,0.125,0.6875,1.0
111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.52099609375,0.70068359375,0.87353515625,0.6999861001968384,0.0234375,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.59326171875,0.7021484375,0.80712890625,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.6005859375,0.7021484375,0.802734375,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58203125,0.7001953125,0.81494140625,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.30126953125,0.712890625,1.0,0.699951171875,0.0390625,0.71875,1.0,0.699951171875,0.0,0.75,1.0
116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60791015625,0.7001953125,0.796875,0.6999512314796448,0.0,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6103515625,0.69970703125,0.79052734375,0.6999512314796448,0.09375,0.71875,0.984375,0.6999512314796448,0.125,0.6875,1.0
118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53466796875,0.69873046875,0.86865234375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57763671875,0.70166015625,0.8212890625,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58056640625,0.703125,0.8203125,0.699951171875,0.015625,0.7421875,0.984375,0.699951171875,0.1875,0.6875,1.0
121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58642578125,0.701171875,0.80224609375,0.699951171875,0.0703125,0.7265625,1.0,0.699951171875,0.1875,0.6875,1.0
122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.357421875,0.6708984375,1.0,0.699951171875,0.0625,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.583984375,0.7001953125,0.78662109375,0.6999512314796448,0.046875,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5908203125,0.70068359375,0.78369140625,0.6999512314796448,0.0859375,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5498046875,0.701171875,0.8662109375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5810546875,0.70361328125,0.8017578125,0.699951171875,0.0234375,0.734375,0.9921875,0.699951171875,0.0625,0.6875,1.0
127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58740234375,0.703125,0.802734375,0.699951171875,0.0078125,0.7421875,0.984375,0.699951171875,0.125,0.6875,1.0
128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56787109375,0.7021484375,0.81494140625,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.22705078125,0.71533203125,1.0,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.0,0.75,1.0
130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.57373046875,0.70166015625,0.78515625,0.6999512314796448,0.03125,0.7265625,0.96875,0.6999512314796448,0.125,0.6875,1.0
131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58837890625,0.70166015625,0.775390625,0.6999512314796448,0.0703125,0.71875,0.984375,0.6999512314796448,0.125,0.6875,1.0
132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.52783203125,0.70166015625,0.880859375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.576171875,0.7021484375,0.80712890625,0.699951171875,0.015625,0.734375,0.96875,0.699951171875,0.125,0.6875,1.0
134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56787109375,0.70166015625,0.80419921875,0.699951171875,0.0078125,0.7421875,0.9765625,0.699951171875,0.1875,0.6875,1.0
135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56884765625,0.70166015625,0.80908203125,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.25537109375,0.75390625,1.0,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.0,0.75,1.0
137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5537109375,0.70263671875,0.7861328125,0.6999512314796448,0.015625,0.7265625,0.96875,0.6999512314796448,0.125,0.6875,1.0
138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5693359375,0.7021484375,0.79150390625,0.6999512314796448,0.078125,0.7265625,0.984375,0.6999512314796448,0.0625,0.6875,1.0
139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.49755859375,0.70166015625,0.85888671875,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.56689453125,0.7021484375,0.81103515625,0.699951171875,0.015625,0.7421875,0.9921875,0.699951171875,0.125,0.6875,1.0
141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56884765625,0.70361328125,0.8115234375,0.699951171875,0.0078125,0.75,0.984375,0.699951171875,0.125,0.6875,1.0
142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55322265625,0.70361328125,0.8037109375,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.330078125,0.70703125,0.99951171875,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.0,0.75,1.0
144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.57470703125,0.7021484375,0.787109375,0.6999512314796448,0.0234375,0.7265625,0.96875,0.6999512314796448,0.0625,0.6875,1.0
145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59375,0.7021484375,0.787109375,0.6999512314796448,0.0859375,0.7265625,0.9921875,0.6999512314796448,0.125,0.6875,1.0
146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5576171875,0.7001953125,0.85498046875,0.6999861001968384,0.0078125,0.734375,1.0,0.6999861001968384,0.0625,0.6875,1.0
147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57080078125,0.70263671875,0.798828125,0.699951171875,0.0078125,0.7421875,0.9765625,0.699951171875,0.125,0.6875,1.0
148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5732421875,0.70263671875,0.79736328125,0.699951171875,0.0078125,0.7421875,0.9765625,0.699951171875,0.1875,0.6875,1.0
149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5615234375,0.70263671875,0.80126953125,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.34375,0.66845703125,1.0,0.699951171875,0.0078125,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59619140625,0.701171875,0.7880859375,0.6999512314796448,0.0,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59033203125,0.701171875,0.78759765625,0.6999512314796448,0.0546875,0.7265625,0.96875,0.6999512314796448,0.0625,0.6875,1.0
153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.49365234375,0.701171875,0.83447265625,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.0625,0.6875,1.0
154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5654296875,0.703125,0.8017578125,0.699951171875,0.0078125,0.7421875,0.96875,0.699951171875,0.0625,0.6875,1.0
155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.564453125,0.70361328125,0.796875,0.699951171875,0.0078125,0.75,0.984375,0.699951171875,0.125,0.6875,1.0
156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5703125,0.70263671875,0.7900390625,0.699951171875,0.0390625,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.23828125,0.72314453125,1.0,0.699951171875,0.0078125,0.7265625,1.0,0.699951171875,0.0,0.75,1.0
158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.599609375,0.7021484375,0.783203125,0.6999512314796448,0.0078125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60595703125,0.70166015625,0.77880859375,0.6999512314796448,0.0703125,0.7265625,0.9765625,0.6999512314796448,0.0625,0.6875,1.0
160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.54736328125,0.701171875,0.83544921875,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.564453125,0.703125,0.802734375,0.699951171875,0.0,0.7421875,0.96875,0.699951171875,0.125,0.6875,1.0
162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55712890625,0.70361328125,0.8115234375,0.699951171875,0.0078125,0.75,0.96875,0.699951171875,0.125,0.6875,1.0
163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57275390625,0.7021484375,0.80517578125,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.28076171875,0.736328125,1.0,0.699951171875,0.046875,0.7421875,1.0,0.699951171875,0.0,0.75,1.0
165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.595703125,0.70263671875,0.7861328125,0.6999512314796448,0.015625,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6005859375,0.7021484375,0.78857421875,0.6999512314796448,0.0625,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5439453125,0.70263671875,0.8251953125,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54833984375,0.7021484375,0.81396484375,0.699951171875,0.0,0.7421875,0.9765625,0.699951171875,0.125,0.6875,1.0
169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55419921875,0.7021484375,0.8251953125,0.699951171875,0.0,0.75,0.9765625,0.699951171875,0.1875,0.6875,1.0
170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5576171875,0.70068359375,0.8125,0.699951171875,0.015625,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.27099609375,0.716796875,1.0,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.0,0.75,1.0
172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.70166015625,0.7890625,0.6999512314796448,0.03125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.599609375,0.70166015625,0.78564453125,0.6999512314796448,0.0703125,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.48388671875,0.701171875,0.83837890625,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57421875,0.70263671875,0.8056640625,0.699951171875,0.0,0.7421875,0.9765625,0.699951171875,0.125,0.6875,1.0
176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55419921875,0.7021484375,0.83935546875,0.699951171875,0.0,0.75,0.9921875,0.699951171875,0.125,0.6875,1.0
177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.54248046875,0.70166015625,0.8037109375,0.699951171875,0.03125,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.26220703125,0.79541015625,1.0,0.699951171875,0.015625,0.796875,1.0,0.699951171875,0.0,0.8125,1.0
179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59375,0.70166015625,0.78369140625,0.6999512314796448,0.03125,0.7265625,0.953125,0.6999512314796448,0.0625,0.6875,1.0
180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59130859375,0.70166015625,0.7822265625,0.6999512314796448,0.0546875,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.52294921875,0.7021484375,0.84033203125,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.55419921875,0.703125,0.8056640625,0.699951171875,0.0,0.7421875,0.9921875,0.699951171875,0.125,0.6875,1.0
183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5478515625,0.70361328125,0.82421875,0.699951171875,0.0,0.75,0.9765625,0.699951171875,0.125,0.6875,1.0
184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55029296875,0.70166015625,0.80126953125,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.24609375,0.6591796875,1.0,0.699951171875,0.015625,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58056640625,0.701171875,0.79248046875,0.6999512314796448,0.0390625,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58251953125,0.701171875,0.78955078125,0.6999512314796448,0.078125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.443359375,0.70263671875,0.8466796875,0.6999861001968384,0.0078125,0.7421875,1.0,0.6999861001968384,0.0625,0.6875,1.0
189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5283203125,0.70361328125,0.80322265625,0.699951171875,0.0,0.75,1.0,0.699951171875,0.125,0.6875,1.0
190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.541015625,0.7041015625,0.8251953125,0.699951171875,0.0,0.75,1.0,0.699951171875,0.125,0.6875,1.0
191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55615234375,0.7021484375,0.80322265625,0.699951171875,0.0234375,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.2080078125,0.736328125,1.0,0.699951171875,0.0078125,0.7890625,1.0,0.699951171875,0.0,0.75,1.0
193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.55712890625,0.70263671875,0.78515625,0.6999512314796448,0.0234375,0.7265625,0.9453125,0.6999512314796448,0.0625,0.6875,1.0
194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.55908203125,0.7021484375,0.77978515625,0.6999512314796448,0.0546875,0.7265625,0.953125,0.6999512314796448,0.0625,0.6875,1.0
195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5302734375,0.69970703125,0.83203125,0.6999861001968384,0.0,0.75,1.0,0.6999861001968384,0.125,0.6875,1.0
196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.53173828125,0.70361328125,0.81298828125,0.699951171875,0.0,0.7421875,0.984375,0.699951171875,0.125,0.6875,1.0
197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.537109375,0.70361328125,0.818359375,0.699951171875,0.0,0.75,0.96875,0.699951171875,0.1875,0.6875,1.0
198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.556640625,0.70166015625,0.822265625,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.0625,0.6875,1.0
199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.30224609375,0.728515625,1.0,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.0,0.75,1.0
200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5693359375,0.70263671875,0.77978515625,0.6999512314796448,0.0234375,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5693359375,0.70263671875,0.7822265625,0.6999512314796448,0.0546875,0.7265625,0.9453125,0.6999512314796448,0.0625,0.6875,1.0
202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.474609375,0.703125,0.8681640625,0.6999861001968384,0.0078125,0.7578125,1.0,0.6999861001968384,0.125,0.6875,1.0
203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54345703125,0.701171875,0.8125,0.699951171875,0.0,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56298828125,0.70166015625,0.79931640625,0.699951171875,0.0,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55615234375,0.70166015625,0.8017578125,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.17431640625,0.73291015625,1.0,0.699951171875,0.015625,0.7734375,1.0,0.699951171875,0.0,0.75,1.0
207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5732421875,0.70166015625,0.79296875,0.6999512314796448,0.03125,0.734375,0.9609375,0.6999512314796448,0.125,0.6875,1.0
208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.57958984375,0.70166015625,0.7861328125,0.6999512314796448,0.0390625,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.43017578125,0.70458984375,0.86669921875,0.6999861001968384,0.0,0.7734375,1.0,0.6999861001968384,0.125,0.6875,1.0
210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.541015625,0.70166015625,0.81982421875,0.699951171875,0.0,0.75,0.984375,0.699951171875,0.0625,0.6875,1.0
211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.54541015625,0.701171875,0.82763671875,0.699951171875,0.0,0.75,0.984375,0.699951171875,0.125,0.6875,1.0
212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5244140625,0.70068359375,0.818359375,0.699951171875,0.03125,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.31591796875,0.6845703125,1.0,0.699951171875,0.0625,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59228515625,0.69921875,0.79638671875,0.6999512314796448,0.0546875,0.7265625,1.0,0.6999512314796448,0.125,0.6875,1.0
215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59228515625,0.69921875,0.7958984375,0.6999512314796448,0.0625,0.7265625,0.984375,0.6999512314796448,0.125,0.6875,1.0
216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.47607421875,0.703125,0.87646484375,0.6999861001968384,0.0,0.796875,1.0,0.6999861001968384,0.0625,0.6875,1.0
217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54296875,0.701171875,0.82177734375,0.699951171875,0.0,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5185546875,0.70068359375,0.83935546875,0.699951171875,0.0078125,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.515625,0.701171875,0.82421875,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.17138671875,0.7275390625,1.0,0.699951171875,0.0078125,0.7265625,1.0,0.699951171875,0.0,0.75,1.0
221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59521484375,0.70068359375,0.796875,0.6999512314796448,0.0703125,0.7265625,0.9765625,0.6999512314796448,0.125,0.6875,1.0
222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.595703125,0.70068359375,0.79443359375,0.6999512314796448,0.0703125,0.7265625,0.984375,0.6999512314796448,0.125,0.6875,1.0
223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.4501953125,0.69921875,0.92431640625,0.6999861001968384,0.0,0.828125,1.0,0.6999861001968384,0.125,0.6875,1.0
224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0