Meta-Llama-3.1-8B-wanda-unstructured-0.0
/
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.0.csv
row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max | |
0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |
224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0 | |