Vui Seng Chua commited on
Commit
37aba61
·
1 Parent(s): 402f6e6

Add content

Browse files
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.1 filter=lfs diff=lfs merge=lfs -text
37
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.2 filter=lfs diff=lfs merge=lfs -text
38
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.3 filter=lfs diff=lfs merge=lfs -text
39
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.4 filter=lfs diff=lfs merge=lfs -text
40
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.5 filter=lfs diff=lfs merge=lfs -text
41
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.6 filter=lfs diff=lfs merge=lfs -text
42
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.7 filter=lfs diff=lfs merge=lfs -text
43
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.0 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
README.md ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This repo contains a sparsity report for each of the pruned models in the table below.
2
+
3
+ Pruning ```meta-llama/Meta-Llama-3.1-8B``` with Wanda
4
+ | Weight Target Sparsity (%) | Perplexity (lower is better) |
5
+ |------------------------|-----------------------------|
6
+ | 0 (dense, baseline) | 5.8393 |
7
+ | 10 | 5.8781 |
8
+ | 20 | 6.0102 |
9
+ | 30 | 6.3076 |
10
+ | 40 | 7.0094 |
11
+ | 50 | 9.0642 |
12
+ | 60 | 20.2265 |
13
+ | 70 | 103.5209 |
14
+
15
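+ Each report (CSV) lists, for every layer, its overall sparsity, the sparsity statistics (avg/min/median/max) over its 128x16 tiles, and the sparsity statistics over the full columns and rows of the layer's weight matrix.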
16
+
17
+ > For a more granular sparsity report within a given tile, please continue below.
18
+
19
+ # Install
20
+ ```pip install torch ipython pandas```
21
+
22
+ # Interactively look up a specific tile of a layer
23
+ ```bash
24
+ ./interactive_sparsity.sh
25
+ ```
26
+ The expected outcome is as follows: you will be dropped into an IPython console with the needed functionality already loaded.
27
+ ```
28
+ $ ./interactive_sparsity.sh
29
+ Python 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:36:13) [GCC 12.3.0]
30
+ Type 'copyright', 'credits' or 'license' for more information
31
+ IPython 8.26.0 -- An enhanced Interactive Python. Type '?' for help.
32
+
33
+ - Help ------------------
34
+
35
+ h = SparseBlob("path to sparsity blob")
36
+
37
+ SparseBlob.preview:
38
+ preview sparsity dataframe, intend to show row id, short id for look up
39
+ eg. h.preview()
40
+
41
+
42
+ SparseBlob.ls_layers:
43
+ list all available layer ids for look up
44
+ eg. h.ls_layers()
45
+
46
+
47
+ SparseBlob.get_sparsity_by_short_id:
48
+ return a sparsity stats of a layer via short_id lookup.
49
+ eg. h.get_sparsity_by_short_id('tx.0.attn.v')
50
+
51
+
52
+ SparseBlob.get_sparsity_by_row_id:
53
+ return a sparsity stats of a layer via row id lookup.
54
+ eg. h.get_sparsity_by_row_id(36)
55
+
56
+
57
+ SparseBlob.get_sparsity_of_tile:
58
+ zoom into a specific layer and a specific tile,
59
+ return the sparsity stats of the tile down to col, row granularity
60
+ eg. h.get_sparsity_of_tile(36, (5, 6))
61
+
62
+
63
+ SparseBlob.show_help:
64
+ print help for available function of SparseBlob
65
+ eg. h.show_help()
66
+
67
+
68
+ - End of Help ------------------
69
+ In [1]:
70
+ ```
71
+
72
+ Sample usage:
73
+ ```
74
+ In [1]: ls blob*
75
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.0
76
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.1
77
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.2
78
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.3
79
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.4
80
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.5
81
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.6
82
+ blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.7
83
+
84
+ In [2]: h = SparseBlob("blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.5")
85
+
86
+
87
+ In [3]: h.preview()
88
+ layer_id short_id ... row_med row_max
89
+ 0 model.layers.0.self_attn.q_proj tx.0.attn.q ... 0.5000 1.0000
90
+ 1 model.layers.0.self_attn.k_proj tx.0.attn.k ... 0.5000 1.0000
91
+ 2 model.layers.0.self_attn.v_proj tx.0.attn.v ... 0.5000 1.0000
92
+ 3 model.layers.0.self_attn.o_proj tx.0.attn.o ... 0.5000 1.0000
93
+ 4 model.layers.0.mlp.gate_proj tx.0.mlp.gate ... 0.5000 1.0000
94
+ 5 model.layers.0.mlp.up_proj tx.0.mlp.up ... 0.5000 1.0000
95
+ 6 model.layers.0.mlp.down_proj tx.0.mlp.down ... 0.5000 1.0000
96
+ 7 model.layers.1.self_attn.q_proj tx.1.attn.q ... 0.5000 1.0000
97
+ 8 model.layers.1.self_attn.k_proj tx.1.attn.k ... 0.5000 1.0000
98
+ 9 model.layers.1.self_attn.v_proj tx.1.attn.v ... 0.5000 1.0000
99
+ 10 model.layers.1.self_attn.o_proj tx.1.attn.o ... 0.5000 1.0000
100
+ 11 model.layers.1.mlp.gate_proj tx.1.mlp.gate ... 0.5000 1.0000
101
+ .
102
+ .
103
+ .
104
+ 222 model.layers.31.mlp.up_proj tx.31.mlp.up ... 0.5000 1.0000
105
+ 223 model.layers.31.mlp.down_proj tx.31.mlp.down ... 0.5000 1.0000
106
+ 224 lm_head lm_head ... 0.0000 0.0000
107
+
108
+ [225 rows x 23 columns]
109
+
110
+
111
+ In [4]: h.get_sparsity_by_row_id(10)
112
+ Out[4]:
113
+ layer_id model.layers.1.self_attn.o_proj
114
+ short_id tx.1.attn.o
115
+ layer_type Linear
116
+ param_type weight
117
+ shape [4096, 4096]
118
+ nparam 16777216
119
+ nnz 8388608
120
+ sparsity 0.5000
121
+ tile_shape (128, 16)
122
+ n_tile 32 x 256
123
+ n_tile_total 8192
124
+ tile_avg 0.5000
125
+ tile_min 0.2197
126
+ tile_med 0.5073
127
+ tile_max 0.9678
128
+ col_avg 0.5000
129
+ col_min 0.0312
130
+ col_med 0.4609
131
+ col_max 1.0000
132
+ row_avg 0.5000
133
+ row_min 0.0000
134
+ row_med 0.5000
135
+ row_max 1.0000
136
+ Name: 10, dtype: object
137
+
138
+
139
+ In [5]: h.get_sparsity_of_tile(10, (30, 245))
140
+ (30, 245) : tile_id
141
+ model.layers.1.self_attn.o_proj : layer_id
142
+ (128, 16) : tiled by
143
+ 0.2861 : tile_sparsity
144
+ 16 : col_count
145
+ 0.2861 : col_avg
146
+ 0.2266 : col_min
147
+ 0.2734 : col_med
148
+ 0.3594 : col_max
149
+ 128 : row_count
150
+ 0.2861 : row_avg
151
+ 0.0000 : row_min
152
+ 0.2500 : row_med
153
+ 0.6250 : row_max
154
+ ```
155
+
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bdec70b225dd0ed7e403098e303be5f0f08065c90300d491b3fbb4794ee4b87
3
+ size 2125641134
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67dfef3467e9bdcb93ac723c1699a8f289e2a8ff8ec5cd8f938f0f0ca5c6129d
3
+ size 2125645358
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e15c554a6070d59a4ac23b4c3d8ca35b9e04ff67f5da5ada62f93f9756c6e39
3
+ size 2125645550
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c0b0ee04352fa7cd8878be7853124c2eca535f1191d53d3f3cf7a9a3ea795a2
3
+ size 2125644334
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628ca3fb9b359b6730e9fb409d0950041e3dc1ab1cacb422eff3f8505def23d2
3
+ size 2125645550
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38ce8eee865c4e31af9cda0e1f2b88f2ff2c020d1c1676aaf4c0c28c4da08628
3
+ size 2125644270
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73e29c85ccc700041e4307492a971c4d18014af37d673e5112ac1572bb8b9c9a
3
+ size 2125644590
blob.sparsity._Meta-Llama-3.1-8B-wanda-unstructured-0.7 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21379172d68d714fe06c51b04ee41ec359bb65a332fadcbd37aae7f9d8d52d16
3
+ size 2125644462
interactive_sparsity.sh ADDED
@@ -0,0 +1 @@
 
 
1
#!/usr/bin/env bash
# Launch an interactive IPython session with the SparseBlob lookup helpers
# preloaded (everything exported by lookup_sparsity_blob.py).
#
# The directory containing this script is prepended to PYTHONPATH so that
# lookup_sparsity_blob.py is importable even when the script is invoked from
# a different working directory. The working directory itself is left
# unchanged, so relative blob paths typed in the session still resolve
# against wherever the user started the script.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PYTHONPATH="${script_dir}${PYTHONPATH:+:${PYTHONPATH}}" exec ipython -i -c "from lookup_sparsity_blob import *"
lookup_sparsity_blob.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import pandas as pd
3
+
4
+
5
+ # Set options to display the full DataFrame
6
+ # pd.set_option('display.max_rows', None) # Display all rows
7
+ # pd.set_option('display.max_columns', None) # Display all columns
8
+ pd.set_option('display.float_format', '{:.4f}'.format) # Format floats to 4 decimal places
9
+
10
def get_stats(tensor, axis=None, label=None):
    """Summarise a tensor with avg / min / median / max statistics.

    Args:
        tensor: a torch tensor (anything exposing ``numel``, ``mean``, ``min``,
            ``median`` and ``max`` the way ``torch.Tensor`` does).
        axis: ``None`` to reduce over the whole tensor, otherwise the dimension
            passed as ``dim=`` to each reduction.
        label: optional name; when given, every key of the result is prefixed
            with ``"<label>_"``.

    Returns:
        dict, in insertion order:
          * ``axis is None``: ``count`` (number of elements) followed by
            ``avg``, ``min``, ``med``, ``max`` as Python scalars.
          * ``axis`` given: ``avg``, ``min``, ``med``, ``max`` (no ``count``).
            ``avg`` is a tensor, while ``min``/``med``/``max`` are whatever
            ``tensor.min(dim=...)`` etc. return -- for torch that is a
            ``(values, indices)`` named tuple, not a plain tensor.

    Note:
        ``torch.median`` returns the lower of the two middle values when the
        number of elements is even; it does not average them.
    """
    key_prefix = "" if label is None else f"{label}_"

    # Per-dimension reduction: results stay as torch objects.
    if axis is not None:
        return {
            f"{key_prefix}avg": tensor.mean(dim=axis),
            f"{key_prefix}min": tensor.min(dim=axis),
            f"{key_prefix}med": tensor.median(dim=axis),
            f"{key_prefix}max": tensor.max(dim=axis),
        }

    # Global reduction: collapse everything to Python scalars.
    return {
        f"{key_prefix}count": tensor.numel(),
        f"{key_prefix}avg": tensor.mean().item(),
        f"{key_prefix}min": tensor.min().item(),
        f"{key_prefix}med": tensor.median().item(),
        f"{key_prefix}max": tensor.max().item(),
    }
31
+
32
class SparseBlob:
    """Interactive accessor for one saved sparsity blob.

    The blob file is loaded with ``torch.load`` and must contain a mapping
    with two entries:
      * ``'df'``   -- the per-layer sparsity report (used here through the
        pandas DataFrame API: ``.iloc``, ``.short_id``, boolean masking).
      * ``'blob'`` -- a mapping ``layer_id -> dict`` holding at least
        ``'tile_sparsity'``, ``'tile_sparsity_per_col'`` and
        ``'tile_sparsity_per_row'``, each indexable by a ``(row, col)`` tile id.

    NOTE: the docstrings of the public methods listed in ``print_help`` are
    printed verbatim as the interactive help text, so they are deliberately
    kept short; implementation notes live in ``#`` comments instead.
    """

    def __init__(self, blob_path):
        self.blob_path = blob_path
        # SECURITY: the blob is a pickle (it embeds a DataFrame), so loading it
        # can execute arbitrary code -- only open blobs from a trusted source.
        # weights_only=False is passed explicitly because recent PyTorch
        # releases default to weights_only=True, which refuses to unpickle
        # non-tensor objects such as the DataFrame stored under 'df'.
        d = torch.load(blob_path, weights_only=False)
        self._rpt = d['df']
        self._blob = d['blob']

    @classmethod
    def print_help(cls):
        # Prints a banner followed by the docstring of every public method
        # named below (looked up on this class only, not on subclasses' MRO).
        print("\n- Help ------------------")
        print("\nh = SparseBlob(\"path to sparsity blob\")\n")

        methods = [
            'preview', 'ls_layers',
            'get_sparsity_by_short_id',
            'get_sparsity_by_row_id',
            'get_sparsity_of_tile',
            'show_help'
        ]
        for method in methods:
            print(f'{cls.__name__}.{method}:{cls.__dict__[method].__doc__}\n')
        print("- End of Help ------------------")

    def show_help(self):
        """
        print help for available function of SparseBlob
        eg. h.show_help()
        """
        self.print_help()

    def preview(self):
        """
        preview sparsity dataframe, intend to show row id, short id for look up
        eg. h.preview()
        """
        # option_context lifts the row limit only for this print and restores
        # the caller's previous setting afterwards, even if printing raises.
        with pd.option_context('display.max_rows', None):
            print(self._rpt)

    def ls_layers(self):
        """
        list all available layer ids for look up
        eg. h.ls_layers()
        """
        with pd.option_context('display.max_rows', None):
            print(self._rpt.short_id)

    def get_sparsity_by_short_id(self, short_id):
        """
        return a sparsity stats of a layer via short_id lookup.
        eg. h.get_sparsity_by_short_id('tx.0.attn.v')
        """
        matches = self._rpt[self._rpt.short_id == short_id]
        if matches.empty:
            # Same exception type that .iloc[0] on an empty frame would raise,
            # but with a message that tells the user how to recover.
            raise IndexError(
                f"short_id {short_id!r} not found; use .ls_layers() to list valid ids."
            )
        return matches.iloc[0]

    def get_sparsity_by_row_id(self, id):
        """
        return a sparsity stats of a layer via row id lookup.
        eg. h.get_sparsity_by_row_id(36)
        """
        return self._rpt.iloc[id]

    def get_sparsity_of_tile(self, lut_id, tile_id):
        """
        zoom into a specific layer and a specific tile,
        return the sparsity stats of the tile down to col, row granularity
        eg. h.get_sparsity_of_tile(36, (5, 6))
        """
        # Resolve the layer row: an int is a positional row id, a str is a
        # short_id. Anything else is reported and the call returns None.
        if isinstance(lut_id, int):
            row = self.get_sparsity_by_row_id(lut_id)
        elif isinstance(lut_id, str):
            row = self.get_sparsity_by_short_id(lut_id)
        else:
            print("Invalid lookup id, use row number or short_id; you can do .preview() to find out.")
            return

        layer_id = row['layer_id']
        layer_blob = self._blob[layer_id]
        max_nrow, max_ncol = layer_blob['tile_sparsity'].shape

        if not isinstance(tile_id, tuple) or len(tile_id) != 2:
            print(f"[Error] tile_id must be a tuple, eg .get_sparsity_of_tile({lut_id}, ( 5, 10))")
            return

        # Reject negative indices as well: they would otherwise wrap around
        # silently and report a different tile than the one requested.
        if not (0 <= tile_id[0] < max_nrow and 0 <= tile_id[1] < max_ncol):
            print(f"[Error] tile not exist: tile_id must be (0-{max_nrow-1}, 0-{max_ncol-1})")
            return

        outdict = {
            "tile_id": tile_id,
            "layer_id": layer_id,
            "tiled by": row['tile_shape'],
            "tile_sparsity": layer_blob['tile_sparsity'][tile_id].item(),
            **get_stats(layer_blob['tile_sparsity_per_col'][tile_id], label="col"),
            **get_stats(layer_blob['tile_sparsity_per_row'][tile_id], label="row"),
        }
        self._print_one_tile_stats(outdict)

    def _print_one_tile_stats(self, d):
        # One "value : key" line per entry, value right-aligned to 40 columns;
        # floats are shown with 4 decimals, everything else via str().
        for key, value in d.items():
            if isinstance(value, float):
                print(f'{value:>40.4f} : {key}')
            else:
                print(f'{str(value):>40} : {key}')
132
+
133
+
134
+ SparseBlob.print_help()
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.0.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,4194304,0.0,"(128, 16)",8 x 256,2048,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,16777216,0.0,"(128, 16)",32 x 256,8192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,58720256,0.0,"(128, 16)",112 x 256,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,58720256,0.0,"(128, 16)",32 x 896,28672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.1.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0,0.09130859375,0.33984375,0.099853515625,0.0,0.0,1.0,0.099853515625,0.0,0.0625,0.5
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.00048828125,0.09033203125,0.34716796875,0.099853515625,0.0,0.0,1.0,0.099853515625,0.0,0.0625,0.5625
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.00244140625,0.08837890625,0.31884765625,0.099853515625,0.0,0.0,1.0,0.099853515625,0.0,0.0625,0.5
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0048828125,0.06494140625,0.4658203125,0.099853515625,0.0,0.0625,0.6484375,0.099853515625,0.0,0.0625,0.875
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0693359375,0.09619140625,0.22705078125,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.6875
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.068359375,0.09619140625,0.2236328125,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.6875
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.060546875,0.10009765625,0.1416015625,0.0999581515789032,0.0,0.1015625,0.3125,0.0999581515789032,0.0,0.0625,0.625
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0458984375,0.09375,0.2919921875,0.099853515625,0.0,0.09375,1.0,0.099853515625,0.0,0.0625,0.625
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0517578125,0.09423828125,0.271484375,0.099853515625,0.0,0.09375,1.0,0.099853515625,0.0,0.0625,0.5
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.05419921875,0.09326171875,0.2744140625,0.099853515625,0.0,0.09375,1.0,0.099853515625,0.0,0.0625,0.5625
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03076171875,0.0859375,0.341796875,0.099853515625,0.0,0.078125,0.59375,0.099853515625,0.0,0.0625,0.75
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0693359375,0.0966796875,0.2236328125,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.625
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0712890625,0.0966796875,0.22216796875,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.625
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.064453125,0.099609375,0.13525390625,0.0999581515789032,0.0,0.1015625,0.2734375,0.0999581515789032,0.0,0.0625,0.625
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.05908203125,0.0986328125,0.189453125,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0615234375,0.0986328125,0.18115234375,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07275390625,0.09814453125,0.1787109375,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03564453125,0.08349609375,0.3173828125,0.099853515625,0.0,0.0859375,0.4765625,0.099853515625,0.0,0.0625,0.75
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.0966796875,0.22412109375,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.625
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.06982421875,0.0966796875,0.22802734375,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.625
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0654296875,0.10009765625,0.14453125,0.0999581515789032,0.0,0.1015625,0.2890625,0.0999581515789032,0.0,0.0625,0.6875
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.068359375,0.099609375,0.18212890625,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06982421875,0.099609375,0.16650390625,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.625
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.072265625,0.09912109375,0.17919921875,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.625
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.044921875,0.09521484375,0.18603515625,0.099853515625,0.0,0.09375,0.328125,0.099853515625,0.0,0.0625,0.625
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0693359375,0.09765625,0.22412109375,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.5625
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.09765625,0.2236328125,0.0998535230755806,0.0,0.09375,1.0,0.0998535230755806,0.0,0.0625,0.5625
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06884765625,0.099609375,0.140625,0.0999581515789032,0.0,0.1015625,0.2734375,0.0999581515789032,0.0,0.0625,0.6875
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06640625,0.099609375,0.169921875,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0673828125,0.10009765625,0.1591796875,0.099853515625,0.0,0.1015625,0.9765625,0.099853515625,0.0,0.0625,0.5625
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07080078125,0.09912109375,0.17138671875,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.037109375,0.08642578125,0.28857421875,0.099853515625,0.0,0.0859375,0.453125,0.099853515625,0.0,0.0625,0.8125
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.09912109375,0.173828125,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.625
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07177734375,0.09912109375,0.171875,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.5625
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06982421875,0.10009765625,0.1337890625,0.0999581515789032,0.0,0.1015625,0.421875,0.0999581515789032,0.0,0.0625,0.625
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06396484375,0.099609375,0.13330078125,0.099853515625,0.0,0.1015625,0.28125,0.099853515625,0.0,0.0625,0.5625
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06005859375,0.10009765625,0.14013671875,0.099853515625,0.0,0.1015625,0.4375,0.099853515625,0.0,0.0625,0.5
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0712890625,0.09912109375,0.17724609375,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.02783203125,0.08056640625,0.25341796875,0.099853515625,0.0,0.0859375,0.4375,0.099853515625,0.0,0.0625,0.6875
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.06982421875,0.099609375,0.16748046875,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.625
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07421875,0.099609375,0.173828125,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.625
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.07080078125,0.10009765625,0.134765625,0.0999581515789032,0.0,0.1015625,0.296875,0.0999581515789032,0.0,0.0625,0.625
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06787109375,0.099609375,0.13427734375,0.099853515625,0.0,0.1015625,0.2578125,0.099853515625,0.0,0.0625,0.5625
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06982421875,0.099609375,0.1318359375,0.099853515625,0.0,0.1015625,0.3984375,0.099853515625,0.0,0.0625,0.5625
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07421875,0.099609375,0.1640625,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03759765625,0.0927734375,0.2080078125,0.099853515625,0.0,0.09375,0.359375,0.099853515625,0.0,0.0625,0.6875
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0693359375,0.099609375,0.17529296875,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.5625
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07080078125,0.099609375,0.17431640625,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.625
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.07177734375,0.099609375,0.13818359375,0.0999581515789032,0.0,0.1015625,0.3671875,0.0999581515789032,0.0,0.0625,0.5625
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.064453125,0.099609375,0.12939453125,0.099853515625,0.0,0.1015625,0.2734375,0.099853515625,0.0,0.0625,0.5625
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07080078125,0.10009765625,0.1318359375,0.099853515625,0.0,0.1015625,0.4296875,0.099853515625,0.0,0.0625,0.5
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07080078125,0.09912109375,0.171875,0.099853515625,0.0,0.1015625,0.9765625,0.099853515625,0.0,0.0625,0.5625
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.05712890625,0.09814453125,0.16162109375,0.099853515625,0.0,0.1015625,0.3203125,0.099853515625,0.0,0.0625,0.5625
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.099609375,0.13232421875,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07421875,0.099609375,0.1298828125,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.6875
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06787109375,0.099609375,0.13525390625,0.0999581515789032,0.0,0.1015625,0.3203125,0.0999581515789032,0.0,0.0625,0.625
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0693359375,0.099609375,0.13330078125,0.099853515625,0.0,0.1015625,0.2734375,0.099853515625,0.0,0.0625,0.625
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.072265625,0.099609375,0.13037109375,0.099853515625,0.0,0.1015625,0.328125,0.099853515625,0.0,0.0625,0.5625
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0712890625,0.099609375,0.17431640625,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.04833984375,0.09619140625,0.19091796875,0.099853515625,0.0,0.09375,0.3515625,0.099853515625,0.0,0.0625,0.625
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07177734375,0.099609375,0.17626953125,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.625
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.099609375,0.17529296875,0.0998535230755806,0.0,0.1015625,1.0,0.0998535230755806,0.0,0.0625,0.6875
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0654296875,0.10009765625,0.13818359375,0.0999581515789032,0.0,0.1015625,0.328125,0.0999581515789032,0.0,0.0625,0.625
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0712890625,0.099609375,0.1318359375,0.099853515625,0.0,0.1015625,0.2578125,0.099853515625,0.0,0.0625,0.5625
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0712890625,0.099609375,0.1298828125,0.099853515625,0.0,0.1015625,0.359375,0.099853515625,0.0,0.0625,0.5625
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07666015625,0.09912109375,0.16552734375,0.099853515625,0.0,0.1015625,0.9765625,0.099853515625,0.0,0.0625,0.5625
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0458984375,0.09619140625,0.21240234375,0.099853515625,0.0,0.09375,0.328125,0.099853515625,0.0,0.0625,0.625
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.06884765625,0.099609375,0.130859375,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07470703125,0.099609375,0.12841796875,0.0998535230755806,0.0,0.1015625,0.2578125,0.0998535230755806,0.0,0.0625,0.625
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06787109375,0.099609375,0.130859375,0.0999581515789032,0.0,0.1015625,0.3046875,0.0999581515789032,0.0,0.0625,0.625
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0732421875,0.099609375,0.13427734375,0.099853515625,0.0,0.1015625,0.25,0.099853515625,0.0,0.0625,0.5625
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0703125,0.099609375,0.126953125,0.099853515625,0.0,0.1015625,0.3359375,0.099853515625,0.0,0.0625,0.5
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07568359375,0.09912109375,0.1611328125,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.04248046875,0.0888671875,0.23583984375,0.099853515625,0.0,0.09375,0.3515625,0.099853515625,0.0,0.0625,0.6875
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.099609375,0.1318359375,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.5625
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07080078125,0.099609375,0.1279296875,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.6875
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06689453125,0.10009765625,0.13525390625,0.0999581515789032,0.0,0.1015625,0.3125,0.0999581515789032,0.0,0.0625,0.6875
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0693359375,0.099609375,0.13134765625,0.099853515625,0.0,0.1015625,0.2890625,0.099853515625,0.0,0.0625,0.625
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.072265625,0.099609375,0.1318359375,0.099853515625,0.0,0.1015625,0.4140625,0.099853515625,0.0,0.0625,0.625
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06982421875,0.099609375,0.14892578125,0.099853515625,0.0,0.1015625,0.9765625,0.099853515625,0.0,0.0625,0.5625
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.04931640625,0.0927734375,0.23876953125,0.099853515625,0.0,0.09375,0.359375,0.099853515625,0.0,0.0625,0.625
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07177734375,0.099609375,0.12939453125,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.625
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.10009765625,0.126953125,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06591796875,0.099609375,0.1337890625,0.0999581515789032,0.0,0.1015625,0.3515625,0.0999581515789032,0.0,0.0625,0.625
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.072265625,0.099609375,0.12890625,0.099853515625,0.0,0.1015625,0.2734375,0.099853515625,0.0,0.0625,0.5625
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07470703125,0.10009765625,0.13232421875,0.099853515625,0.0,0.1015625,0.375,0.099853515625,0.0,0.0625,0.5
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.076171875,0.099609375,0.1611328125,0.099853515625,0.0,0.1015625,0.9765625,0.099853515625,0.0,0.0625,0.5625
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03515625,0.099609375,0.16357421875,0.099853515625,0.0,0.1015625,0.3046875,0.099853515625,0.0,0.0625,0.6875
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.06982421875,0.099609375,0.1328125,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.5625
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07177734375,0.099609375,0.12646484375,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06982421875,0.10009765625,0.1396484375,0.0999581515789032,0.0,0.1015625,0.296875,0.0999581515789032,0.0,0.0625,0.5625
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06884765625,0.099609375,0.12841796875,0.099853515625,0.0,0.1015625,0.265625,0.099853515625,0.0,0.0625,0.5625
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0693359375,0.099609375,0.12744140625,0.099853515625,0.0,0.1015625,0.453125,0.099853515625,0.0,0.0625,0.5625
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07568359375,0.09912109375,0.171875,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.04296875,0.095703125,0.20458984375,0.099853515625,0.0,0.09375,0.3359375,0.099853515625,0.0,0.0625,0.625
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07080078125,0.099609375,0.13671875,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07080078125,0.099609375,0.1279296875,0.0998535230755806,0.0,0.1015625,0.234375,0.0998535230755806,0.0,0.0625,0.625
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06884765625,0.099609375,0.13427734375,0.0999581515789032,0.0,0.1015625,0.34375,0.0999581515789032,0.0,0.0625,0.625
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06787109375,0.099609375,0.12646484375,0.099853515625,0.0,0.1015625,0.25,0.099853515625,0.0,0.0625,0.5625
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06884765625,0.099609375,0.1328125,0.099853515625,0.0,0.1015625,0.421875,0.099853515625,0.0,0.0625,0.5625
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07666015625,0.099609375,0.16015625,0.099853515625,0.0,0.1015625,0.9765625,0.099853515625,0.0,0.0625,0.5
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.044921875,0.09521484375,0.2099609375,0.099853515625,0.0,0.09375,0.3515625,0.099853515625,0.0,0.0625,0.6875
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.068359375,0.099609375,0.130859375,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.625
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0732421875,0.099609375,0.12890625,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0673828125,0.099609375,0.13525390625,0.0999581515789032,0.0,0.1015625,0.2890625,0.0999581515789032,0.0,0.0625,0.5625
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.07275390625,0.099609375,0.12939453125,0.099853515625,0.0,0.1015625,0.296875,0.099853515625,0.0,0.0625,0.6875
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0615234375,0.099609375,0.1279296875,0.099853515625,0.0,0.1015625,0.3359375,0.099853515625,0.0,0.0625,0.5625
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07080078125,0.09912109375,0.16259765625,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03662109375,0.0869140625,0.2470703125,0.099853515625,0.0,0.0859375,0.390625,0.099853515625,0.0,0.0625,0.75
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07080078125,0.099609375,0.13037109375,0.0998535230755806,0.0,0.1015625,0.2578125,0.0998535230755806,0.0,0.0625,0.625
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0654296875,0.099609375,0.12841796875,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06494140625,0.10009765625,0.13623046875,0.0999581515789032,0.0,0.1015625,0.3046875,0.0999581515789032,0.0,0.0625,0.5625
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0732421875,0.10009765625,0.13232421875,0.099853515625,0.0,0.1015625,0.25,0.099853515625,0.0,0.0625,0.625
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0703125,0.10009765625,0.12548828125,0.099853515625,0.0,0.1015625,0.3828125,0.099853515625,0.0,0.0625,0.5625
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0712890625,0.099609375,0.16748046875,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03173828125,0.09375,0.21484375,0.099853515625,0.0,0.09375,0.3359375,0.099853515625,0.0,0.0625,0.6875
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07373046875,0.099609375,0.12939453125,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.625
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.099609375,0.12939453125,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.5625
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06982421875,0.10009765625,0.1376953125,0.0999581515789032,0.0,0.1015625,0.3046875,0.0999581515789032,0.0,0.0625,0.625
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06787109375,0.099609375,0.1279296875,0.099853515625,0.0,0.1015625,0.2421875,0.099853515625,0.0,0.0625,0.5625
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.072265625,0.099609375,0.13720703125,0.099853515625,0.0,0.1015625,0.3828125,0.099853515625,0.0,0.0625,0.5625
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07421875,0.099609375,0.16943359375,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03515625,0.09033203125,0.23583984375,0.099853515625,0.0,0.09375,0.359375,0.099853515625,0.0,0.0625,0.6875
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.099609375,0.12890625,0.0998535230755806,0.0,0.1015625,0.2421875,0.0998535230755806,0.0,0.0625,0.625
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07470703125,0.099609375,0.12841796875,0.0998535230755806,0.0,0.1015625,0.2734375,0.0998535230755806,0.0,0.0625,0.625
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0703125,0.099609375,0.1337890625,0.0999581515789032,0.0,0.1015625,0.3671875,0.0999581515789032,0.0,0.0625,0.625
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.072265625,0.099609375,0.12890625,0.099853515625,0.0,0.1015625,0.2890625,0.099853515625,0.0,0.0625,0.625
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0712890625,0.10009765625,0.13720703125,0.099853515625,0.0,0.1015625,0.46875,0.099853515625,0.0,0.0625,0.5625
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07373046875,0.099609375,0.16748046875,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0302734375,0.076171875,0.31396484375,0.099853515625,0.0,0.0859375,0.4765625,0.099853515625,0.0,0.0625,0.8125
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.099609375,0.130859375,0.0998535230755806,0.0,0.1015625,0.265625,0.0998535230755806,0.0,0.0625,0.625
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.072265625,0.099609375,0.1298828125,0.0998535230755806,0.0,0.1015625,0.2734375,0.0998535230755806,0.0,0.0625,0.625
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0693359375,0.10009765625,0.1337890625,0.0999581515789032,0.0,0.1015625,0.28125,0.0999581515789032,0.0,0.0625,0.625
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0732421875,0.099609375,0.12744140625,0.099853515625,0.0,0.1015625,0.2578125,0.099853515625,0.0,0.0625,0.5625
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07080078125,0.10009765625,0.1357421875,0.099853515625,0.0,0.1015625,0.3828125,0.099853515625,0.0,0.0625,0.5625
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0732421875,0.099609375,0.16845703125,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.02490234375,0.09521484375,0.2490234375,0.099853515625,0.0,0.09375,0.3984375,0.099853515625,0.0,0.0625,0.8125
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0732421875,0.099609375,0.126953125,0.0998535230755806,0.0,0.1015625,0.25,0.0998535230755806,0.0,0.0625,0.625
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07421875,0.099609375,0.12939453125,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.625
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06396484375,0.10009765625,0.1357421875,0.0999581515789032,0.0,0.1015625,0.2734375,0.0999581515789032,0.0,0.0625,0.625
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0712890625,0.10009765625,0.13134765625,0.099853515625,0.0,0.1015625,0.2890625,0.099853515625,0.0,0.0625,0.5625
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.072265625,0.10009765625,0.1328125,0.099853515625,0.0,0.1015625,0.4296875,0.099853515625,0.0,0.0625,0.625
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07373046875,0.099609375,0.17626953125,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03662109375,0.0859375,0.2783203125,0.099853515625,0.0,0.0859375,0.46875,0.099853515625,0.0,0.0625,0.75
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.099609375,0.1279296875,0.0998535230755806,0.0,0.1015625,0.265625,0.0998535230755806,0.0,0.0625,0.625
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.099609375,0.1259765625,0.0998535230755806,0.0,0.1015625,0.3125,0.0998535230755806,0.0,0.0625,0.5625
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06884765625,0.10009765625,0.1318359375,0.0999581515789032,0.0,0.1015625,0.2734375,0.0999581515789032,0.0,0.0625,0.625
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06884765625,0.10009765625,0.1298828125,0.099853515625,0.0,0.1015625,0.28125,0.099853515625,0.0,0.0625,0.5625
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07373046875,0.099609375,0.12744140625,0.099853515625,0.0,0.1015625,0.375,0.099853515625,0.0,0.0625,0.5
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07275390625,0.09912109375,0.16162109375,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.625
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.02734375,0.08349609375,0.3203125,0.099853515625,0.0,0.09375,0.5234375,0.099853515625,0.0,0.0625,0.75
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.099609375,0.13134765625,0.0998535230755806,0.0,0.1015625,0.265625,0.0998535230755806,0.0,0.0625,0.5625
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.06884765625,0.099609375,0.12646484375,0.0998535230755806,0.0,0.1015625,0.2734375,0.0998535230755806,0.0,0.0625,0.625
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06689453125,0.10009765625,0.13623046875,0.0999581515789032,0.0,0.1015625,0.296875,0.0999581515789032,0.0,0.0625,0.5625
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0703125,0.10009765625,0.12646484375,0.099853515625,0.0,0.1015625,0.2421875,0.099853515625,0.0,0.0625,0.5625
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0732421875,0.099609375,0.13134765625,0.099853515625,0.0,0.1015625,0.4609375,0.099853515625,0.0,0.0625,0.5625
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0732421875,0.099609375,0.1669921875,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0341796875,0.095703125,0.22021484375,0.099853515625,0.0,0.09375,0.3515625,0.099853515625,0.0,0.0625,0.625
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0732421875,0.099609375,0.13134765625,0.0998535230755806,0.0,0.1015625,0.265625,0.0998535230755806,0.0,0.0625,0.625
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07568359375,0.099609375,0.12744140625,0.0998535230755806,0.0,0.1015625,0.28125,0.0998535230755806,0.0,0.0625,0.6875
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06494140625,0.10009765625,0.13525390625,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.625
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.07080078125,0.10009765625,0.13330078125,0.099853515625,0.0,0.1015625,0.2421875,0.099853515625,0.0,0.0625,0.625
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0732421875,0.099609375,0.1298828125,0.099853515625,0.0,0.1015625,0.40625,0.099853515625,0.0,0.0625,0.5625
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07470703125,0.09912109375,0.16552734375,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03173828125,0.0947265625,0.22021484375,0.099853515625,0.0,0.09375,0.328125,0.099853515625,0.0,0.0625,0.625
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07470703125,0.099609375,0.13037109375,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.5625
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0703125,0.099609375,0.12890625,0.0998535230755806,0.0,0.1015625,0.3125,0.0998535230755806,0.0,0.0625,0.625
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0673828125,0.10009765625,0.13671875,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.6875
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06982421875,0.099609375,0.126953125,0.099853515625,0.0,0.1015625,0.2421875,0.099853515625,0.0,0.0625,0.6875
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07470703125,0.099609375,0.13671875,0.099853515625,0.0,0.1015625,0.453125,0.099853515625,0.0,0.0625,0.5
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0673828125,0.09912109375,0.1669921875,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.625
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.03125,0.087890625,0.2255859375,0.099853515625,0.0,0.09375,0.3671875,0.099853515625,0.0,0.0625,0.6875
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.099609375,0.126953125,0.0998535230755806,0.0,0.1015625,0.2734375,0.0998535230755806,0.0,0.0625,0.625
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0693359375,0.099609375,0.12841796875,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.5625
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0654296875,0.10009765625,0.1357421875,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.625
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0732421875,0.099609375,0.1298828125,0.099853515625,0.0,0.1015625,0.2578125,0.099853515625,0.0,0.0625,0.625
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0712890625,0.099609375,0.13330078125,0.099853515625,0.0,0.1015625,0.3515625,0.099853515625,0.0,0.0625,0.5
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07177734375,0.099609375,0.16552734375,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.02685546875,0.080078125,0.3447265625,0.099853515625,0.0,0.0859375,0.5078125,0.099853515625,0.0,0.0625,0.8125
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0712890625,0.099609375,0.12939453125,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.625
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.072265625,0.099609375,0.13232421875,0.0998535230755806,0.0,0.1015625,0.3125,0.0998535230755806,0.0,0.0625,0.5625
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0654296875,0.10009765625,0.140625,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.625
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0732421875,0.10009765625,0.1279296875,0.099853515625,0.0,0.1015625,0.2578125,0.099853515625,0.0,0.0625,0.5625
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07568359375,0.10009765625,0.130859375,0.099853515625,0.0,0.1015625,0.3828125,0.099853515625,0.0,0.0625,0.5
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07373046875,0.099609375,0.1689453125,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.02978515625,0.0849609375,0.23291015625,0.099853515625,0.0,0.0859375,0.375,0.099853515625,0.0,0.0625,0.6875
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.072265625,0.099609375,0.1298828125,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.625
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0712890625,0.099609375,0.12841796875,0.0998535230755806,0.0,0.1015625,0.28125,0.0998535230755806,0.0,0.0625,0.5625
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06396484375,0.10009765625,0.13525390625,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.625
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.068359375,0.099609375,0.17431640625,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07080078125,0.099609375,0.1611328125,0.099853515625,0.0,0.1015625,0.96875,0.099853515625,0.0,0.0625,0.5
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0703125,0.099609375,0.16552734375,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0302734375,0.087890625,0.2216796875,0.099853515625,0.0,0.0859375,0.3671875,0.099853515625,0.0,0.0625,0.6875
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07470703125,0.099609375,0.12939453125,0.0998535230755806,0.0,0.1015625,0.28125,0.0998535230755806,0.0,0.0625,0.5625
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0712890625,0.099609375,0.12744140625,0.0998535230755806,0.0,0.1015625,0.28125,0.0998535230755806,0.0,0.0625,0.5625
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.068359375,0.10009765625,0.1337890625,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.625
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0703125,0.10009765625,0.1279296875,0.099853515625,0.0,0.1015625,0.25,0.099853515625,0.0,0.0625,0.5625
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07177734375,0.10009765625,0.12890625,0.099853515625,0.0,0.1015625,0.3515625,0.099853515625,0.0,0.0625,0.5625
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0703125,0.099609375,0.17333984375,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.025390625,0.095703125,0.25634765625,0.099853515625,0.0,0.09375,0.3671875,0.099853515625,0.0,0.0625,0.75
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07275390625,0.10009765625,0.130859375,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.625
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0732421875,0.099609375,0.1318359375,0.0998535230755806,0.0,0.1015625,0.296875,0.0998535230755806,0.0,0.0625,0.625
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06396484375,0.10009765625,0.1396484375,0.0999581515789032,0.0,0.1015625,0.265625,0.0999581515789032,0.0,0.0625,0.6875
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0693359375,0.099609375,0.18115234375,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.625
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0693359375,0.099609375,0.16259765625,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07666015625,0.09912109375,0.169921875,0.099853515625,0.0,0.1015625,1.0,0.099853515625,0.0,0.0625,0.5625
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.01806640625,0.1064453125,0.20068359375,0.099853515625,0.0,0.09375,0.328125,0.099853515625,0.0,0.0625,0.625
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0712890625,0.099609375,0.126953125,0.0998535230755806,0.0,0.1015625,0.2734375,0.0998535230755806,0.0,0.0625,0.625
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.072265625,0.099609375,0.13232421875,0.0998535230755806,0.0,0.1015625,0.2890625,0.0998535230755806,0.0,0.0625,0.625
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.06689453125,0.10009765625,0.134765625,0.0999581515789032,0.0,0.1015625,0.2578125,0.0999581515789032,0.0,0.0625,0.5625
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.07080078125,0.10009765625,0.13427734375,0.099853515625,0.0,0.1015625,0.2578125,0.099853515625,0.0,0.0625,0.625
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06884765625,0.10009765625,0.12646484375,0.099853515625,0.0,0.1015625,0.34375,0.099853515625,0.0,0.0625,0.5625
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.07421875,0.09912109375,0.18212890625,0.099853515625,0.0,0.1015625,0.9921875,0.099853515625,0.0,0.0625,0.5625
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.0244140625,0.08984375,0.337890625,0.099853515625,0.0,0.0859375,0.4609375,0.099853515625,0.0,0.0625,0.75
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.07177734375,0.099609375,0.13134765625,0.0998535230755806,0.0,0.1015625,0.328125,0.0998535230755806,0.0,0.0625,0.625
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0732421875,0.099609375,0.12841796875,0.0998535230755806,0.0,0.1015625,0.3125,0.0998535230755806,0.0,0.0625,0.625
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.0625,0.10009765625,0.14111328125,0.0999581515789032,0.0,0.1015625,0.28125,0.0999581515789032,0.0,0.0625,0.625
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.06982421875,0.099609375,0.1328125,0.099853515625,0.0,0.1015625,0.375,0.099853515625,0.0,0.0625,0.625
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.06689453125,0.10009765625,0.12646484375,0.099853515625,0.0,0.1015625,0.3828125,0.099853515625,0.0,0.0625,0.5625
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,3775488,0.099853515625,"(128, 16)",8 x 256,2048,0.099853515625,0.0673828125,0.099609375,0.15771484375,0.099853515625,0.0,0.1015625,0.90625,0.099853515625,0.0,0.0625,0.5625
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,15101952,0.099853515625,"(128, 16)",32 x 256,8192,0.099853515625,0.01025390625,0.08642578125,0.22802734375,0.099853515625,0.0,0.0859375,0.3671875,0.099853515625,0.0,0.0625,0.6875
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.0712890625,0.099609375,0.13037109375,0.0998535230755806,0.0,0.1015625,0.328125,0.0998535230755806,0.0,0.0625,0.6875
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,52856832,0.09985345602035522,"(128, 16)",112 x 256,28672,0.0998535230755806,0.06982421875,0.099609375,0.12890625,0.0998535230755806,0.0,0.1015625,0.3125,0.0998535230755806,0.0,0.0625,0.625
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,52850688,0.09995812177658081,"(128, 16)",32 x 896,28672,0.0999581515789032,0.05859375,0.10009765625,0.15234375,0.0999581515789032,0.0,0.1015625,0.3359375,0.0999581515789032,0.0,0.0625,0.5625
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.2.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.02099609375,0.189453125,0.5751953125,0.199951171875,0.0,0.0078125,1.0,0.199951171875,0.0,0.1875,0.75
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.0361328125,0.18701171875,0.56884765625,0.199951171875,0.0,0.0078125,1.0,0.199951171875,0.0,0.1875,0.6875
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.02880859375,0.19189453125,0.53271484375,0.199951171875,0.0,0.015625,1.0,0.199951171875,0.0,0.1875,0.6875
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.01513671875,0.13525390625,0.8046875,0.199951171875,0.0,0.125,0.9453125,0.199951171875,0.0,0.125,1.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15625,0.197265625,0.31494140625,0.1999511867761612,0.0078125,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.8125
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15771484375,0.197265625,0.318359375,0.1999511867761612,0.015625,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.8125
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.12841796875,0.2001953125,0.26318359375,0.1999860554933548,0.0,0.1953125,0.4765625,0.1999860554933548,0.0,0.1875,0.8125
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.11181640625,0.19677734375,0.39453125,0.199951171875,0.0,0.1953125,1.0,0.199951171875,0.0,0.1875,0.75
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.10791015625,0.197265625,0.38427734375,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.11328125,0.1953125,0.36669921875,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.07080078125,0.17822265625,0.61328125,0.199951171875,0.0,0.1640625,0.859375,0.199951171875,0.0,0.1875,1.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.150390625,0.197265625,0.30810546875,0.1999511867761612,0.015625,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.8125
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1591796875,0.197265625,0.31396484375,0.1999511867761612,0.0234375,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.75
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14453125,0.2001953125,0.2685546875,0.1999860554933548,0.0,0.203125,0.4296875,0.1999860554933548,0.0,0.1875,0.75
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1279296875,0.19921875,0.31005859375,0.199951171875,0.0,0.2109375,1.0,0.199951171875,0.0,0.1875,0.8125
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.134765625,0.19970703125,0.31103515625,0.199951171875,0.0,0.2109375,1.0,0.199951171875,0.0,0.1875,0.6875
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.150390625,0.19873046875,0.3037109375,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.07861328125,0.17041015625,0.5810546875,0.199951171875,0.0,0.171875,0.765625,0.199951171875,0.0,0.1875,1.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15771484375,0.197265625,0.3154296875,0.1999511867761612,0.0234375,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.8125
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15673828125,0.197265625,0.3212890625,0.1999511867761612,0.03125,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.8125
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.13916015625,0.2001953125,0.26025390625,0.1999860554933548,0.0,0.203125,0.5,0.1999860554933548,0.0,0.1875,0.75
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1474609375,0.2001953125,0.28955078125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.8125
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14306640625,0.19970703125,0.27294921875,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15673828125,0.19921875,0.28173828125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.099609375,0.1904296875,0.3515625,0.199951171875,0.0,0.1953125,0.6015625,0.199951171875,0.0,0.1875,0.8125
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16064453125,0.1982421875,0.31201171875,0.1999511867761612,0.0078125,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.75
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15869140625,0.1982421875,0.31494140625,0.1999511867761612,0.03125,0.1953125,1.0,0.1999511867761612,0.0,0.1875,0.8125
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.146484375,0.19970703125,0.25634765625,0.1999860554933548,0.0,0.203125,0.4609375,0.1999860554933548,0.0,0.1875,0.8125
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.13671875,0.2001953125,0.271484375,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1435546875,0.2001953125,0.2734375,0.199951171875,0.0,0.2109375,1.0,0.199951171875,0.0,0.1875,0.75
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15283203125,0.19921875,0.27734375,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.087890625,0.17578125,0.5419921875,0.199951171875,0.0,0.1796875,0.7265625,0.199951171875,0.0,0.1875,0.9375
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15478515625,0.19970703125,0.26953125,0.1999511867761612,0.0,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.75
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1572265625,0.19970703125,0.27783203125,0.1999511867761612,0.0078125,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.75
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1513671875,0.19970703125,0.25732421875,0.1999860554933548,0.0,0.1953125,0.6484375,0.1999860554933548,0.0,0.1875,0.8125
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.13818359375,0.2001953125,0.25634765625,0.199951171875,0.0,0.203125,0.4921875,0.199951171875,0.0,0.1875,0.75
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.13134765625,0.19970703125,0.26318359375,0.199951171875,0.0,0.2109375,0.6171875,0.199951171875,0.0,0.1875,0.75
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15380859375,0.19970703125,0.275390625,0.199951171875,0.0078125,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.6875
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.06640625,0.16162109375,0.48193359375,0.199951171875,0.0,0.171875,0.6796875,0.199951171875,0.0,0.1875,0.9375
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15380859375,0.19970703125,0.2666015625,0.1999511867761612,0.0,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.75
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16015625,0.19970703125,0.2705078125,0.1999511867761612,0.015625,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.8125
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.15380859375,0.19970703125,0.2666015625,0.1999860554933548,0.0,0.1953125,0.53125,0.1999860554933548,0.0,0.1875,0.75
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1494140625,0.2001953125,0.24560546875,0.199951171875,0.0,0.203125,0.515625,0.199951171875,0.0,0.1875,0.75
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14990234375,0.2001953125,0.24853515625,0.199951171875,0.0,0.203125,0.6484375,0.199951171875,0.0,0.1875,0.75
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1484375,0.19921875,0.27392578125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.08984375,0.18798828125,0.3974609375,0.199951171875,0.0,0.1875,0.5859375,0.199951171875,0.0,0.1875,0.8125
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15771484375,0.19970703125,0.27001953125,0.1999511867761612,0.0,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.8125
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15966796875,0.19970703125,0.2744140625,0.1999511867761612,0.015625,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.8125
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1533203125,0.19970703125,0.255859375,0.1999860554933548,0.0,0.1953125,0.546875,0.1999860554933548,0.0,0.1875,0.75
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15087890625,0.2001953125,0.24560546875,0.199951171875,0.0,0.203125,0.4453125,0.199951171875,0.0,0.1875,0.8125
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14990234375,0.2001953125,0.25634765625,0.199951171875,0.0,0.203125,0.640625,0.199951171875,0.0,0.1875,0.75
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.16015625,0.19921875,0.2705078125,0.199951171875,0.0078125,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.75
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.11865234375,0.1962890625,0.31201171875,0.199951171875,0.0,0.203125,0.515625,0.199951171875,0.0,0.1875,0.75
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15966796875,0.2001953125,0.2529296875,0.1999511867761612,0.0,0.203125,0.3671875,0.1999511867761612,0.0,0.1875,0.75
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15869140625,0.2001953125,0.23876953125,0.1999511867761612,0.015625,0.203125,0.390625,0.1999511867761612,0.0,0.1875,0.75
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14990234375,0.19970703125,0.25732421875,0.1999860554933548,0.0,0.1953125,0.5546875,0.1999860554933548,0.0,0.1875,0.75
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1396484375,0.2001953125,0.25048828125,0.199951171875,0.0,0.203125,0.4921875,0.199951171875,0.0,0.1875,0.75
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1533203125,0.19970703125,0.248046875,0.199951171875,0.0,0.203125,0.515625,0.199951171875,0.0,0.1875,0.6875
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15625,0.19970703125,0.2744140625,0.199951171875,0.0,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.75
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1005859375,0.19384765625,0.3740234375,0.199951171875,0.0,0.1953125,0.5546875,0.199951171875,0.0,0.1875,0.875
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15869140625,0.19970703125,0.26806640625,0.1999511867761612,0.0,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.8125
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15625,0.19970703125,0.2744140625,0.1999511867761612,0.015625,0.203125,1.0,0.1999511867761612,0.0,0.1875,0.8125
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14501953125,0.19970703125,0.25537109375,0.1999860554933548,0.0078125,0.1953125,0.5546875,0.1999860554933548,0.0,0.1875,0.8125
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15283203125,0.19970703125,0.24560546875,0.199951171875,0.0,0.203125,0.46875,0.199951171875,0.0,0.1875,0.75
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15185546875,0.2001953125,0.23974609375,0.199951171875,0.0,0.203125,0.5390625,0.199951171875,0.0,0.1875,0.6875
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1611328125,0.19921875,0.26318359375,0.199951171875,0.0,0.1953125,0.9921875,0.199951171875,0.0,0.1875,0.75
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.0986328125,0.19287109375,0.39111328125,0.199951171875,0.0,0.1953125,0.5546875,0.199951171875,0.0,0.1875,0.8125
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.158203125,0.2001953125,0.23974609375,0.1999511867761612,0.0,0.203125,0.4140625,0.1999511867761612,0.0,0.1875,0.8125
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15966796875,0.2001953125,0.2431640625,0.1999511867761612,0.0078125,0.203125,0.390625,0.1999511867761612,0.0,0.1875,0.75
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.140625,0.2001953125,0.24658203125,0.1999860554933548,0.0,0.1953125,0.5078125,0.1999860554933548,0.0,0.1875,0.8125
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.146484375,0.2001953125,0.2490234375,0.199951171875,0.0,0.203125,0.4453125,0.199951171875,0.0,0.1875,0.75
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1484375,0.2001953125,0.2412109375,0.199951171875,0.0,0.203125,0.546875,0.199951171875,0.0,0.1875,0.6875
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.150390625,0.19970703125,0.255859375,0.199951171875,0.0,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.75
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.0849609375,0.1787109375,0.43212890625,0.199951171875,0.0,0.1875,0.609375,0.199951171875,0.0,0.1875,0.875
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1611328125,0.2001953125,0.2412109375,0.1999511867761612,0.0,0.203125,0.3828125,0.1999511867761612,0.0,0.1875,0.8125
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1640625,0.2001953125,0.23681640625,0.1999511867761612,0.0078125,0.203125,0.3984375,0.1999511867761612,0.0,0.1875,0.75
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14599609375,0.2001953125,0.2548828125,0.1999860554933548,0.0,0.203125,0.46875,0.1999860554933548,0.0,0.1875,0.8125
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1484375,0.2001953125,0.2431640625,0.199951171875,0.0,0.203125,0.46875,0.199951171875,0.0,0.1875,0.75
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1572265625,0.19970703125,0.24755859375,0.199951171875,0.0,0.203125,0.6484375,0.199951171875,0.0,0.1875,0.75
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1611328125,0.19970703125,0.25341796875,0.199951171875,0.015625,0.1953125,0.9921875,0.199951171875,0.0,0.1875,0.8125
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1064453125,0.1865234375,0.42578125,0.199951171875,0.0,0.1953125,0.578125,0.199951171875,0.0,0.1875,0.9375
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1484375,0.2001953125,0.240234375,0.1999511867761612,0.0,0.203125,0.390625,0.1999511867761612,0.0,0.1875,0.8125
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15966796875,0.2001953125,0.23974609375,0.1999511867761612,0.0,0.203125,0.4140625,0.1999511867761612,0.0,0.1875,0.8125
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1494140625,0.19970703125,0.24951171875,0.1999860554933548,0.0,0.203125,0.6015625,0.1999860554933548,0.0,0.1875,0.75
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1474609375,0.2001953125,0.24267578125,0.199951171875,0.0,0.203125,0.4609375,0.199951171875,0.0,0.1875,0.75
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1484375,0.2001953125,0.2431640625,0.199951171875,0.0,0.203125,0.6015625,0.199951171875,0.0,0.1875,0.6875
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1630859375,0.19970703125,0.26416015625,0.199951171875,0.0078125,0.1953125,0.9921875,0.199951171875,0.0,0.1875,0.75
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.0771484375,0.2001953125,0.32080078125,0.199951171875,0.0,0.203125,0.4921875,0.199951171875,0.0,0.1875,0.8125
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15771484375,0.2001953125,0.23876953125,0.1999511867761612,0.0,0.203125,0.375,0.1999511867761612,0.0,0.1875,0.75
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15576171875,0.2001953125,0.23876953125,0.1999511867761612,0.0078125,0.203125,0.3984375,0.1999511867761612,0.0,0.1875,0.75
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14501953125,0.19970703125,0.25341796875,0.1999860554933548,0.0,0.203125,0.484375,0.1999860554933548,0.0,0.1875,0.75
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15234375,0.2001953125,0.2490234375,0.199951171875,0.0,0.203125,0.4609375,0.199951171875,0.0,0.1875,0.75
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.146484375,0.19970703125,0.24365234375,0.199951171875,0.0,0.203125,0.6640625,0.199951171875,0.0,0.1875,0.6875
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.16357421875,0.19970703125,0.26904296875,0.199951171875,0.015625,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.6875
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.095703125,0.193359375,0.3701171875,0.199951171875,0.0,0.1953125,0.5234375,0.199951171875,0.0,0.1875,0.8125
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15185546875,0.2001953125,0.2392578125,0.1999511867761612,0.0,0.203125,0.3984375,0.1999511867761612,0.0,0.1875,0.75
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15771484375,0.2001953125,0.23828125,0.1999511867761612,0.0078125,0.203125,0.390625,0.1999511867761612,0.0,0.1875,0.75
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.150390625,0.19970703125,0.26416015625,0.1999860554933548,0.0,0.203125,0.546875,0.1999860554933548,0.0,0.1875,0.8125
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.14892578125,0.2001953125,0.24365234375,0.199951171875,0.0,0.203125,0.4609375,0.199951171875,0.0,0.1875,0.75
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14794921875,0.19970703125,0.24658203125,0.199951171875,0.0,0.203125,0.6953125,0.199951171875,0.0,0.1875,0.6875
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.16015625,0.19970703125,0.2705078125,0.199951171875,0.0,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.75
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.10009765625,0.19189453125,0.41015625,0.199951171875,0.0,0.1953125,0.5703125,0.199951171875,0.0,0.1875,0.875
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1552734375,0.2001953125,0.24072265625,0.1999511867761612,0.0,0.203125,0.390625,0.1999511867761612,0.0,0.1875,0.75
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16259765625,0.19970703125,0.23486328125,0.1999511867761612,0.0,0.203125,0.40625,0.1999511867761612,0.0,0.1875,0.75
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1318359375,0.2001953125,0.2607421875,0.1999860554933548,0.0078125,0.1953125,0.5078125,0.1999860554933548,0.0,0.1875,0.75
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.154296875,0.2001953125,0.24609375,0.199951171875,0.0,0.203125,0.4453125,0.199951171875,0.0,0.1875,0.75
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15087890625,0.2001953125,0.25244140625,0.199951171875,0.0,0.203125,0.5859375,0.199951171875,0.0,0.1875,0.75
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.150390625,0.19970703125,0.27294921875,0.199951171875,0.0078125,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.6875
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.07568359375,0.1767578125,0.453125,0.199951171875,0.0,0.1796875,0.6015625,0.199951171875,0.0,0.1875,0.9375
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16015625,0.2001953125,0.24267578125,0.1999511867761612,0.0,0.203125,0.390625,0.1999511867761612,0.0,0.1875,0.8125
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15625,0.19970703125,0.2373046875,0.1999511867761612,0.0,0.203125,0.3828125,0.1999511867761612,0.0,0.1875,0.75
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14794921875,0.19970703125,0.251953125,0.1999860554933548,0.0,0.1953125,0.53125,0.1999860554933548,0.0,0.1875,0.75
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15576171875,0.2001953125,0.2421875,0.199951171875,0.0,0.203125,0.4296875,0.199951171875,0.0,0.1875,0.75
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1533203125,0.2001953125,0.240234375,0.199951171875,0.0,0.203125,0.578125,0.199951171875,0.0,0.1875,0.6875
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15283203125,0.19921875,0.26806640625,0.199951171875,0.0,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.8125
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.076171875,0.18798828125,0.4033203125,0.199951171875,0.0,0.1875,0.546875,0.199951171875,0.0,0.1875,0.9375
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15478515625,0.2001953125,0.24169921875,0.1999511867761612,0.0,0.203125,0.3828125,0.1999511867761612,0.0,0.1875,0.75
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16064453125,0.19970703125,0.23681640625,0.1999511867761612,0.0078125,0.203125,0.421875,0.1999511867761612,0.0,0.1875,0.8125
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.15185546875,0.2001953125,0.2548828125,0.1999860554933548,0.0078125,0.1953125,0.484375,0.1999860554933548,0.0,0.1875,0.8125
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.1533203125,0.2001953125,0.24267578125,0.199951171875,0.0,0.203125,0.4609375,0.199951171875,0.0,0.1875,0.75
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15966796875,0.19970703125,0.2529296875,0.199951171875,0.0,0.203125,0.59375,0.199951171875,0.0,0.1875,0.6875
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.16015625,0.19921875,0.271484375,0.199951171875,0.0078125,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.0849609375,0.18310546875,0.44189453125,0.199951171875,0.0,0.1875,0.640625,0.199951171875,0.0,0.1875,0.875
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.158203125,0.19970703125,0.2392578125,0.1999511867761612,0.0,0.203125,0.421875,0.1999511867761612,0.0,0.1875,0.75
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1611328125,0.2001953125,0.2373046875,0.1999511867761612,0.0,0.203125,0.4140625,0.1999511867761612,0.0,0.1875,0.8125
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.146484375,0.19970703125,0.27392578125,0.1999860554933548,0.0,0.203125,0.625,0.1999860554933548,0.0,0.1875,0.75
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.154296875,0.2001953125,0.24560546875,0.199951171875,0.0,0.203125,0.53125,0.199951171875,0.0,0.1875,0.75
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15673828125,0.2001953125,0.24462890625,0.199951171875,0.0,0.203125,0.65625,0.199951171875,0.0,0.1875,0.75
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15234375,0.19921875,0.27001953125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.07421875,0.1513671875,0.587890625,0.199951171875,0.0,0.1640625,0.796875,0.199951171875,0.0,0.1875,1.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.158203125,0.2001953125,0.23974609375,0.1999511867761612,0.0,0.203125,0.4140625,0.1999511867761612,0.0,0.1875,0.8125
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.19970703125,0.23828125,0.1999511867761612,0.0078125,0.203125,0.4765625,0.1999511867761612,0.0,0.1875,0.75
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.15283203125,0.2001953125,0.255859375,0.1999860554933548,0.0,0.203125,0.5,0.1999860554933548,0.0,0.1875,0.8125
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15478515625,0.2001953125,0.24267578125,0.199951171875,0.0,0.203125,0.4375,0.199951171875,0.0,0.1875,0.75
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15380859375,0.2001953125,0.25,0.199951171875,0.0,0.203125,0.625,0.199951171875,0.0,0.1875,0.6875
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15478515625,0.19970703125,0.26953125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.05419921875,0.18798828125,0.49072265625,0.199951171875,0.0,0.1875,0.6875,0.199951171875,0.0,0.1875,1.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16015625,0.2001953125,0.2353515625,0.1999511867761612,0.0,0.203125,0.453125,0.1999511867761612,0.0,0.1875,0.8125
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.2001953125,0.23681640625,0.1999511867761612,0.0078125,0.203125,0.4765625,0.1999511867761612,0.0,0.1875,0.8125
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1435546875,0.19970703125,0.25830078125,0.1999860554933548,0.0,0.203125,0.4609375,0.1999860554933548,0.0,0.1875,0.75
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.14892578125,0.2001953125,0.24169921875,0.199951171875,0.0,0.203125,0.4375,0.199951171875,0.0,0.1875,0.75
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1474609375,0.2001953125,0.24755859375,0.199951171875,0.0,0.203125,0.625,0.199951171875,0.0,0.1875,0.6875
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15625,0.19970703125,0.28515625,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.0791015625,0.171875,0.5244140625,0.199951171875,0.0,0.171875,0.7890625,0.199951171875,0.0,0.1875,1.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16259765625,0.2001953125,0.2392578125,0.1999511867761612,0.0,0.203125,0.4296875,0.1999511867761612,0.0,0.1875,0.8125
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15771484375,0.19970703125,0.2373046875,0.1999511867761612,0.0,0.203125,0.4921875,0.1999511867761612,0.0,0.1875,0.8125
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14990234375,0.2001953125,0.255859375,0.1999860554933548,0.0,0.203125,0.453125,0.1999860554933548,0.0,0.1875,0.8125
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15380859375,0.2001953125,0.24365234375,0.199951171875,0.0,0.203125,0.40625,0.199951171875,0.0,0.1875,0.75
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15185546875,0.2001953125,0.2431640625,0.199951171875,0.0,0.203125,0.625,0.199951171875,0.0,0.1875,0.75
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15869140625,0.19970703125,0.263671875,0.199951171875,0.0,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.6875
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.0634765625,0.169921875,0.57275390625,0.199951171875,0.0,0.1796875,0.8359375,0.199951171875,0.0,0.1875,0.9375
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15869140625,0.2001953125,0.24169921875,0.1999511867761612,0.0,0.203125,0.484375,0.1999511867761612,0.0,0.1875,0.8125
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.19970703125,0.2373046875,0.1999511867761612,0.0078125,0.203125,0.4609375,0.1999511867761612,0.0,0.1875,0.8125
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.142578125,0.19970703125,0.25390625,0.1999860554933548,0.0,0.203125,0.4296875,0.1999860554933548,0.0,0.1875,0.75
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15283203125,0.2001953125,0.248046875,0.199951171875,0.0,0.203125,0.40625,0.199951171875,0.0,0.1875,0.8125
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15380859375,0.2001953125,0.24853515625,0.199951171875,0.0,0.203125,0.6875,0.199951171875,0.0,0.1875,0.625
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15283203125,0.19921875,0.267578125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.07470703125,0.1943359375,0.41650390625,0.199951171875,0.0,0.1875,0.59375,0.199951171875,0.0,0.1875,0.875
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1572265625,0.2001953125,0.2373046875,0.1999511867761612,0.0,0.203125,0.4296875,0.1999511867761612,0.0,0.1875,0.8125
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1650390625,0.2001953125,0.23876953125,0.1999511867761612,0.0,0.203125,0.484375,0.1999511867761612,0.0,0.1875,0.8125
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14306640625,0.19970703125,0.25537109375,0.1999860554933548,0.0,0.203125,0.4375,0.1999860554933548,0.0,0.1875,0.75
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.14892578125,0.2001953125,0.24560546875,0.199951171875,0.0,0.203125,0.40625,0.199951171875,0.0,0.1875,0.75
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15380859375,0.2001953125,0.244140625,0.199951171875,0.0,0.203125,0.6328125,0.199951171875,0.0,0.1875,0.75
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15966796875,0.2001953125,0.26953125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.06884765625,0.19287109375,0.3935546875,0.199951171875,0.0,0.1875,0.5546875,0.199951171875,0.0,0.1875,0.875
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.16015625,0.2001953125,0.2392578125,0.1999511867761612,0.0,0.203125,0.5,0.1999511867761612,0.0,0.1875,0.8125
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.2001953125,0.24072265625,0.1999511867761612,0.0,0.203125,0.5,0.1999511867761612,0.0,0.1875,0.8125
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.13720703125,0.2001953125,0.255859375,0.1999860554933548,0.0,0.203125,0.4453125,0.1999860554933548,0.0,0.1875,0.75
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15234375,0.19970703125,0.244140625,0.199951171875,0.0,0.203125,0.4140625,0.199951171875,0.0,0.1875,0.75
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15380859375,0.2001953125,0.2470703125,0.199951171875,0.0,0.203125,0.65625,0.199951171875,0.0,0.1875,0.6875
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15380859375,0.19970703125,0.27197265625,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.06396484375,0.17626953125,0.43017578125,0.199951171875,0.0,0.1796875,0.609375,0.199951171875,0.0,0.1875,0.9375
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1591796875,0.2001953125,0.240234375,0.1999511867761612,0.0,0.203125,0.4453125,0.1999511867761612,0.0,0.1875,0.75
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15966796875,0.2001953125,0.2431640625,0.1999511867761612,0.0,0.203125,0.4765625,0.1999511867761612,0.0,0.1875,0.8125
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1455078125,0.2001953125,0.25244140625,0.1999860554933548,0.0,0.203125,0.4140625,0.1999860554933548,0.0,0.1875,0.8125
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.14990234375,0.2001953125,0.24755859375,0.199951171875,0.0,0.203125,0.390625,0.199951171875,0.0,0.1875,0.75
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14990234375,0.2001953125,0.248046875,0.199951171875,0.0,0.203125,0.578125,0.199951171875,0.0,0.1875,0.6875
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15087890625,0.19970703125,0.27392578125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.06201171875,0.162109375,0.609375,0.199951171875,0.0,0.171875,0.8046875,0.199951171875,0.0,0.1875,1.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15576171875,0.19970703125,0.24365234375,0.1999511867761612,0.0,0.203125,0.4453125,0.1999511867761612,0.0,0.1875,0.8125
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15283203125,0.19970703125,0.2431640625,0.1999511867761612,0.0,0.203125,0.4921875,0.1999511867761612,0.0,0.1875,0.8125
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1435546875,0.2001953125,0.25732421875,0.1999860554933548,0.0,0.203125,0.4296875,0.1999860554933548,0.0,0.1875,0.8125
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.150390625,0.2001953125,0.248046875,0.199951171875,0.0,0.203125,0.4140625,0.199951171875,0.0,0.1875,0.75
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15234375,0.2001953125,0.25146484375,0.199951171875,0.0,0.203125,0.6640625,0.199951171875,0.0,0.1875,0.6875
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1591796875,0.19970703125,0.275390625,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.068359375,0.169921875,0.4501953125,0.199951171875,0.0,0.171875,0.6015625,0.199951171875,0.0,0.1875,0.9375
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15966796875,0.2001953125,0.2392578125,0.1999511867761612,0.0,0.203125,0.484375,0.1999511867761612,0.0,0.1875,0.8125
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1533203125,0.19970703125,0.2421875,0.1999511867761612,0.0,0.203125,0.4921875,0.1999511867761612,0.0,0.1875,0.8125
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.13818359375,0.2001953125,0.251953125,0.1999860554933548,0.0,0.203125,0.4375,0.1999860554933548,0.0,0.1875,0.8125
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15087890625,0.2001953125,0.27099609375,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14697265625,0.19970703125,0.271484375,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15673828125,0.19970703125,0.26953125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.06787109375,0.18115234375,0.42138671875,0.199951171875,0.0,0.1796875,0.6328125,0.199951171875,0.0,0.1875,0.875
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.2001953125,0.24072265625,0.1999511867761612,0.0,0.203125,0.4609375,0.1999511867761612,0.0,0.1875,0.75
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.15673828125,0.2001953125,0.236328125,0.1999511867761612,0.0,0.203125,0.5234375,0.1999511867761612,0.0,0.1875,0.8125
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14013671875,0.2001953125,0.2509765625,0.1999860554933548,0.0,0.203125,0.4296875,0.1999860554933548,0.0,0.1875,0.75
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.14794921875,0.2001953125,0.24853515625,0.199951171875,0.0,0.203125,0.3828125,0.199951171875,0.0,0.1875,0.75
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15234375,0.2001953125,0.25,0.199951171875,0.0,0.203125,0.6171875,0.199951171875,0.0,0.1875,0.75
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15869140625,0.19921875,0.2744140625,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.05908203125,0.19091796875,0.46923828125,0.199951171875,0.0,0.1875,0.6015625,0.199951171875,0.0,0.1875,0.875
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.14990234375,0.2001953125,0.2392578125,0.1999511867761612,0.0,0.203125,0.453125,0.1999511867761612,0.0,0.1875,0.75
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1572265625,0.19970703125,0.23876953125,0.1999511867761612,0.0,0.203125,0.4375,0.1999511867761612,0.0,0.1875,0.8125
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.1357421875,0.2001953125,0.25927734375,0.1999860554933548,0.0,0.2109375,0.4140625,0.1999860554933548,0.0,0.1875,0.75
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.15234375,0.2001953125,0.291015625,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14599609375,0.20068359375,0.26904296875,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.6875
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.15966796875,0.19921875,0.27099609375,0.199951171875,0.0078125,0.203125,1.0,0.199951171875,0.0,0.1875,0.8125
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.04248046875,0.21533203125,0.38720703125,0.199951171875,0.0,0.1953125,0.546875,0.199951171875,0.0,0.1875,0.9375
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1552734375,0.19970703125,0.23876953125,0.1999511867761612,0.0,0.203125,0.484375,0.1999511867761612,0.0,0.1875,0.8125
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.158203125,0.2001953125,0.24072265625,0.1999511867761612,0.0,0.203125,0.4453125,0.1999511867761612,0.0,0.1875,0.75
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.14453125,0.2001953125,0.2587890625,0.1999860554933548,0.0,0.2109375,0.421875,0.1999860554933548,0.0,0.1875,0.8125
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.14404296875,0.2001953125,0.24462890625,0.199951171875,0.0,0.203125,0.390625,0.199951171875,0.0,0.1875,0.75
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14453125,0.2001953125,0.25,0.199951171875,0.0,0.203125,0.59375,0.199951171875,0.0,0.1875,0.75
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1513671875,0.19921875,0.28173828125,0.199951171875,0.0,0.203125,1.0,0.199951171875,0.0,0.1875,0.75
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.05322265625,0.17919921875,0.6240234375,0.199951171875,0.0,0.1796875,0.7421875,0.199951171875,0.0,0.1875,1.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.2001953125,0.24609375,0.1999511867761612,0.0,0.203125,0.5703125,0.1999511867761612,0.0,0.1875,0.75
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1552734375,0.2001953125,0.2412109375,0.1999511867761612,0.0,0.203125,0.515625,0.1999511867761612,0.0,0.1875,0.8125
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.13427734375,0.2001953125,0.25927734375,0.1999860554933548,0.0,0.2109375,0.46875,0.1999860554933548,0.0,0.1875,0.75
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.146484375,0.19970703125,0.24951171875,0.199951171875,0.0,0.203125,0.625,0.199951171875,0.0,0.1875,0.75
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.14404296875,0.19970703125,0.23974609375,0.199951171875,0.0,0.203125,0.6484375,0.199951171875,0.0,0.1875,0.75
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,3355648,0.199951171875,"(128, 16)",8 x 256,2048,0.199951171875,0.1357421875,0.19970703125,0.26953125,0.199951171875,0.0,0.203125,0.9921875,0.199951171875,0.0,0.1875,0.6875
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,13422592,0.199951171875,"(128, 16)",32 x 256,8192,0.199951171875,0.02783203125,0.1728515625,0.435546875,0.199951171875,0.0,0.1796875,0.65625,0.199951171875,0.0,0.1875,0.9375
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.162109375,0.2001953125,0.2470703125,0.1999511867761612,0.0078125,0.203125,0.5546875,0.1999511867761612,0.0,0.1875,0.8125
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,46979072,0.19995111227035522,"(128, 16)",112 x 256,28672,0.1999511867761612,0.1591796875,0.2001953125,0.24072265625,0.1999511867761612,0.0078125,0.203125,0.53125,0.1999511867761612,0.0,0.1875,0.8125
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,46977024,0.1999860405921936,"(128, 16)",32 x 896,28672,0.1999860554933548,0.12353515625,0.2001953125,0.283203125,0.1999860554933548,0.0,0.21875,0.5703125,0.1999860554933548,0.0,0.1875,0.8125
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.3.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.06787109375,0.29833984375,0.66650390625,0.2998046875,0.0,0.03125,1.0,0.2998046875,0.0,0.3125,0.875
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.0712890625,0.2978515625,0.66748046875,0.2998046875,0.0,0.03125,1.0,0.2998046875,0.0,0.3125,0.8125
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.0791015625,0.29345703125,0.62255859375,0.2998046875,0.0,0.0546875,1.0,0.2998046875,0.0,0.3125,0.8125
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.0283203125,0.21826171875,0.9521484375,0.2998046875,0.0,0.2109375,1.0,0.2998046875,0.0,0.25,1.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2431640625,0.2978515625,0.40576171875,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25244140625,0.2978515625,0.4130859375,0.2998046875,0.0234375,0.296875,1.0,0.2998046875,0.0,0.3125,0.9375
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2001953125,0.30029296875,0.38623046875,0.2999442219734192,0.0,0.3046875,0.671875,0.2999442219734192,0.0,0.3125,0.9375
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.17529296875,0.29833984375,0.5009765625,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.16796875,0.2998046875,0.482421875,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.875
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.1787109375,0.2978515625,0.4599609375,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.8125
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.11083984375,0.2783203125,0.796875,0.2998046875,0.0078125,0.2578125,0.9921875,0.2998046875,0.0,0.25,1.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2421875,0.2978515625,0.40185546875,0.2998046875,0.046875,0.296875,1.0,0.2998046875,0.0,0.3125,0.9375
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25048828125,0.2978515625,0.39892578125,0.2998046875,0.046875,0.296875,1.0,0.2998046875,0.0,0.3125,0.9375
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22607421875,0.30078125,0.38818359375,0.2999442219734192,0.0,0.3125,0.5859375,0.2999442219734192,0.0,0.3125,0.9375
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.1982421875,0.2998046875,0.4267578125,0.2998046875,0.0,0.3203125,1.0,0.2998046875,0.0,0.3125,0.875
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.20703125,0.30029296875,0.43310546875,0.2998046875,0.0,0.3203125,1.0,0.2998046875,0.0,0.3125,0.8125
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2265625,0.29931640625,0.41015625,0.2998046875,0.015625,0.3125,1.0,0.2998046875,0.0,0.3125,0.875
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.125,0.2587890625,0.783203125,0.2998046875,0.0,0.2578125,0.953125,0.2998046875,0.0,0.25,1.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25146484375,0.2978515625,0.40869140625,0.2998046875,0.0390625,0.296875,1.0,0.2998046875,0.0,0.3125,0.875
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.244140625,0.2978515625,0.4052734375,0.2998046875,0.0703125,0.296875,1.0,0.2998046875,0.0,0.3125,0.875
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22119140625,0.2998046875,0.39111328125,0.2999442219734192,0.0078125,0.3046875,0.65625,0.2999442219734192,0.0,0.3125,0.9375
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2314453125,0.30029296875,0.3857421875,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.875
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.21630859375,0.30029296875,0.380859375,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.8125
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2451171875,0.29931640625,0.3828125,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.14453125,0.287109375,0.521484375,0.2998046875,0.0078125,0.2890625,0.7421875,0.2998046875,0.0,0.3125,1.0
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24853515625,0.29833984375,0.39892578125,0.2998046875,0.0234375,0.296875,1.0,0.2998046875,0.0,0.3125,0.875
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24609375,0.29833984375,0.40087890625,0.2998046875,0.0703125,0.296875,1.0,0.2998046875,0.0,0.3125,0.875
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22705078125,0.29931640625,0.3720703125,0.2999442219734192,0.0,0.3046875,0.609375,0.2999442219734192,0.0,0.3125,0.875
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2177734375,0.30029296875,0.373046875,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.875
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2197265625,0.30029296875,0.3916015625,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.875
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23974609375,0.2998046875,0.384765625,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.14208984375,0.2685546875,0.74365234375,0.2998046875,0.0,0.265625,0.8984375,0.2998046875,0.0,0.25,1.0
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24560546875,0.2998046875,0.37841796875,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25244140625,0.2998046875,0.37353515625,0.2998046875,0.0625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.23291015625,0.30029296875,0.37158203125,0.2999442219734192,0.0078125,0.296875,0.8359375,0.2999442219734192,0.0,0.3125,0.9375
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.21484375,0.30029296875,0.37890625,0.2998046875,0.0,0.3125,0.6796875,0.2998046875,0.0,0.3125,0.875
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2158203125,0.30029296875,0.3759765625,0.2998046875,0.0,0.3125,0.75,0.2998046875,0.0,0.3125,0.875
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24560546875,0.2998046875,0.3876953125,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.10009765625,0.2470703125,0.70556640625,0.2998046875,0.0,0.265625,0.859375,0.2998046875,0.0,0.25,1.0
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.23828125,0.2998046875,0.36669921875,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.244140625,0.2998046875,0.38037109375,0.2998046875,0.0546875,0.296875,1.0,0.2998046875,0.0,0.3125,0.9375
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.24267578125,0.2998046875,0.37939453125,0.2999442219734192,0.0,0.296875,0.6953125,0.2999442219734192,0.0,0.3125,0.875
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.22998046875,0.2998046875,0.37109375,0.2998046875,0.0,0.3125,0.6484375,0.2998046875,0.0,0.3125,0.875
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2216796875,0.2998046875,0.3720703125,0.2998046875,0.0,0.3125,0.71875,0.2998046875,0.0,0.3125,0.875
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.236328125,0.29931640625,0.37255859375,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.13427734375,0.28369140625,0.56689453125,0.2998046875,0.0078125,0.28125,0.7890625,0.2998046875,0.0,0.3125,1.0
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24462890625,0.2998046875,0.3681640625,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.248046875,0.2998046875,0.373046875,0.2998046875,0.046875,0.296875,1.0,0.2998046875,0.0,0.3125,0.9375
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.23876953125,0.2998046875,0.37548828125,0.2999442219734192,0.0,0.296875,0.734375,0.2999442219734192,0.0,0.3125,0.875
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.22900390625,0.2998046875,0.3681640625,0.2998046875,0.0,0.3046875,0.65625,0.2998046875,0.0,0.3125,0.875
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23876953125,0.30078125,0.3642578125,0.2998046875,0.0,0.3125,0.734375,0.2998046875,0.0,0.3125,0.875
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2412109375,0.29931640625,0.369140625,0.2998046875,0.0078125,0.3046875,0.9921875,0.2998046875,0.0,0.3125,0.875
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.19091796875,0.294921875,0.45068359375,0.2998046875,0.0,0.3046875,0.7265625,0.2998046875,0.0,0.3125,0.875
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.244140625,0.30029296875,0.34375,0.2998046875,0.0078125,0.3046875,0.515625,0.2998046875,0.0,0.3125,0.875
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.23681640625,0.2998046875,0.3505859375,0.2998046875,0.03125,0.3046875,0.5,0.2998046875,0.0,0.3125,0.9375
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.23388671875,0.2998046875,0.37109375,0.2999442219734192,0.0,0.296875,0.7265625,0.2999442219734192,0.0,0.3125,0.9375
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23095703125,0.2998046875,0.3603515625,0.2998046875,0.0,0.3046875,0.6171875,0.2998046875,0.0,0.3125,0.875
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22900390625,0.2998046875,0.357421875,0.2998046875,0.0,0.3046875,0.6484375,0.2998046875,0.0,0.3125,0.875
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24462890625,0.2998046875,0.37109375,0.2998046875,0.015625,0.3046875,0.9921875,0.2998046875,0.0,0.3125,0.8125
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.15673828125,0.29248046875,0.51708984375,0.2998046875,0.0,0.2890625,0.6953125,0.2998046875,0.0,0.3125,0.9375
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24267578125,0.30029296875,0.37060546875,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.9375
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24560546875,0.2998046875,0.3681640625,0.2998046875,0.0234375,0.296875,1.0,0.2998046875,0.0,0.3125,0.875
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2353515625,0.2998046875,0.37060546875,0.2999442219734192,0.03125,0.296875,0.703125,0.2999442219734192,0.0,0.3125,0.9375
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2314453125,0.30029296875,0.35693359375,0.2998046875,0.0,0.3046875,0.625,0.2998046875,0.0,0.3125,0.875
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23291015625,0.2998046875,0.35595703125,0.2998046875,0.0,0.3046875,0.71875,0.2998046875,0.0,0.3125,0.8125
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24609375,0.29931640625,0.36669921875,0.2998046875,0.0078125,0.296875,0.9921875,0.2998046875,0.0,0.3125,0.875
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.15771484375,0.29052734375,0.54150390625,0.2998046875,0.0078125,0.296875,0.7265625,0.2998046875,0.0,0.3125,0.9375
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.23828125,0.30029296875,0.34619140625,0.2998046875,0.0,0.3046875,0.5078125,0.2998046875,0.0,0.3125,0.9375
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24853515625,0.30029296875,0.34228515625,0.2998046875,0.015625,0.3046875,0.5234375,0.2998046875,0.0,0.3125,0.875
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2275390625,0.2998046875,0.36181640625,0.2999442219734192,0.0,0.296875,0.671875,0.2999442219734192,0.0,0.3125,0.875
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2314453125,0.30029296875,0.36181640625,0.2998046875,0.0,0.3046875,0.5859375,0.2998046875,0.0,0.3125,0.875
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22998046875,0.30029296875,0.3583984375,0.2998046875,0.0,0.3046875,0.7109375,0.2998046875,0.0,0.3125,0.875
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.236328125,0.2998046875,0.357421875,0.2998046875,0.0078125,0.296875,0.9921875,0.2998046875,0.0,0.3125,0.8125
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.1318359375,0.27099609375,0.5927734375,0.2998046875,0.0,0.28125,0.765625,0.2998046875,0.0,0.3125,1.0
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2451171875,0.30029296875,0.3466796875,0.2998046875,0.0078125,0.3046875,0.515625,0.2998046875,0.0,0.3125,0.9375
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2529296875,0.30029296875,0.35107421875,0.2998046875,0.0234375,0.296875,0.5390625,0.2998046875,0.0,0.3125,0.875
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.23291015625,0.30029296875,0.3662109375,0.2999442219734192,0.0,0.296875,0.640625,0.2999442219734192,0.0,0.3125,1.0
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2265625,0.30029296875,0.35595703125,0.2998046875,0.0,0.3046875,0.6640625,0.2998046875,0.0,0.3125,0.875
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2373046875,0.30029296875,0.36181640625,0.2998046875,0.0,0.3046875,0.765625,0.2998046875,0.0,0.3125,0.8125
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24072265625,0.2998046875,0.35693359375,0.2998046875,0.0234375,0.296875,0.9921875,0.2998046875,0.0,0.3125,0.875
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.166015625,0.28173828125,0.59228515625,0.2998046875,0.0078125,0.2890625,0.7578125,0.2998046875,0.0,0.3125,1.0
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.23974609375,0.30029296875,0.34375,0.2998046875,0.0078125,0.3046875,0.515625,0.2998046875,0.0,0.3125,0.875
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24072265625,0.30029296875,0.3505859375,0.2998046875,0.03125,0.3046875,0.5546875,0.2998046875,0.0,0.3125,0.875
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.228515625,0.2998046875,0.36669921875,0.2999442219734192,0.0,0.3046875,0.78125,0.2999442219734192,0.0,0.3125,0.875
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.228515625,0.30029296875,0.35693359375,0.2998046875,0.0,0.3046875,0.6171875,0.2998046875,0.0,0.3125,0.875
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22509765625,0.30029296875,0.35302734375,0.2998046875,0.0,0.3046875,0.734375,0.2998046875,0.0,0.3125,0.875
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24267578125,0.29931640625,0.36572265625,0.2998046875,0.0234375,0.296875,0.9921875,0.2998046875,0.0,0.3125,0.8125
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.12744140625,0.30078125,0.47216796875,0.2998046875,0.0078125,0.3046875,0.625,0.2998046875,0.0,0.3125,0.9375
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2421875,0.30029296875,0.3447265625,0.2998046875,0.0,0.3046875,0.5078125,0.2998046875,0.0,0.3125,0.875
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2470703125,0.30029296875,0.3447265625,0.2998046875,0.0234375,0.3046875,0.515625,0.2998046875,0.0,0.3125,0.9375
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2275390625,0.2998046875,0.3681640625,0.2999442219734192,0.0,0.3046875,0.6640625,0.2999442219734192,0.0,0.3125,0.875
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2392578125,0.2998046875,0.35888671875,0.2998046875,0.0,0.3046875,0.6640625,0.2998046875,0.0,0.3125,0.875
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2275390625,0.29931640625,0.3544921875,0.2998046875,0.0,0.3046875,0.7890625,0.2998046875,0.0,0.3125,0.8125
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.248046875,0.29931640625,0.36572265625,0.2998046875,0.015625,0.296875,0.9921875,0.2998046875,0.0,0.3125,0.9375
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.150390625,0.29296875,0.525390625,0.2998046875,0.0,0.296875,0.6953125,0.2998046875,0.0,0.3125,0.9375
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24365234375,0.30029296875,0.349609375,0.2998046875,0.0,0.3046875,0.515625,0.2998046875,0.0,0.3125,0.9375
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25,0.30029296875,0.34326171875,0.2998046875,0.015625,0.3046875,0.5078125,0.2998046875,0.0,0.3125,0.9375
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22802734375,0.2998046875,0.3798828125,0.2999442219734192,0.0,0.3046875,0.734375,0.2999442219734192,0.0,0.3125,0.9375
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23193359375,0.30029296875,0.361328125,0.2998046875,0.0,0.3046875,0.59375,0.2998046875,0.0,0.3125,0.875
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.234375,0.2998046875,0.3525390625,0.2998046875,0.0,0.3046875,0.8359375,0.2998046875,0.0,0.3125,0.8125
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23779296875,0.2998046875,0.3720703125,0.2998046875,0.0078125,0.3046875,0.9921875,0.2998046875,0.0,0.3125,0.875
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.16064453125,0.28955078125,0.5732421875,0.2998046875,0.0,0.2890625,0.75,0.2998046875,0.0,0.3125,1.0
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24169921875,0.30029296875,0.34619140625,0.2998046875,0.0,0.3046875,0.5078125,0.2998046875,0.0,0.3125,1.0
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.240234375,0.2998046875,0.34228515625,0.2998046875,0.0,0.3046875,0.5078125,0.2998046875,0.0,0.3125,0.9375
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22119140625,0.30029296875,0.37060546875,0.2999442219734192,0.015625,0.296875,0.671875,0.2999442219734192,0.0,0.3125,0.9375
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23828125,0.30029296875,0.357421875,0.2998046875,0.0,0.3046875,0.59375,0.2998046875,0.0,0.3125,0.875
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.236328125,0.30029296875,0.3525390625,0.2998046875,0.0,0.3046875,0.7578125,0.2998046875,0.0,0.3125,0.875
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.232421875,0.2998046875,0.37451171875,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.1240234375,0.2705078125,0.6328125,0.2998046875,0.0078125,0.2734375,0.7734375,0.2998046875,0.0,0.25,1.0
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.240234375,0.30029296875,0.35400390625,0.2998046875,0.0,0.3046875,0.5234375,0.2998046875,0.0,0.3125,0.9375
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.248046875,0.2998046875,0.3447265625,0.2998046875,0.015625,0.3046875,0.515625,0.2998046875,0.0,0.3125,0.875
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22900390625,0.2998046875,0.375,0.2999442219734192,0.015625,0.296875,0.7265625,0.2999442219734192,0.0,0.3125,0.875
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23779296875,0.30029296875,0.357421875,0.2998046875,0.0,0.3046875,0.640625,0.2998046875,0.0,0.3125,0.8125
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.232421875,0.30029296875,0.35009765625,0.2998046875,0.0,0.3046875,0.7578125,0.2998046875,0.0,0.3125,0.875
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23046875,0.29931640625,0.36572265625,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.11962890625,0.2841796875,0.5732421875,0.2998046875,0.0078125,0.2890625,0.7421875,0.2998046875,0.0,0.3125,1.0
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24609375,0.30029296875,0.34814453125,0.2998046875,0.0,0.3046875,0.53125,0.2998046875,0.0,0.3125,0.9375
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25,0.2998046875,0.349609375,0.2998046875,0.0078125,0.3046875,0.5390625,0.2998046875,0.0,0.3125,0.875
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.23486328125,0.2998046875,0.37255859375,0.2999442219734192,0.0078125,0.296875,0.640625,0.2999442219734192,0.0,0.3125,0.9375
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23193359375,0.2998046875,0.3564453125,0.2998046875,0.0,0.3046875,0.59375,0.2998046875,0.0,0.3125,0.875
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2431640625,0.30029296875,0.35595703125,0.2998046875,0.0,0.3125,0.734375,0.2998046875,0.0,0.3125,0.8125
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24658203125,0.2998046875,0.369140625,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.13037109375,0.279296875,0.64697265625,0.2998046875,0.0078125,0.28125,0.8203125,0.2998046875,0.0,0.25,1.0
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24365234375,0.2998046875,0.34326171875,0.2998046875,0.0,0.3046875,0.5859375,0.2998046875,0.0,0.3125,0.9375
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2412109375,0.2998046875,0.34521484375,0.2998046875,0.0,0.3046875,0.5859375,0.2998046875,0.0,0.3125,0.875
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22705078125,0.2998046875,0.3828125,0.2999442219734192,0.0234375,0.3046875,0.8046875,0.2999442219734192,0.0,0.3125,0.875
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2353515625,0.30029296875,0.357421875,0.2998046875,0.0,0.3046875,0.71875,0.2998046875,0.0,0.3125,0.875
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23779296875,0.30078125,0.3583984375,0.2998046875,0.0,0.3125,0.7890625,0.2998046875,0.0,0.3125,0.8125
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23095703125,0.2998046875,0.37255859375,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.1220703125,0.23291015625,0.77880859375,0.2998046875,0.0,0.25,0.96875,0.2998046875,0.0,0.25,1.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24365234375,0.2998046875,0.3505859375,0.2998046875,0.0,0.3046875,0.5625,0.2998046875,0.0,0.3125,0.875
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2509765625,0.2998046875,0.34228515625,0.2998046875,0.015625,0.3046875,0.625,0.2998046875,0.0,0.3125,0.9375
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22998046875,0.2998046875,0.365234375,0.2999442219734192,0.0,0.3046875,0.6484375,0.2999442219734192,0.0,0.3125,0.875
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23779296875,0.30029296875,0.357421875,0.2998046875,0.0,0.3046875,0.6328125,0.2998046875,0.0,0.3125,0.875
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22900390625,0.30029296875,0.361328125,0.2998046875,0.0,0.3125,0.796875,0.2998046875,0.0,0.3125,0.875
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23828125,0.29931640625,0.37646484375,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.0908203125,0.2783203125,0.72607421875,0.2998046875,0.0,0.28125,0.90625,0.2998046875,0.0,0.25,1.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2412109375,0.2998046875,0.34375,0.2998046875,0.0,0.3046875,0.5703125,0.2998046875,0.0,0.3125,0.9375
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2509765625,0.2998046875,0.34375,0.2998046875,0.0078125,0.3046875,0.625,0.2998046875,0.0,0.3125,0.9375
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22705078125,0.2998046875,0.37109375,0.2999442219734192,0.0078125,0.3046875,0.625,0.2999442219734192,0.0,0.3125,0.875
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.234375,0.30029296875,0.35107421875,0.2998046875,0.0,0.3046875,0.578125,0.2998046875,0.0,0.3125,0.8125
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23193359375,0.30078125,0.35546875,0.2998046875,0.0,0.3125,0.7734375,0.2998046875,0.0,0.3125,0.875
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23974609375,0.30029296875,0.3779296875,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.12451171875,0.2587890625,0.7373046875,0.2998046875,0.0,0.265625,0.921875,0.2998046875,0.0,0.25,1.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24755859375,0.2998046875,0.34423828125,0.2998046875,0.0,0.3046875,0.59375,0.2998046875,0.0,0.3125,0.875
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25341796875,0.2998046875,0.34375,0.2998046875,0.015625,0.3046875,0.6328125,0.2998046875,0.0,0.3125,0.875
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.23583984375,0.30029296875,0.3671875,0.2999442219734192,0.0,0.3046875,0.625,0.2999442219734192,0.0,0.3125,0.875
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2294921875,0.30029296875,0.35400390625,0.2998046875,0.0,0.3046875,0.53125,0.2998046875,0.0,0.3125,1.0
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22998046875,0.30029296875,0.36474609375,0.2998046875,0.0,0.3125,0.71875,0.2998046875,0.0,0.3125,0.8125
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.24169921875,0.2998046875,0.376953125,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.10009765625,0.2607421875,0.7841796875,0.2998046875,0.0,0.2734375,0.9765625,0.2998046875,0.0,0.25,1.0
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24853515625,0.2998046875,0.34228515625,0.2998046875,0.0,0.3046875,0.6015625,0.2998046875,0.0,0.3125,0.875
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24560546875,0.2998046875,0.345703125,0.2998046875,0.015625,0.3046875,0.6328125,0.2998046875,0.0,0.3125,0.875
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2265625,0.2998046875,0.3671875,0.2999442219734192,0.015625,0.3046875,0.6171875,0.2999442219734192,0.0,0.3125,0.875
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23486328125,0.30029296875,0.3564453125,0.2998046875,0.0,0.3046875,0.546875,0.2998046875,0.0,0.3125,0.875
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.232421875,0.2998046875,0.359375,0.2998046875,0.0,0.3125,0.765625,0.2998046875,0.0,0.3125,0.8125
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2353515625,0.2998046875,0.36669921875,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.1240234375,0.2919921875,0.59521484375,0.2998046875,0.0078125,0.2890625,0.78125,0.2998046875,0.0,0.3125,1.0
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25,0.30029296875,0.34619140625,0.2998046875,0.0,0.3046875,0.5859375,0.2998046875,0.0,0.3125,0.875
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25341796875,0.2998046875,0.34619140625,0.2998046875,0.0,0.296875,0.640625,0.2998046875,0.0,0.3125,0.875
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22021484375,0.2998046875,0.369140625,0.2999442219734192,0.0,0.3046875,0.5703125,0.2999442219734192,0.0,0.3125,0.875
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.22705078125,0.30029296875,0.3583984375,0.2998046875,0.0,0.3046875,0.546875,0.2998046875,0.0,0.3125,0.875
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23876953125,0.30078125,0.3583984375,0.2998046875,0.0,0.3125,0.765625,0.2998046875,0.0,0.3125,0.8125
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2392578125,0.2998046875,0.37109375,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.103515625,0.29150390625,0.55322265625,0.2998046875,0.0,0.2890625,0.7265625,0.2998046875,0.0,0.3125,0.9375
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24951171875,0.2998046875,0.34619140625,0.2998046875,0.0,0.3046875,0.640625,0.2998046875,0.0,0.3125,0.875
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2490234375,0.2998046875,0.34912109375,0.2998046875,0.015625,0.296875,0.6484375,0.2998046875,0.0,0.3125,0.875
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.21533203125,0.30029296875,0.3642578125,0.2999442219734192,0.0,0.3125,0.5859375,0.2999442219734192,0.0,0.3125,0.875
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23486328125,0.30029296875,0.35498046875,0.2998046875,0.0,0.3125,0.59375,0.2998046875,0.0,0.3125,0.875
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.234375,0.30029296875,0.3544921875,0.2998046875,0.0,0.3125,0.765625,0.2998046875,0.0,0.3125,0.875
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23388671875,0.2998046875,0.373046875,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.10693359375,0.26318359375,0.61572265625,0.2998046875,0.0,0.2734375,0.8125,0.2998046875,0.0,0.3125,1.0
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25,0.30029296875,0.34814453125,0.2998046875,0.0,0.3046875,0.625,0.2998046875,0.0,0.3125,0.875
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2431640625,0.30029296875,0.34423828125,0.2998046875,0.015625,0.296875,0.6484375,0.2998046875,0.0,0.3125,0.9375
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.22509765625,0.30078125,0.369140625,0.2999442219734192,0.0,0.3125,0.578125,0.2999442219734192,0.0,0.3125,0.875
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23291015625,0.2998046875,0.35693359375,0.2998046875,0.0,0.3125,0.546875,0.2998046875,0.0,0.3125,0.9375
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2216796875,0.30029296875,0.36767578125,0.2998046875,0.0,0.3125,0.7421875,0.2998046875,0.0,0.3125,0.8125
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.236328125,0.2998046875,0.3681640625,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.10107421875,0.248046875,0.81982421875,0.2998046875,0.0078125,0.2578125,0.9609375,0.2998046875,0.0,0.25,1.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24951171875,0.2998046875,0.34521484375,0.2998046875,0.0,0.3046875,0.609375,0.2998046875,0.0,0.3125,0.875
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.248046875,0.2998046875,0.34375,0.2998046875,0.0,0.296875,0.6328125,0.2998046875,0.0,0.3125,0.9375
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2119140625,0.30029296875,0.37109375,0.2999442219734192,0.0078125,0.3125,0.578125,0.2999442219734192,0.0,0.3125,0.9375
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23583984375,0.30029296875,0.35693359375,0.2998046875,0.0,0.3125,0.5390625,0.2998046875,0.0,0.3125,0.875
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22998046875,0.30029296875,0.365234375,0.2998046875,0.0,0.3125,0.765625,0.2998046875,0.0,0.3125,0.8125
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23681640625,0.29931640625,0.38134765625,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.111328125,0.259765625,0.65185546875,0.2998046875,0.0,0.2578125,0.8046875,0.2998046875,0.0,0.25,1.0
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24951171875,0.2998046875,0.34375,0.2998046875,0.0,0.3046875,0.640625,0.2998046875,0.0,0.3125,0.875
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2451171875,0.2998046875,0.3505859375,0.2998046875,0.0078125,0.296875,0.625,0.2998046875,0.0,0.3125,0.875
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.21142578125,0.30078125,0.37060546875,0.2999442219734192,0.0,0.3125,0.6171875,0.2999442219734192,0.0,0.3125,0.9375
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.23486328125,0.30029296875,0.3740234375,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.875
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2353515625,0.30029296875,0.37451171875,0.2998046875,0.0,0.3125,1.0,0.2998046875,0.0,0.3125,0.8125
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.23779296875,0.2998046875,0.37548828125,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.10791015625,0.27587890625,0.61572265625,0.2998046875,0.0,0.265625,0.796875,0.2998046875,0.0,0.25,1.0
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.2470703125,0.2998046875,0.3505859375,0.2998046875,0.0,0.3046875,0.609375,0.2998046875,0.0,0.3125,0.9375
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25341796875,0.2998046875,0.34619140625,0.2998046875,0.0,0.296875,0.671875,0.2998046875,0.0,0.3125,0.9375
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.21826171875,0.30029296875,0.36962890625,0.2999442219734192,0.0,0.3125,0.546875,0.2999442219734192,0.0,0.3125,0.9375
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.22216796875,0.30078125,0.3564453125,0.2998046875,0.0,0.3125,0.5234375,0.2998046875,0.0,0.3125,0.875
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.232421875,0.2998046875,0.35498046875,0.2998046875,0.0,0.3125,0.78125,0.2998046875,0.0,0.3125,0.8125
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2470703125,0.2998046875,0.36865234375,0.2998046875,0.015625,0.3046875,1.0,0.2998046875,0.0,0.3125,0.8125
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.09423828125,0.2880859375,0.67041015625,0.2998046875,0.015625,0.2890625,0.7734375,0.2998046875,0.0,0.3125,1.0
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24365234375,0.30029296875,0.3466796875,0.2998046875,0.0,0.3046875,0.609375,0.2998046875,0.0,0.3125,0.875
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24609375,0.2998046875,0.3515625,0.2998046875,0.0078125,0.3046875,0.59375,0.2998046875,0.0,0.3125,0.875
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.208984375,0.30078125,0.3759765625,0.2999442219734192,0.0078125,0.3203125,0.5703125,0.2999442219734192,0.0,0.3125,0.875
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.22802734375,0.30029296875,0.38525390625,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22705078125,0.30029296875,0.36474609375,0.2998046875,0.0,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2412109375,0.29931640625,0.3623046875,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.0693359375,0.330078125,0.55615234375,0.2998046875,0.0,0.3046875,0.703125,0.2998046875,0.0,0.3125,1.0
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24462890625,0.2998046875,0.34423828125,0.2998046875,0.0,0.3046875,0.6171875,0.2998046875,0.0,0.3125,0.875
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24560546875,0.30029296875,0.34912109375,0.2998046875,0.0078125,0.3046875,0.625,0.2998046875,0.0,0.3125,0.875
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.2236328125,0.30078125,0.37548828125,0.2999442219734192,0.0,0.3125,0.59375,0.2999442219734192,0.0,0.3125,0.875
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.2177734375,0.30029296875,0.3544921875,0.2998046875,0.0,0.3125,0.5390625,0.2998046875,0.0,0.3125,0.875
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22509765625,0.30126953125,0.3603515625,0.2998046875,0.0,0.3125,0.78125,0.2998046875,0.0,0.3125,0.875
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22802734375,0.2998046875,0.3837890625,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.0869140625,0.271484375,0.8271484375,0.2998046875,0.0234375,0.2734375,0.9453125,0.2998046875,0.0,0.25,1.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25048828125,0.30029296875,0.35205078125,0.2998046875,0.0078125,0.3046875,0.703125,0.2998046875,0.0,0.3125,0.9375
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25048828125,0.30029296875,0.35009765625,0.2998046875,0.0,0.3046875,0.6640625,0.2998046875,0.0,0.3125,0.9375
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.21142578125,0.30078125,0.38427734375,0.2999442219734192,0.0,0.3203125,0.65625,0.2999442219734192,0.0,0.3125,0.875
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.21826171875,0.30029296875,0.3603515625,0.2998046875,0.0,0.3046875,0.8203125,0.2998046875,0.0,0.3125,0.9375
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.22607421875,0.2998046875,0.3525390625,0.2998046875,0.0,0.3125,0.8359375,0.2998046875,0.0,0.3125,0.8125
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,2936832,0.2998046875,"(128, 16)",8 x 256,2048,0.2998046875,0.2138671875,0.2998046875,0.36279296875,0.2998046875,0.0078125,0.3046875,1.0,0.2998046875,0.0,0.3125,0.875
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,11747328,0.2998046875,"(128, 16)",32 x 256,8192,0.2998046875,0.0390625,0.26416015625,0.65380859375,0.2998046875,0.0,0.2734375,0.828125,0.2998046875,0.0,0.25,1.0
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.25537109375,0.2998046875,0.35107421875,0.2998046875,0.0234375,0.3046875,0.7421875,0.2998046875,0.0,0.3125,0.875
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,41115648,0.2998046278953552,"(128, 16)",112 x 256,28672,0.2998046875,0.24951171875,0.2998046875,0.3466796875,0.2998046875,0.015625,0.3046875,0.6875,0.2998046875,0.0,0.3125,0.875
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,41107456,0.2999441623687744,"(128, 16)",32 x 896,28672,0.2999442219734192,0.18701171875,0.2998046875,0.404296875,0.2999442219734192,0.0,0.328125,0.765625,0.2999442219734192,0.0,0.3125,0.875
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.4.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1005859375,0.39990234375,0.736328125,0.39990234375,0.0,0.1484375,1.0,0.39990234375,0.0625,0.375,0.9375
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.10595703125,0.3974609375,0.7314453125,0.39990234375,0.0,0.1640625,1.0,0.39990234375,0.0625,0.375,0.9375
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.12109375,0.4033203125,0.697265625,0.39990234375,0.0,0.1484375,1.0,0.39990234375,0.0625,0.375,0.875
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.0439453125,0.322265625,0.998046875,0.39990234375,0.0,0.3046875,1.0,0.39990234375,0.0,0.3125,1.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3310546875,0.39892578125,0.49755859375,0.3999023735523224,0.0390625,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.3984375,0.5087890625,0.3999023735523224,0.046875,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.26806640625,0.400390625,0.51025390625,0.3999721109867096,0.015625,0.40625,0.8671875,0.3999721109867096,0.0,0.375,1.0
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.23974609375,0.400390625,0.59716796875,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.23681640625,0.400390625,0.57861328125,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.24169921875,0.39990234375,0.54736328125,0.39990234375,0.0078125,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1552734375,0.38818359375,0.91796875,0.39990234375,0.015625,0.3515625,1.0,0.39990234375,0.0,0.375,1.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.3984375,0.49951171875,0.3999023735523224,0.0859375,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.3984375,0.4873046875,0.3999023735523224,0.0703125,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3134765625,0.40087890625,0.498046875,0.3999721109867096,0.0,0.4140625,0.7421875,0.3999721109867096,0.0,0.375,1.0
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.27880859375,0.40087890625,0.52685546875,0.39990234375,0.0,0.4296875,1.0,0.39990234375,0.0,0.375,0.9375
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.28076171875,0.40087890625,0.548828125,0.39990234375,0.0,0.4296875,1.0,0.39990234375,0.0,0.375,0.9375
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31298828125,0.400390625,0.50732421875,0.39990234375,0.03125,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.169921875,0.34814453125,0.912109375,0.39990234375,0.0,0.3515625,1.0,0.39990234375,0.0,0.375,1.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.39892578125,0.49267578125,0.3999023735523224,0.0546875,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3369140625,0.3984375,0.48876953125,0.3999023735523224,0.109375,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29931640625,0.400390625,0.5205078125,0.3999721109867096,0.0078125,0.4140625,0.78125,0.3999721109867096,0.0,0.375,1.0
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.310546875,0.400390625,0.49658203125,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,1.0
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.29541015625,0.40087890625,0.49072265625,0.39990234375,0.0078125,0.421875,1.0,0.39990234375,0.0,0.375,0.875
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32470703125,0.400390625,0.49169921875,0.39990234375,0.046875,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1865234375,0.38427734375,0.68994140625,0.39990234375,0.0234375,0.390625,0.8984375,0.39990234375,0.0,0.375,1.0
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3486328125,0.39892578125,0.48876953125,0.3999023735523224,0.0390625,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.39892578125,0.49169921875,0.3999023735523224,0.1015625,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.30419921875,0.3994140625,0.4921875,0.3999721109867096,0.0078125,0.40625,0.75,0.3999721109867096,0.0,0.375,0.9375
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.2998046875,0.40087890625,0.49462890625,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,1.0
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.298828125,0.400390625,0.49560546875,0.39990234375,0.0,0.421875,1.0,0.39990234375,0.0,0.375,0.9375
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32275390625,0.39990234375,0.509765625,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1865234375,0.36181640625,0.8828125,0.39990234375,0.0078125,0.359375,0.9921875,0.39990234375,0.0,0.375,1.0
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.39990234375,0.4677734375,0.3999023735523224,0.0234375,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3466796875,0.39990234375,0.46923828125,0.3999023735523224,0.09375,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31787109375,0.400390625,0.48388671875,0.3999721109867096,0.015625,0.3984375,0.9375,0.3999721109867096,0.0,0.375,1.0
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.29833984375,0.400390625,0.49609375,0.39990234375,0.0078125,0.421875,0.875,0.39990234375,0.0,0.375,0.9375
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.28662109375,0.40087890625,0.48779296875,0.39990234375,0.0,0.421875,0.8046875,0.39990234375,0.0,0.375,1.0
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32470703125,0.39990234375,0.5029296875,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,1.0
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1396484375,0.3388671875,0.85498046875,0.39990234375,0.0078125,0.359375,0.9765625,0.39990234375,0.0,0.375,1.0
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33251953125,0.400390625,0.4619140625,0.3999023735523224,0.0078125,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.333984375,0.39990234375,0.482421875,0.3999023735523224,0.0703125,0.3984375,1.0,0.3999023735523224,0.0,0.375,0.9375
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.32275390625,0.39990234375,0.48974609375,0.3999721109867096,0.0,0.3984375,0.8046875,0.3999721109867096,0.0,0.375,1.0
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.314453125,0.400390625,0.482421875,0.39990234375,0.0,0.4140625,0.8046875,0.39990234375,0.0,0.375,0.9375
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.2998046875,0.4013671875,0.47998046875,0.39990234375,0.0078125,0.421875,0.8515625,0.39990234375,0.0,0.375,0.9375
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31982421875,0.400390625,0.478515625,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1962890625,0.3837890625,0.7177734375,0.39990234375,0.0078125,0.3828125,0.8984375,0.39990234375,0.0,0.375,1.0
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3359375,0.400390625,0.4658203125,0.3999023735523224,0.015625,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.39990234375,0.46533203125,0.3999023735523224,0.0703125,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.32861328125,0.39990234375,0.49365234375,0.3999721109867096,0.0,0.3984375,0.8515625,0.3999721109867096,0.0,0.375,1.0
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.30908203125,0.400390625,0.474609375,0.39990234375,0.0,0.4140625,0.7734375,0.39990234375,0.0,0.375,0.9375
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.40087890625,0.4736328125,0.39990234375,0.0,0.4140625,0.8125,0.39990234375,0.0,0.375,0.9375
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32763671875,0.3994140625,0.4677734375,0.39990234375,0.0234375,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.271484375,0.39453125,0.5712890625,0.39990234375,0.0078125,0.40625,0.875,0.39990234375,0.0,0.375,1.0
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33447265625,0.400390625,0.44921875,0.3999023735523224,0.015625,0.40625,0.6328125,0.3999023735523224,0.0,0.375,1.0
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33154296875,0.400390625,0.44921875,0.3999023735523224,0.0703125,0.40625,0.609375,0.3999023735523224,0.0,0.375,1.0
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3212890625,0.400390625,0.47998046875,0.3999721109867096,0.0,0.3984375,0.859375,0.3999721109867096,0.0,0.375,1.0
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3193359375,0.400390625,0.48388671875,0.39990234375,0.0,0.40625,0.734375,0.39990234375,0.0,0.375,1.0
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.306640625,0.400390625,0.46875,0.39990234375,0.0,0.4140625,0.7734375,0.39990234375,0.0,0.375,0.9375
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33984375,0.39990234375,0.47021484375,0.39990234375,0.0234375,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.2197265625,0.3935546875,0.6259765625,0.39990234375,0.0078125,0.3984375,0.7890625,0.39990234375,0.0,0.375,1.0
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33544921875,0.400390625,0.4736328125,0.3999023735523224,0.0078125,0.40625,1.0,0.3999023735523224,0.0,0.375,1.0
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3330078125,0.400390625,0.4755859375,0.3999023735523224,0.03125,0.3984375,1.0,0.3999023735523224,0.0,0.375,1.0
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3232421875,0.400390625,0.47412109375,0.3999721109867096,0.046875,0.3984375,0.828125,0.3999721109867096,0.0,0.375,0.9375
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31689453125,0.400390625,0.47021484375,0.39990234375,0.0,0.40625,0.75,0.39990234375,0.0,0.375,0.9375
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31591796875,0.40087890625,0.46923828125,0.39990234375,0.0,0.40625,0.7890625,0.39990234375,0.0,0.375,0.9375
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33251953125,0.39990234375,0.46630859375,0.39990234375,0.03125,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.2197265625,0.39111328125,0.65283203125,0.39990234375,0.015625,0.3984375,0.8203125,0.39990234375,0.0,0.375,1.0
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.333984375,0.40087890625,0.44775390625,0.3999023735523224,0.0078125,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.400390625,0.44921875,0.3999023735523224,0.046875,0.3984375,0.640625,0.3999023735523224,0.0,0.375,0.9375
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3095703125,0.400390625,0.4853515625,0.3999721109867096,0.0,0.3984375,0.828125,0.3999721109867096,0.0,0.375,1.0
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31787109375,0.400390625,0.47119140625,0.39990234375,0.0,0.40625,0.7421875,0.39990234375,0.0,0.375,0.9375
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31396484375,0.40087890625,0.47265625,0.39990234375,0.0,0.4140625,0.828125,0.39990234375,0.0,0.375,0.9375
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31396484375,0.39990234375,0.4619140625,0.39990234375,0.0390625,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.19482421875,0.36962890625,0.7080078125,0.39990234375,0.0078125,0.3828125,0.8671875,0.39990234375,0.0,0.375,1.0
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3173828125,0.40087890625,0.45458984375,0.3999023735523224,0.015625,0.40625,0.640625,0.3999023735523224,0.0,0.375,1.0
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.400390625,0.447265625,0.3999023735523224,0.046875,0.3984375,0.6484375,0.3999023735523224,0.0,0.375,1.0
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.306640625,0.39990234375,0.48095703125,0.3999721109867096,0.0078125,0.40625,0.8046875,0.3999721109867096,0.0,0.375,1.0
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31103515625,0.400390625,0.4638671875,0.39990234375,0.0,0.40625,0.7890625,0.39990234375,0.0,0.375,0.9375
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3212890625,0.40087890625,0.4658203125,0.39990234375,0.0,0.40625,0.8671875,0.39990234375,0.0,0.375,0.9375
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33154296875,0.39990234375,0.455078125,0.39990234375,0.0546875,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.240234375,0.3818359375,0.70361328125,0.39990234375,0.0078125,0.3984375,0.84375,0.39990234375,0.0,0.375,1.0
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.326171875,0.40087890625,0.45166015625,0.3999023735523224,0.015625,0.40625,0.6484375,0.3999023735523224,0.0,0.375,1.0
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3359375,0.400390625,0.45361328125,0.3999023735523224,0.046875,0.3984375,0.6796875,0.3999023735523224,0.0,0.375,1.0
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31689453125,0.3994140625,0.482421875,0.3999721109867096,0.0,0.40625,0.875,0.3999721109867096,0.0,0.375,1.0
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31884765625,0.40087890625,0.46240234375,0.39990234375,0.0,0.40625,0.765625,0.39990234375,0.0,0.375,1.0
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31103515625,0.40087890625,0.4580078125,0.39990234375,0.0078125,0.40625,0.8359375,0.39990234375,0.0,0.375,0.9375
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32861328125,0.3994140625,0.46240234375,0.39990234375,0.046875,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.18359375,0.40087890625,0.60302734375,0.39990234375,0.015625,0.40625,0.7734375,0.39990234375,0.0,0.375,1.0
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.40087890625,0.44873046875,0.3999023735523224,0.0078125,0.40625,0.640625,0.3999023735523224,0.0,0.375,1.0
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3408203125,0.400390625,0.4443359375,0.3999023735523224,0.0390625,0.40625,0.609375,0.3999023735523224,0.0,0.375,1.0
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3095703125,0.400390625,0.48095703125,0.3999721109867096,0.0,0.40625,0.8046875,0.3999721109867096,0.0,0.375,1.0
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31884765625,0.400390625,0.478515625,0.39990234375,0.0,0.40625,0.8046875,0.39990234375,0.0,0.375,1.0
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.39990234375,0.46240234375,0.39990234375,0.0,0.40625,0.890625,0.39990234375,0.0,0.375,0.9375
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33837890625,0.39990234375,0.462890625,0.39990234375,0.03125,0.3984375,0.9921875,0.39990234375,0.0,0.375,0.9375
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.20947265625,0.39794921875,0.6337890625,0.39990234375,0.015625,0.40625,0.796875,0.39990234375,0.0,0.375,1.0
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33203125,0.40087890625,0.44873046875,0.3999023735523224,0.0078125,0.40625,0.640625,0.3999023735523224,0.0,0.375,0.9375
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33984375,0.400390625,0.44970703125,0.3999023735523224,0.0390625,0.40625,0.625,0.3999023735523224,0.0,0.375,1.0
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3134765625,0.400390625,0.48876953125,0.3999721109867096,0.0,0.40625,0.875,0.3999721109867096,0.0,0.375,1.0
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3212890625,0.400390625,0.466796875,0.39990234375,0.0078125,0.40625,0.7421875,0.39990234375,0.0,0.375,0.9375
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3173828125,0.400390625,0.45751953125,0.39990234375,0.0078125,0.40625,0.921875,0.39990234375,0.0,0.375,0.9375
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.310546875,0.400390625,0.478515625,0.39990234375,0.0078125,0.40625,0.9921875,0.39990234375,0.0,0.375,0.9375
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.22021484375,0.3876953125,0.7177734375,0.39990234375,0.015625,0.390625,0.8984375,0.39990234375,0.0,0.375,1.0
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.400390625,0.45458984375,0.3999023735523224,0.0078125,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3369140625,0.400390625,0.4482421875,0.3999023735523224,0.03125,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29638671875,0.400390625,0.48681640625,0.3999721109867096,0.03125,0.40625,0.84375,0.3999721109867096,0.0,0.375,1.0
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3232421875,0.40087890625,0.4609375,0.39990234375,0.0078125,0.40625,0.734375,0.39990234375,0.0,0.375,1.0
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3173828125,0.400390625,0.458984375,0.39990234375,0.0078125,0.4140625,0.8671875,0.39990234375,0.0,0.375,0.9375
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3271484375,0.400390625,0.47412109375,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1796875,0.37255859375,0.76904296875,0.39990234375,0.015625,0.375,0.8984375,0.39990234375,0.0,0.375,1.0
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.330078125,0.400390625,0.44873046875,0.3999023735523224,0.0,0.40625,0.6171875,0.3999023735523224,0.0,0.375,1.0
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3330078125,0.400390625,0.44970703125,0.3999023735523224,0.03125,0.40625,0.625,0.3999023735523224,0.0,0.375,0.9375
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31591796875,0.39990234375,0.4912109375,0.3999721109867096,0.0390625,0.3984375,0.8671875,0.3999721109867096,0.0,0.375,1.0
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31982421875,0.40087890625,0.46923828125,0.39990234375,0.0078125,0.4140625,0.734375,0.39990234375,0.0,0.375,0.9375
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31884765625,0.400390625,0.46435546875,0.39990234375,0.0,0.4140625,0.890625,0.39990234375,0.0,0.375,0.9375
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.39990234375,0.4794921875,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1708984375,0.38037109375,0.73876953125,0.39990234375,0.0078125,0.390625,0.8671875,0.39990234375,0.0,0.375,1.0
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33447265625,0.400390625,0.45947265625,0.3999023735523224,0.0078125,0.40625,0.6640625,0.3999023735523224,0.0,0.375,1.0
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3408203125,0.400390625,0.4482421875,0.3999023735523224,0.0234375,0.40625,0.6875,0.3999023735523224,0.0,0.375,1.0
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31396484375,0.400390625,0.48193359375,0.3999721109867096,0.015625,0.40625,0.7734375,0.3999721109867096,0.0,0.375,1.0
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.330078125,0.400390625,0.4609375,0.39990234375,0.0078125,0.4140625,0.7421875,0.39990234375,0.0,0.375,1.0
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32421875,0.40087890625,0.46337890625,0.39990234375,0.0,0.4140625,0.8515625,0.39990234375,0.0,0.375,0.9375
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33447265625,0.400390625,0.47119140625,0.39990234375,0.03125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1875,0.3798828125,0.80615234375,0.39990234375,0.015625,0.3828125,0.9375,0.39990234375,0.0,0.375,1.0
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3359375,0.400390625,0.44873046875,0.3999023735523224,0.0,0.40625,0.75,0.3999023735523224,0.0,0.375,1.0
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33984375,0.400390625,0.4501953125,0.3999023735523224,0.0234375,0.40625,0.6875,0.3999023735523224,0.0,0.375,1.0
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3095703125,0.39990234375,0.4921875,0.3999721109867096,0.0390625,0.40625,0.859375,0.3999721109867096,0.0,0.375,1.0
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31298828125,0.40087890625,0.47265625,0.39990234375,0.0078125,0.4140625,0.8359375,0.39990234375,0.0,0.375,0.9375
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32666015625,0.40087890625,0.48486328125,0.39990234375,0.0,0.4140625,0.90625,0.39990234375,0.0,0.375,1.0
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3203125,0.400390625,0.46484375,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1689453125,0.3203125,0.92822265625,0.39990234375,0.015625,0.34375,1.0,0.39990234375,0.0,0.375,1.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33251953125,0.400390625,0.45166015625,0.3999023735523224,0.0078125,0.40625,0.6953125,0.3999023735523224,0.0,0.375,1.0
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.400390625,0.4482421875,0.3999023735523224,0.03125,0.40625,0.7421875,0.3999023735523224,0.0,0.375,0.9375
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.31787109375,0.39990234375,0.47607421875,0.3999721109867096,0.0078125,0.40625,0.796875,0.3999721109867096,0.0,0.375,1.0
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.32763671875,0.40087890625,0.4697265625,0.39990234375,0.0,0.4140625,0.7109375,0.39990234375,0.0,0.375,0.9375
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.306640625,0.40087890625,0.48828125,0.39990234375,0.0,0.4140625,0.8671875,0.39990234375,0.0,0.375,0.9375
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.39990234375,0.4794921875,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1298828125,0.3603515625,0.89990234375,0.39990234375,0.0078125,0.3671875,0.9921875,0.39990234375,0.0,0.375,1.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33642578125,0.400390625,0.4521484375,0.3999023735523224,0.0,0.40625,0.6796875,0.3999023735523224,0.0,0.375,0.9375
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.400390625,0.45166015625,0.3999023735523224,0.015625,0.40625,0.7578125,0.3999023735523224,0.0,0.375,1.0
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.30810546875,0.400390625,0.48193359375,0.3999721109867096,0.0078125,0.40625,0.8125,0.3999721109867096,0.0,0.375,1.0
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31591796875,0.40087890625,0.47119140625,0.39990234375,0.0078125,0.4140625,0.734375,0.39990234375,0.0,0.375,0.9375
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31787109375,0.40087890625,0.4716796875,0.39990234375,0.0078125,0.4140625,0.875,0.39990234375,0.0,0.375,0.9375
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32275390625,0.400390625,0.48046875,0.39990234375,0.015625,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.18310546875,0.34619140625,0.912109375,0.39990234375,0.015625,0.3515625,1.0,0.39990234375,0.0,0.375,1.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33447265625,0.400390625,0.45068359375,0.3999023735523224,0.0,0.40625,0.6953125,0.3999023735523224,0.0,0.375,0.9375
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3466796875,0.400390625,0.45263671875,0.3999023735523224,0.03125,0.40625,0.7734375,0.3999023735523224,0.0,0.375,1.0
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3173828125,0.400390625,0.48876953125,0.3999721109867096,0.0078125,0.40625,0.765625,0.3999721109867096,0.0,0.375,1.0
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31689453125,0.40087890625,0.462890625,0.39990234375,0.0078125,0.4140625,0.671875,0.39990234375,0.0,0.375,1.0
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.40087890625,0.47119140625,0.39990234375,0.0,0.4140625,0.7890625,0.39990234375,0.0,0.375,0.9375
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.39990234375,0.47314453125,0.39990234375,0.015625,0.40625,1.0,0.39990234375,0.0,0.375,1.0
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1513671875,0.36376953125,0.931640625,0.39990234375,0.0,0.375,1.0,0.39990234375,0.0,0.375,1.0
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.400390625,0.44921875,0.3999023735523224,0.0078125,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.330078125,0.400390625,0.455078125,0.3999023735523224,0.0234375,0.40625,0.7578125,0.3999023735523224,0.0,0.375,1.0
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.30517578125,0.400390625,0.486328125,0.3999721109867096,0.03125,0.4140625,0.7421875,0.3999721109867096,0.0,0.375,1.0
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31494140625,0.40087890625,0.4716796875,0.39990234375,0.0,0.4140625,0.6484375,0.39990234375,0.0,0.375,0.9375
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3134765625,0.40087890625,0.46923828125,0.39990234375,0.0,0.4140625,0.8515625,0.39990234375,0.0,0.375,1.0
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.32666015625,0.39990234375,0.4677734375,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,0.875
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.171875,0.38134765625,0.78173828125,0.39990234375,0.0078125,0.3828125,0.9296875,0.39990234375,0.0,0.375,1.0
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.400390625,0.453125,0.3999023735523224,0.0078125,0.40625,0.7109375,0.3999023735523224,0.0,0.375,1.0
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3408203125,0.400390625,0.45068359375,0.3999023735523224,0.015625,0.3984375,0.75,0.3999023735523224,0.0,0.375,0.9375
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.298828125,0.39990234375,0.47998046875,0.3999721109867096,0.015625,0.4140625,0.7265625,0.3999721109867096,0.0,0.375,1.0
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31103515625,0.40087890625,0.46875,0.39990234375,0.0,0.4140625,0.7265625,0.39990234375,0.0,0.375,1.0
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31884765625,0.40087890625,0.46240234375,0.39990234375,0.0,0.4140625,0.84375,0.39990234375,0.0,0.375,0.875
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.330078125,0.39990234375,0.46484375,0.39990234375,0.0234375,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.15966796875,0.388671875,0.72705078125,0.39990234375,0.0078125,0.3828125,0.890625,0.39990234375,0.0,0.375,1.0
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.400390625,0.4462890625,0.3999023735523224,0.0,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3388671875,0.400390625,0.451171875,0.3999023735523224,0.0234375,0.3984375,0.7578125,0.3999023735523224,0.0,0.375,1.0
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29638671875,0.400390625,0.484375,0.3999721109867096,0.0,0.4140625,0.6953125,0.3999721109867096,0.0,0.375,1.0
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31689453125,0.400390625,0.45947265625,0.39990234375,0.0,0.4140625,0.734375,0.39990234375,0.0,0.375,0.9375
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3173828125,0.400390625,0.46875,0.39990234375,0.0,0.4140625,0.84375,0.39990234375,0.0,0.375,0.9375
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31884765625,0.39990234375,0.4873046875,0.39990234375,0.015625,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.14599609375,0.34130859375,0.81201171875,0.39990234375,0.0078125,0.359375,0.9765625,0.39990234375,0.0,0.375,1.0
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.400390625,0.44970703125,0.3999023735523224,0.0,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34521484375,0.400390625,0.45263671875,0.3999023735523224,0.015625,0.3984375,0.78125,0.3999023735523224,0.0,0.375,0.9375
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.3056640625,0.40087890625,0.4814453125,0.3999721109867096,0.0,0.421875,0.703125,0.3999721109867096,0.0,0.375,1.0
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31884765625,0.400390625,0.46728515625,0.39990234375,0.0,0.4140625,0.6796875,0.39990234375,0.0,0.375,0.9375
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.318359375,0.40087890625,0.490234375,0.39990234375,0.0,0.4140625,0.8125,0.39990234375,0.0,0.375,0.9375
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.39990234375,0.47900390625,0.39990234375,0.015625,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.140625,0.33642578125,0.95654296875,0.39990234375,0.015625,0.34375,1.0,0.39990234375,0.0,0.375,1.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34375,0.400390625,0.44970703125,0.3999023735523224,0.0,0.40625,0.734375,0.3999023735523224,0.0,0.375,0.9375
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3427734375,0.400390625,0.44580078125,0.3999023735523224,0.0234375,0.3984375,0.7578125,0.3999023735523224,0.0,0.375,1.0
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.2890625,0.400390625,0.49267578125,0.3999721109867096,0.015625,0.421875,0.671875,0.3999721109867096,0.0,0.375,1.0
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3212890625,0.40087890625,0.4677734375,0.39990234375,0.0,0.4140625,0.6484375,0.39990234375,0.0,0.375,0.9375
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31103515625,0.40087890625,0.49462890625,0.39990234375,0.0,0.4140625,0.8359375,0.39990234375,0.0,0.375,0.9375
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31201171875,0.400390625,0.47509765625,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.1533203125,0.35302734375,0.82568359375,0.39990234375,0.015625,0.3515625,0.9296875,0.39990234375,0.0,0.375,1.0
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.400390625,0.44970703125,0.3999023735523224,0.0078125,0.40625,0.75,0.3999023735523224,0.0,0.375,1.0
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3369140625,0.400390625,0.447265625,0.3999023735523224,0.015625,0.3984375,0.78125,0.3999023735523224,0.0,0.375,1.0
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29248046875,0.4013671875,0.48974609375,0.3999721109867096,0.015625,0.421875,0.7421875,0.3999721109867096,0.0,0.375,1.0
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3232421875,0.400390625,0.48046875,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3232421875,0.400390625,0.4892578125,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31982421875,0.40087890625,0.47998046875,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.14306640625,0.37255859375,0.794921875,0.39990234375,0.0078125,0.359375,0.9609375,0.39990234375,0.0,0.375,1.0
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33349609375,0.400390625,0.4521484375,0.3999023735523224,0.0078125,0.40625,0.7109375,0.3999023735523224,0.0,0.375,1.0
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34228515625,0.400390625,0.4560546875,0.3999023735523224,0.0078125,0.3984375,0.7890625,0.3999023735523224,0.0,0.375,1.0
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.29736328125,0.40087890625,0.486328125,0.3999721109867096,0.0,0.421875,0.6953125,0.3999721109867096,0.0,0.375,1.0
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.3056640625,0.4013671875,0.4599609375,0.39990234375,0.0,0.4140625,0.6484375,0.39990234375,0.0,0.375,1.0
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31640625,0.40087890625,0.46630859375,0.39990234375,0.0,0.4140625,0.8515625,0.39990234375,0.0,0.375,0.9375
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.3291015625,0.400390625,0.47119140625,0.39990234375,0.0234375,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.130859375,0.38916015625,0.82421875,0.39990234375,0.015625,0.390625,0.9140625,0.39990234375,0.0,0.375,1.0
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33544921875,0.40087890625,0.455078125,0.3999023735523224,0.0,0.40625,0.71875,0.3999023735523224,0.0,0.375,1.0
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.34033203125,0.400390625,0.45068359375,0.3999023735523224,0.0234375,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.28857421875,0.4013671875,0.486328125,0.3999721109867096,0.0078125,0.4296875,0.6953125,0.3999721109867096,0.0,0.375,1.0
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.31787109375,0.40087890625,0.490234375,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,1.0
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31640625,0.40087890625,0.46826171875,0.39990234375,0.0,0.4140625,1.0,0.39990234375,0.0,0.375,0.9375
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.33349609375,0.39990234375,0.46826171875,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.08544921875,0.421875,0.71240234375,0.39990234375,0.0078125,0.4140625,0.8828125,0.39990234375,0.0,0.375,1.0
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.400390625,0.44921875,0.3999023735523224,0.0078125,0.40625,0.7734375,0.3999023735523224,0.0,0.375,1.0
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3349609375,0.400390625,0.4501953125,0.3999023735523224,0.015625,0.40625,0.7421875,0.3999023735523224,0.0,0.375,1.0
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.306640625,0.4013671875,0.49267578125,0.3999721109867096,0.0,0.4296875,0.7421875,0.3999721109867096,0.0,0.375,1.0
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.298828125,0.40087890625,0.4736328125,0.39990234375,0.0,0.4140625,0.65625,0.39990234375,0.0,0.375,0.9375
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31103515625,0.40087890625,0.48095703125,0.39990234375,0.0,0.421875,0.859375,0.39990234375,0.0,0.375,0.9375
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.31005859375,0.400390625,0.48583984375,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.125,0.357421875,0.9443359375,0.39990234375,0.03125,0.359375,0.9921875,0.39990234375,0.0,0.375,1.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33935546875,0.400390625,0.4638671875,0.3999023735523224,0.0078125,0.40625,0.828125,0.3999023735523224,0.0,0.375,0.9375
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.33740234375,0.40087890625,0.4580078125,0.3999023735523224,0.015625,0.40625,0.8046875,0.3999023735523224,0.0,0.375,1.0
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.27734375,0.4013671875,0.49951171875,0.3999721109867096,0.0,0.4296875,0.8125,0.3999721109867096,0.0,0.375,0.9375
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.302734375,0.400390625,0.4716796875,0.39990234375,0.0,0.4140625,0.9375,0.39990234375,0.0,0.375,1.0
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.306640625,0.40087890625,0.4638671875,0.39990234375,0.0,0.4140625,0.90625,0.39990234375,0.0,0.375,0.9375
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,2516992,0.39990234375,"(128, 16)",8 x 256,2048,0.39990234375,0.29638671875,0.400390625,0.46142578125,0.39990234375,0.0078125,0.40625,1.0,0.39990234375,0.0,0.375,0.9375
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,10067968,0.39990234375,"(128, 16)",32 x 256,8192,0.39990234375,0.05615234375,0.361328125,0.888671875,0.39990234375,0.0,0.3671875,0.9921875,0.39990234375,0.0,0.375,1.0
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.3427734375,0.400390625,0.45947265625,0.3999023735523224,0.0390625,0.40625,0.84375,0.3999023735523224,0.0,0.375,1.0
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,35237888,0.39990234375,"(128, 16)",112 x 256,28672,0.3999023735523224,0.337890625,0.400390625,0.451171875,0.3999023735523224,0.0390625,0.40625,0.796875,0.3999023735523224,0.0,0.375,1.0
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,35233792,0.3999720811843872,"(128, 16)",32 x 896,28672,0.3999721109867096,0.2470703125,0.39990234375,0.5361328125,0.3999721109867096,0.0,0.453125,0.890625,0.3999721109867096,0.0,0.375,1.0
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.5.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.15673828125,0.49853515625,0.80322265625,0.5,0.0,0.3984375,1.0,0.5,0.0625,0.5,1.0
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.1787109375,0.49755859375,0.79833984375,0.5,0.0,0.3984375,1.0,0.5,0.0625,0.5,1.0
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.2021484375,0.50244140625,0.7734375,0.5,0.0,0.3359375,1.0,0.5,0.0625,0.5,1.0
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.06494140625,0.45361328125,1.0,0.5,0.0,0.4375,1.0,0.5,0.0,0.5,1.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.419921875,0.49951171875,0.587890625,0.5,0.0625,0.5078125,1.0,0.5,0.0,0.5,1.0
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4345703125,0.49951171875,0.59326171875,0.5,0.0859375,0.5078125,1.0,0.5,0.0,0.5,1.0
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.341796875,0.5009765625,0.619140625,0.5,0.03125,0.5078125,0.9140625,0.5,0.0,0.5,1.0
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3037109375,0.5009765625,0.69140625,0.5,0.0,0.5390625,1.0,0.5,0.0,0.5,1.0
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.29638671875,0.50048828125,0.66162109375,0.5,0.0,0.546875,1.0,0.5,0.0,0.5,1.0
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.31298828125,0.5009765625,0.6376953125,0.5,0.0078125,0.5390625,1.0,0.5,0.0,0.5,1.0
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2197265625,0.50732421875,0.9677734375,0.5,0.03125,0.4609375,1.0,0.5,0.0,0.5,1.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43115234375,0.49951171875,0.58544921875,0.5,0.1328125,0.5078125,1.0,0.5,0.0,0.5,1.0
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42919921875,0.4990234375,0.57763671875,0.5,0.1171875,0.5078125,1.0,0.5,0.0,0.5,1.0
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.40087890625,0.50146484375,0.61083984375,0.5,0.0,0.5234375,0.859375,0.5,0.0,0.5,1.0
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3681640625,0.501953125,0.63525390625,0.5,0.0,0.5390625,1.0,0.5,0.0,0.5,1.0
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3515625,0.50244140625,0.66455078125,0.5,0.0078125,0.546875,1.0,0.5,0.0,0.5,1.0
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3994140625,0.501953125,0.6064453125,0.5,0.03125,0.53125,1.0,0.5,0.0,0.5,1.0
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2265625,0.4404296875,0.96630859375,0.5,0.0078125,0.453125,1.0,0.5,0.0,0.5,1.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4375,0.49951171875,0.57275390625,0.5,0.1015625,0.5,1.0,0.5,0.0,0.5,1.0
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4375,0.49951171875,0.5771484375,0.5,0.15625,0.5,1.0,0.5,0.0,0.5,1.0
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.38037109375,0.50048828125,0.62890625,0.5,0.015625,0.515625,0.8515625,0.5,0.0,0.5,1.0
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3984375,0.5009765625,0.59765625,0.5,0.015625,0.5234375,1.0,0.5,0.0,0.5,1.0
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3896484375,0.501953125,0.59423828125,0.5,0.015625,0.53125,1.0,0.5,0.0,0.5,1.0
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41064453125,0.5009765625,0.587890625,0.5,0.046875,0.515625,1.0,0.5,0.0,0.5,1.0
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.25048828125,0.486328125,0.8359375,0.5,0.0390625,0.4921875,0.9765625,0.5,0.0,0.5,1.0
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43798828125,0.5,0.576171875,0.5,0.0625,0.5078125,1.0,0.5,0.0,0.5,1.0
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43994140625,0.49951171875,0.5712890625,0.5,0.140625,0.5,1.0,0.5,0.0,0.5,1.0
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.39306640625,0.5,0.59765625,0.5,0.0234375,0.515625,0.859375,0.5,0.0,0.5,1.0
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.37890625,0.5009765625,0.607421875,0.5,0.0078125,0.53125,1.0,0.5,0.0,0.5,1.0
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3759765625,0.50146484375,0.60986328125,0.5,0.0078125,0.53125,1.0,0.5,0.0,0.5,1.0
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40869140625,0.50048828125,0.62255859375,0.5,0.0390625,0.515625,1.0,0.5,0.0,0.5,1.0
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2314453125,0.462890625,0.95458984375,0.5,0.0078125,0.4609375,1.0,0.5,0.0,0.5,1.0
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43603515625,0.50048828125,0.5625,0.5,0.0390625,0.5078125,1.0,0.5,0.0,0.5,1.0
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43896484375,0.50048828125,0.5693359375,0.5,0.140625,0.5078125,1.0,0.5,0.0,0.5,1.0
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.40380859375,0.5009765625,0.591796875,0.5,0.0234375,0.5,0.96875,0.5,0.0,0.5,1.0
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.37841796875,0.50048828125,0.60400390625,0.5,0.0078125,0.53125,0.9375,0.5,0.0,0.5,1.0
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.37841796875,0.5009765625,0.59912109375,0.5,0.0078125,0.53125,0.84375,0.5,0.0,0.5,1.0
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40966796875,0.50048828125,0.60107421875,0.5,0.03125,0.515625,1.0,0.5,0.0,0.5,1.0
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.18359375,0.4443359375,0.92919921875,0.5,0.0078125,0.46875,1.0,0.5,0.0,0.5,1.0
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.41552734375,0.50048828125,0.55859375,0.5,0.03125,0.5078125,1.0,0.5,0.0,0.5,1.0
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.435546875,0.50048828125,0.5693359375,0.5,0.1015625,0.5,1.0,0.5,0.0,0.5,1.0
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.4111328125,0.50048828125,0.5947265625,0.5,0.0,0.5,0.9296875,0.5,0.0,0.5,1.0
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3955078125,0.50048828125,0.59619140625,0.5,0.0078125,0.5234375,0.890625,0.5,0.0,0.5,1.0
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.388671875,0.50146484375,0.59228515625,0.5,0.015625,0.53125,0.8984375,0.5,0.0,0.5,1.0
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41650390625,0.50048828125,0.5712890625,0.5,0.03125,0.515625,1.0,0.5,0.0,0.5,1.0
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.24658203125,0.49365234375,0.841796875,0.5,0.0078125,0.4921875,0.984375,0.5,0.0,0.5,1.0
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42626953125,0.5009765625,0.56103515625,0.5,0.0390625,0.5078125,1.0,0.5,0.0,0.5,1.0
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43212890625,0.50048828125,0.55224609375,0.5,0.09375,0.5,1.0,0.5,0.0,0.5,1.0
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.41845703125,0.5,0.6025390625,0.5,0.0,0.5,0.9609375,0.5,0.0,0.5,1.0
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3984375,0.5009765625,0.57958984375,0.5,0.015625,0.515625,0.875,0.5,0.0,0.5,1.0
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40869140625,0.50048828125,0.5771484375,0.5,0.0,0.5234375,0.8671875,0.5,0.0,0.5,1.0
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4150390625,0.50048828125,0.5673828125,0.5,0.03125,0.5078125,0.9921875,0.5,0.0,0.5,1.0
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3505859375,0.49560546875,0.70361328125,0.5,0.015625,0.5078125,0.9765625,0.5,0.0,0.5,1.0
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4189453125,0.5009765625,0.55224609375,0.5,0.0234375,0.5078125,0.71875,0.5,0.0,0.5,1.0
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4365234375,0.50048828125,0.5537109375,0.5,0.09375,0.5,0.7109375,0.5,0.0,0.5,1.0
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.408203125,0.5009765625,0.5927734375,0.5,0.0,0.5,0.96875,0.5,0.0,0.5,1.0
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.4013671875,0.5009765625,0.59423828125,0.5,0.0,0.515625,0.8359375,0.5,0.0,0.5,1.0
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39208984375,0.5009765625,0.5791015625,0.5,0.0,0.515625,0.890625,0.5,0.0,0.5,1.0
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.42626953125,0.50048828125,0.56982421875,0.5,0.03125,0.5078125,0.9921875,0.5,0.0,0.5,1.0
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.275390625,0.49755859375,0.68359375,0.5,0.0234375,0.5,0.8828125,0.5,0.0,0.5,1.0
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.421875,0.50146484375,0.560546875,0.5,0.0078125,0.5078125,1.0,0.5,0.0,0.5,1.0
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42724609375,0.50048828125,0.56005859375,0.5,0.0703125,0.5,1.0,0.5,0.0,0.5,1.0
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.40966796875,0.50048828125,0.58984375,0.5,0.0546875,0.5078125,0.9375,0.5,0.0,0.5,1.0
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40478515625,0.50146484375,0.57958984375,0.5,0.0,0.5078125,0.875,0.5,0.0,0.5,1.0
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3994140625,0.50048828125,0.57958984375,0.5,0.0078125,0.515625,0.8828125,0.5,0.0,0.5,1.0
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.42431640625,0.5,0.56201171875,0.5,0.046875,0.5,0.9921875,0.5,0.0,0.5,1.0
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2939453125,0.49560546875,0.7197265625,0.5,0.0390625,0.5078125,0.8671875,0.5,0.0,0.5,1.0
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42236328125,0.50146484375,0.548828125,0.5,0.0234375,0.5078125,0.734375,0.5,0.0,0.5,1.0
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42919921875,0.5009765625,0.5537109375,0.5,0.0546875,0.5,0.7421875,0.5,0.0,0.5,1.0
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.40380859375,0.50048828125,0.58740234375,0.5,0.0078125,0.5078125,0.890625,0.5,0.0,0.5,1.0
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40234375,0.5009765625,0.57958984375,0.5,0.0,0.515625,0.8671875,0.5,0.0,0.5,1.0
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39306640625,0.50146484375,0.57470703125,0.5,0.0,0.515625,0.921875,0.5,0.0,0.5,1.0
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4013671875,0.50048828125,0.56982421875,0.5,0.0546875,0.5078125,0.9921875,0.5,0.0,0.5,1.0
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.259765625,0.47900390625,0.7919921875,0.5,0.015625,0.4921875,0.9140625,0.5,0.0,0.5,1.0
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.41748046875,0.50146484375,0.5498046875,0.5,0.03125,0.5078125,0.75,0.5,0.0,0.5,1.0
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42822265625,0.50048828125,0.5556640625,0.5,0.078125,0.5,0.7578125,0.5,0.0,0.5,1.0
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3984375,0.50048828125,0.5927734375,0.5,0.0078125,0.5078125,0.90625,0.5,0.0,0.5,1.0
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.4013671875,0.50048828125,0.56494140625,0.5,0.0078125,0.5078125,0.890625,0.5,0.0,0.5,1.0
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4091796875,0.50146484375,0.5712890625,0.5,0.0078125,0.515625,0.9296875,0.5,0.0,0.5,1.0
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41796875,0.50048828125,0.5625,0.5,0.078125,0.5078125,0.9921875,0.5,0.0,0.5,1.0
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.33447265625,0.48876953125,0.7529296875,0.5,0.0234375,0.5078125,0.8828125,0.5,0.0,0.5,1.0
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42041015625,0.50146484375,0.552734375,0.5,0.015625,0.5078125,0.7578125,0.5,0.0,0.5,1.0
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4296875,0.50048828125,0.5546875,0.5,0.078125,0.5,0.7734375,0.5,0.0,0.5,1.0
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.41064453125,0.5,0.5927734375,0.5,0.0,0.5078125,0.921875,0.5,0.0,0.5,1.0
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3984375,0.50146484375,0.5791015625,0.5,0.0,0.5078125,0.875,0.5,0.0,0.5,1.0
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39501953125,0.50146484375,0.568359375,0.5,0.0078125,0.5078125,0.9296875,0.5,0.0,0.5,1.0
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.423828125,0.5,0.56005859375,0.5,0.0625,0.5,0.9921875,0.5,0.0,0.5,1.0
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.25830078125,0.5009765625,0.7314453125,0.5,0.03125,0.5078125,0.890625,0.5,0.0,0.5,1.0
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4306640625,0.50146484375,0.55126953125,0.5,0.0234375,0.5078125,0.71875,0.5,0.0,0.5,1.0
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43212890625,0.5009765625,0.556640625,0.5,0.0625,0.5,0.7265625,0.5,0.0,0.5,1.0
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.39013671875,0.50048828125,0.6044921875,0.5,0.0078125,0.5078125,0.921875,0.5,0.0,0.5,1.0
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.4091796875,0.50048828125,0.5791015625,0.5,0.0078125,0.5078125,0.90625,0.5,0.0,0.5,1.0
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41162109375,0.5009765625,0.57470703125,0.5,0.0078125,0.515625,0.953125,0.5,0.0,0.5,1.0
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4326171875,0.50048828125,0.568359375,0.5,0.078125,0.5,0.9921875,0.5,0.0,0.5,1.0
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.28125,0.50244140625,0.71728515625,0.5,0.03125,0.515625,0.875,0.5,0.0,0.5,1.0
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.41943359375,0.50146484375,0.55078125,0.5,0.0234375,0.5078125,0.7265625,0.5,0.0,0.5,1.0
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43701171875,0.50048828125,0.5537109375,0.5,0.046875,0.5078125,0.7109375,0.5,0.0,0.5,1.0
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.38916015625,0.50048828125,0.60595703125,0.5,0.0,0.5078125,0.9296875,0.5,0.0,0.5,1.0
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40087890625,0.5009765625,0.5771484375,0.5,0.015625,0.5078125,0.8828125,0.5,0.0,0.5,1.0
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4072265625,0.50048828125,0.56640625,0.5,0.0078125,0.515625,0.984375,0.5,0.0,0.5,1.0
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39697265625,0.50048828125,0.57763671875,0.5,0.0234375,0.5078125,0.9921875,0.5,0.0,0.5,1.0
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.30224609375,0.4921875,0.79345703125,0.5,0.0234375,0.5,0.9609375,0.5,0.0,0.5,1.0
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42724609375,0.5009765625,0.56201171875,0.5,0.0234375,0.5078125,0.71875,0.5,0.0,0.5,1.0
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43408203125,0.50048828125,0.556640625,0.5,0.0390625,0.5078125,0.7109375,0.5,0.0,0.5,1.0
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3740234375,0.50048828125,0.59814453125,0.5,0.0546875,0.5078125,0.953125,0.5,0.0,0.5,1.0
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.4091796875,0.50146484375,0.57470703125,0.5,0.0078125,0.515625,0.875,0.5,0.0,0.5,1.0
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4033203125,0.50146484375,0.57177734375,0.5,0.015625,0.515625,0.9296875,0.5,0.0,0.5,1.0
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40625,0.50146484375,0.5751953125,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.259765625,0.46533203125,0.84521484375,0.5,0.03125,0.4765625,0.953125,0.5,0.0,0.5,1.0
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42578125,0.5009765625,0.556640625,0.5,0.0078125,0.5078125,0.7265625,0.5,0.0,0.5,1.0
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42529296875,0.50048828125,0.5517578125,0.5,0.03125,0.5078125,0.734375,0.5,0.0,0.5,1.0
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.39697265625,0.5,0.59716796875,0.5,0.0625,0.5078125,0.9375,0.5,0.0,0.5,1.0
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40478515625,0.50146484375,0.56982421875,0.5,0.015625,0.515625,0.859375,0.5,0.0,0.5,1.0
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4072265625,0.50146484375,0.5693359375,0.5,0.0,0.5234375,0.9375,0.5,0.0,0.5,1.0
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39013671875,0.50048828125,0.6015625,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.22900390625,0.474609375,0.9013671875,0.5,0.03125,0.4921875,0.9765625,0.5,0.0,0.5,1.0
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42578125,0.5009765625,0.55615234375,0.5,0.0078125,0.5078125,0.78125,0.5,0.0,0.5,1.0
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43310546875,0.50048828125,0.556640625,0.5,0.0234375,0.5078125,0.796875,0.5,0.0,0.5,1.0
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.38720703125,0.50048828125,0.59375,0.5,0.015625,0.5078125,0.859375,0.5,0.0,0.5,1.0
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.4072265625,0.50146484375,0.5703125,0.5,0.015625,0.515625,0.8203125,0.5,0.0,0.5,1.0
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.408203125,0.50048828125,0.58447265625,0.5,0.0,0.5234375,0.921875,0.5,0.0,0.5,1.0
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41943359375,0.5,0.576171875,0.5,0.046875,0.515625,1.0,0.5,0.0625,0.5,1.0
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.24609375,0.48046875,0.93310546875,0.5,0.0234375,0.484375,0.9921875,0.5,0.0,0.5,1.0
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43017578125,0.50048828125,0.556640625,0.5,0.015625,0.5078125,0.828125,0.5,0.0,0.5,1.0
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43359375,0.50048828125,0.55517578125,0.5,0.0390625,0.5078125,0.796875,0.5,0.0,0.5,1.0
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3896484375,0.5,0.60009765625,0.5,0.046875,0.5078125,0.921875,0.5,0.0,0.5,1.0
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40234375,0.50146484375,0.576171875,0.5,0.015625,0.515625,0.921875,0.5,0.0,0.5,1.0
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41064453125,0.5009765625,0.58837890625,0.5,0.015625,0.5234375,0.953125,0.5,0.0625,0.5,1.0
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40478515625,0.50048828125,0.5859375,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.220703125,0.42236328125,0.9794921875,0.5,0.015625,0.4453125,1.0,0.5,0.0,0.4375,1.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42724609375,0.50048828125,0.56103515625,0.5,0.0078125,0.5078125,0.828125,0.5,0.0,0.5,1.0
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43798828125,0.50048828125,0.5556640625,0.5,0.046875,0.5078125,0.859375,0.5,0.0,0.5,1.0
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3974609375,0.50048828125,0.58837890625,0.5,0.0078125,0.5078125,0.9140625,0.5,0.0,0.5,1.0
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.4111328125,0.5009765625,0.58251953125,0.5,0.0078125,0.515625,0.796875,0.5,0.0,0.5,1.0
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39306640625,0.5009765625,0.59814453125,0.5,0.0078125,0.5234375,0.90625,0.5,0.0,0.5,1.0
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40869140625,0.5009765625,0.5830078125,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.19287109375,0.4443359375,0.99658203125,0.5,0.015625,0.4453125,1.0,0.5,0.0,0.4375,1.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42236328125,0.50048828125,0.5546875,0.5,0.0078125,0.5078125,0.8203125,0.5,0.0,0.5,1.0
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43701171875,0.50048828125,0.55859375,0.5,0.0234375,0.5078125,0.859375,0.5,0.0,0.5,1.0
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3935546875,0.50048828125,0.59326171875,0.5,0.015625,0.515625,0.875,0.5,0.0,0.5,1.0
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40087890625,0.50146484375,0.5791015625,0.5,0.015625,0.515625,0.8125,0.5,0.0,0.5,1.0
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4072265625,0.50146484375,0.5869140625,0.5,0.0078125,0.5234375,0.9296875,0.5,0.0,0.5,1.0
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39990234375,0.5009765625,0.5927734375,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2373046875,0.43212890625,0.99658203125,0.5,0.0234375,0.4375,1.0,0.5,0.0,0.4375,1.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42431640625,0.50048828125,0.55517578125,0.5,0.0078125,0.5078125,0.796875,0.5,0.0,0.5,1.0
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4345703125,0.50048828125,0.55322265625,0.5,0.0390625,0.5078125,0.875,0.5,0.0,0.5,1.0
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.40380859375,0.50048828125,0.6015625,0.5,0.015625,0.515625,0.8515625,0.5,0.0,0.5,1.0
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3955078125,0.50146484375,0.56640625,0.5,0.0078125,0.515625,0.7578125,0.5,0.0,0.5,1.0
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4072265625,0.50146484375,0.58056640625,0.5,0.0,0.5234375,0.828125,0.5,0.0,0.5,1.0
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.400390625,0.50048828125,0.57080078125,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2158203125,0.4755859375,0.99169921875,0.5,0.0078125,0.484375,1.0,0.5,0.0,0.5,1.0
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43505859375,0.5009765625,0.5546875,0.5,0.0078125,0.5078125,0.8515625,0.5,0.0,0.5,1.0
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.431640625,0.50048828125,0.55517578125,0.5,0.0390625,0.5078125,0.875,0.5,0.0,0.5,1.0
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3740234375,0.50048828125,0.60302734375,0.5,0.03125,0.515625,0.8515625,0.5,0.0,0.5,1.0
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.39208984375,0.5009765625,0.57470703125,0.5,0.0078125,0.515625,0.75,0.5,0.0,0.5,1.0
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40234375,0.50146484375,0.5791015625,0.5,0.0078125,0.5234375,0.875,0.5,0.0,0.5,1.0
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4150390625,0.50048828125,0.5986328125,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2275390625,0.453125,0.95361328125,0.5,0.0234375,0.4609375,1.0,0.5,0.0,0.4375,1.0
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42431640625,0.5009765625,0.55224609375,0.5,0.0078125,0.5078125,0.8203125,0.5,0.0,0.5,1.0
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42626953125,0.50048828125,0.55126953125,0.5,0.0234375,0.5078125,0.8515625,0.5,0.0,0.5,1.0
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3779296875,0.5,0.59521484375,0.5,0.015625,0.5234375,0.828125,0.5,0.0,0.5,1.0
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.404296875,0.50146484375,0.58251953125,0.5,0.0,0.515625,0.7890625,0.5,0.0,0.5,1.0
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.404296875,0.5009765625,0.57421875,0.5,0.0,0.5234375,0.890625,0.5,0.0,0.5,1.0
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40966796875,0.5009765625,0.56591796875,0.5,0.0390625,0.515625,1.0,0.5,0.0,0.5,1.0
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.240234375,0.4619140625,0.8837890625,0.5,0.0078125,0.46875,0.984375,0.5,0.0,0.5,1.0
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43359375,0.5009765625,0.5546875,0.5,0.0,0.5078125,0.8203125,0.5,0.0,0.5,1.0
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4306640625,0.50048828125,0.5478515625,0.5,0.03125,0.5078125,0.859375,0.5,0.0,0.5,1.0
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.38525390625,0.50146484375,0.59765625,0.5,0.0078125,0.5234375,0.7890625,0.5,0.0,0.5,1.0
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3955078125,0.5009765625,0.57373046875,0.5,0.0078125,0.5234375,0.8046875,0.5,0.0,0.5,1.0
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.400390625,0.50048828125,0.5849609375,0.5,0.0,0.5234375,0.8671875,0.5,0.0,0.5,1.0
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40380859375,0.5009765625,0.5927734375,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.18896484375,0.427734375,0.96240234375,0.5,0.0078125,0.4375,1.0,0.5,0.0,0.4375,1.0
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4296875,0.5009765625,0.55810546875,0.5,0.0,0.5078125,0.84375,0.5,0.0,0.5,1.0
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4365234375,0.50048828125,0.5546875,0.5,0.03125,0.5,0.875,0.5,0.0,0.5,1.0
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.38623046875,0.50146484375,0.59033203125,0.5,0.0,0.53125,0.7890625,0.5,0.0,0.5,1.0
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40234375,0.50048828125,0.57421875,0.5,0.0,0.5234375,0.75,0.5,0.0,0.5,1.0
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.4091796875,0.5009765625,0.60302734375,0.5,0.0,0.5234375,0.859375,0.5,0.0,0.5,1.0
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39306640625,0.50048828125,0.58056640625,0.5,0.015625,0.515625,1.0,0.5,0.0,0.5,1.0
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.1904296875,0.44921875,0.998046875,0.5,0.0234375,0.4453125,1.0,0.5,0.0,0.4375,1.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4326171875,0.5009765625,0.55224609375,0.5,0.0078125,0.5078125,0.8359375,0.5,0.0,0.5,1.0
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4365234375,0.50048828125,0.5556640625,0.5,0.03125,0.5078125,0.859375,0.5,0.0,0.5,1.0
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.37060546875,0.5009765625,0.607421875,0.5,0.015625,0.53125,0.8046875,0.5,0.0,0.5,1.0
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40478515625,0.5009765625,0.5791015625,0.5,0.0,0.5234375,0.7890625,0.5,0.0,0.5,1.0
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39453125,0.50146484375,0.59716796875,0.5,0.0,0.5234375,0.8671875,0.5,0.0,0.5,1.0
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.38916015625,0.50048828125,0.58349609375,0.5,0.015625,0.5078125,1.0,0.5,0.0,0.5,1.0
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.2021484375,0.45849609375,0.978515625,0.5,0.015625,0.4453125,1.0,0.5,0.0,0.4375,1.0
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43701171875,0.5009765625,0.55126953125,0.5,0.0078125,0.5078125,0.84375,0.5,0.0,0.5,1.0
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43310546875,0.50048828125,0.55078125,0.5,0.03125,0.5078125,0.8671875,0.5,0.0,0.5,1.0
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.35888671875,0.50146484375,0.60205078125,0.5,0.015625,0.53125,0.84375,0.5,0.0,0.5,1.0
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40771484375,0.5009765625,0.5927734375,0.5,0.0,0.5234375,1.0,0.5,0.0,0.5,1.0
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39794921875,0.50146484375,0.599609375,0.5,0.0,0.5234375,1.0,0.5,0.0,0.5,1.0
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3955078125,0.50146484375,0.59130859375,0.5,0.0234375,0.515625,1.0,0.5,0.0,0.5,1.0
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.1708984375,0.470703125,0.96044921875,0.5,0.0078125,0.4609375,1.0,0.5,0.0,0.4375,1.0
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.431640625,0.5009765625,0.55419921875,0.5,0.0078125,0.5078125,0.8125,0.5,0.0,0.5,1.0
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4306640625,0.5009765625,0.55224609375,0.5,0.0390625,0.5078125,0.84375,0.5,0.0,0.5,1.0
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.37255859375,0.5009765625,0.60595703125,0.5,0.0,0.5390625,0.7890625,0.5,0.0,0.5,1.0
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.400390625,0.50146484375,0.5791015625,0.5,0.0,0.5234375,0.7578125,0.5,0.0,0.5,1.0
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.39990234375,0.50146484375,0.58642578125,0.5,0.0,0.5234375,0.8828125,0.5,0.0,0.5,1.0
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41015625,0.50048828125,0.57177734375,0.5,0.0234375,0.5078125,1.0,0.5,0.0,0.5,1.0
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.17236328125,0.48193359375,0.95068359375,0.5,0.0234375,0.484375,1.0,0.5,0.0,0.5,1.0
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42431640625,0.5009765625,0.5556640625,0.5,0.0078125,0.5078125,0.8203125,0.5,0.0,0.5,1.0
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43115234375,0.5009765625,0.5517578125,0.5,0.0234375,0.5078125,0.828125,0.5,0.0,0.5,1.0
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.3603515625,0.501953125,0.6064453125,0.5,0.0234375,0.5390625,0.8125,0.5,0.0,0.5,1.0
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.40771484375,0.5009765625,0.58349609375,0.5,0.0,0.515625,1.0,0.5,0.0,0.5,1.0
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.40185546875,0.501953125,0.5791015625,0.5,0.0,0.5234375,1.0,0.5,0.0,0.5,1.0
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.41748046875,0.50048828125,0.572265625,0.5,0.015625,0.5078125,1.0,0.5,0.0,0.5,1.0
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.1025390625,0.5009765625,0.884765625,0.5,0.0078125,0.5,0.9921875,0.5,0.0,0.5,1.0
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42626953125,0.5009765625,0.55419921875,0.5,0.0078125,0.5078125,0.84375,0.5,0.0,0.5,1.0
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.42822265625,0.5009765625,0.5576171875,0.5,0.015625,0.5078125,0.828125,0.5,0.0,0.5,1.0
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.37939453125,0.50244140625,0.6044921875,0.5,0.0,0.5390625,0.8125,0.5,0.0,0.5,1.0
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.37890625,0.5009765625,0.57568359375,0.5,0.0,0.5234375,0.765625,0.5,0.0,0.5,1.0
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3935546875,0.50048828125,0.5908203125,0.5,0.0,0.53125,0.890625,0.5,0.0,0.5,1.0
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.3974609375,0.50048828125,0.5908203125,0.5,0.015625,0.5078125,1.0,0.5,0.0,0.5,1.0
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.1689453125,0.42919921875,0.9931640625,0.5,0.046875,0.4453125,1.0,0.5,0.0,0.5,1.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43115234375,0.5009765625,0.56494140625,0.5,0.0078125,0.5078125,0.890625,0.5,0.0,0.5,1.0
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.4287109375,0.5009765625,0.56396484375,0.5,0.0234375,0.5078125,0.8828125,0.5,0.0,0.5,1.0
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.353515625,0.501953125,0.61962890625,0.5,0.0,0.546875,0.9140625,0.5,0.0,0.5,1.0
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.3935546875,0.5009765625,0.587890625,0.5,0.0,0.515625,0.9765625,0.5,0.0,0.5,1.0
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.38916015625,0.5009765625,0.57421875,0.5,0.0078125,0.5234375,0.96875,0.5,0.0,0.5,1.0
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,2097152,0.5,"(128, 16)",8 x 256,2048,0.5,0.38037109375,0.50048828125,0.578125,0.5,0.0078125,0.515625,1.0,0.5,0.0,0.5,1.0
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,8388608,0.5,"(128, 16)",32 x 256,8192,0.5,0.0751953125,0.45263671875,0.986328125,0.5,0.0,0.46875,1.0,0.5,0.0,0.5,1.0
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.431640625,0.50048828125,0.5712890625,0.5,0.046875,0.5078125,0.921875,0.5,0.0,0.5,1.0
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,29360128,0.5,"(128, 16)",112 x 256,28672,0.5,0.43212890625,0.50048828125,0.5576171875,0.5,0.0546875,0.5078125,0.890625,0.5,0.0,0.5,1.0
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,29360128,0.5,"(128, 16)",32 x 896,28672,0.5,0.32275390625,0.5,0.66162109375,0.5,0.0,0.5703125,1.0,0.5,0.0,0.5,1.0
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.6.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.240234375,0.60009765625,0.87744140625,0.599853515625,0.0,0.6875,1.0,0.599853515625,0.0625,0.625,1.0
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.279296875,0.59912109375,0.87158203125,0.599853515625,0.0,0.6796875,1.0,0.599853515625,0.0625,0.625,1.0
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.31640625,0.59912109375,0.84521484375,0.599853515625,0.0,0.5546875,1.0,0.599853515625,0.0625,0.625,1.0
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.10302734375,0.611328125,1.0,0.599853515625,0.0078125,0.59375,1.0,0.599853515625,0.0,0.625,1.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51708984375,0.60009765625,0.6728515625,0.599853515625,0.078125,0.609375,1.0,0.599853515625,0.0,0.625,1.0
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53125,0.599609375,0.67724609375,0.599853515625,0.1015625,0.609375,1.0,0.599853515625,0.0,0.625,1.0
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4208984375,0.60107421875,0.7314453125,0.5999581813812256,0.046875,0.6171875,0.984375,0.5999581813812256,0.0,0.625,1.0
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3779296875,0.60205078125,0.79345703125,0.599853515625,0.0,0.6640625,1.0,0.599853515625,0.0625,0.625,1.0
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.37060546875,0.60107421875,0.77734375,0.599853515625,0.0,0.6640625,1.0,0.599853515625,0.0625,0.625,1.0
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.39208984375,0.60205078125,0.7333984375,0.599853515625,0.0234375,0.65625,1.0,0.599853515625,0.125,0.625,1.0
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.28466796875,0.62353515625,0.98974609375,0.599853515625,0.0390625,0.5859375,1.0,0.599853515625,0.0,0.625,1.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52783203125,0.60009765625,0.677734375,0.599853515625,0.1640625,0.609375,1.0,0.599853515625,0.0,0.625,1.0
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52734375,0.599609375,0.66845703125,0.599853515625,0.140625,0.609375,1.0,0.599853515625,0.0,0.625,1.0
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48828125,0.6015625,0.7109375,0.5999581813812256,0.0,0.6328125,0.9375,0.5999581813812256,0.0,0.625,1.0
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.45166015625,0.6025390625,0.7451171875,0.599853515625,0.0,0.6484375,1.0,0.599853515625,0.0,0.625,1.0
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.427734375,0.60302734375,0.7607421875,0.599853515625,0.0078125,0.65625,1.0,0.599853515625,0.0625,0.625,1.0
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4853515625,0.6025390625,0.7099609375,0.599853515625,0.0546875,0.640625,1.0,0.599853515625,0.0625,0.625,1.0
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2900390625,0.54296875,0.98779296875,0.599853515625,0.0078125,0.5625,1.0,0.599853515625,0.0,0.5625,1.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52880859375,0.60009765625,0.66455078125,0.599853515625,0.1328125,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53076171875,0.599609375,0.6650390625,0.599853515625,0.1796875,0.6015625,1.0,0.599853515625,0.0625,0.625,1.0
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.474609375,0.60107421875,0.73974609375,0.5999581813812256,0.015625,0.625,0.9140625,0.5999581813812256,0.0625,0.625,1.0
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48876953125,0.60107421875,0.705078125,0.599853515625,0.0234375,0.6328125,1.0,0.599853515625,0.0,0.625,1.0
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46923828125,0.60205078125,0.69921875,0.599853515625,0.015625,0.640625,1.0,0.599853515625,0.0625,0.625,1.0
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49658203125,0.60107421875,0.6923828125,0.599853515625,0.0625,0.625,1.0,0.599853515625,0.0625,0.625,1.0
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.32666015625,0.58544921875,0.96142578125,0.599853515625,0.046875,0.5859375,1.0,0.599853515625,0.0,0.625,1.0
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53466796875,0.60009765625,0.66748046875,0.599853515625,0.109375,0.609375,1.0,0.599853515625,0.0,0.625,1.0
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.54150390625,0.599609375,0.6611328125,0.599853515625,0.1953125,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4814453125,0.6005859375,0.71240234375,0.5999581813812256,0.03125,0.6171875,0.9375,0.5999581813812256,0.0,0.625,1.0
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4619140625,0.60107421875,0.71728515625,0.599853515625,0.0078125,0.640625,1.0,0.599853515625,0.0625,0.625,1.0
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46142578125,0.60107421875,0.71923828125,0.599853515625,0.0078125,0.640625,1.0,0.599853515625,0.0625,0.625,1.0
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49609375,0.6015625,0.71923828125,0.599853515625,0.0546875,0.625,1.0,0.599853515625,0.0625,0.625,1.0
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.27587890625,0.56298828125,0.9892578125,0.599853515625,0.0234375,0.5703125,1.0,0.599853515625,0.0,0.5625,1.0
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52001953125,0.6005859375,0.66064453125,0.599853515625,0.078125,0.609375,1.0,0.599853515625,0.0625,0.625,1.0
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53759765625,0.6005859375,0.66357421875,0.599853515625,0.1875,0.609375,1.0,0.599853515625,0.0,0.625,1.0
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48779296875,0.60107421875,0.70263671875,0.5999581813812256,0.0546875,0.609375,0.984375,0.5999581813812256,0.0,0.625,1.0
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.44970703125,0.6005859375,0.7177734375,0.599853515625,0.0078125,0.6328125,0.96875,0.599853515625,0.0625,0.625,1.0
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.455078125,0.6005859375,0.708984375,0.599853515625,0.015625,0.640625,0.9296875,0.599853515625,0.0625,0.625,1.0
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49755859375,0.6005859375,0.70751953125,0.599853515625,0.0625,0.625,1.0,0.599853515625,0.0625,0.625,1.0
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.25390625,0.5859375,0.9423828125,0.599853515625,0.0078125,0.609375,1.0,0.599853515625,0.0,0.625,1.0
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.60107421875,0.658203125,0.599853515625,0.046875,0.609375,1.0,0.599853515625,0.0625,0.625,1.0
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52392578125,0.6005859375,0.6630859375,0.599853515625,0.140625,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48828125,0.6005859375,0.7041015625,0.5999581813812256,0.0,0.609375,0.953125,0.5999581813812256,0.0,0.625,1.0
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47412109375,0.6005859375,0.7080078125,0.599853515625,0.015625,0.6328125,0.9609375,0.599853515625,0.0625,0.625,1.0
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46484375,0.6005859375,0.705078125,0.599853515625,0.015625,0.6328125,0.953125,0.599853515625,0.0625,0.625,1.0
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.498046875,0.6005859375,0.685546875,0.599853515625,0.0703125,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3115234375,0.60302734375,0.94775390625,0.599853515625,0.0234375,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51416015625,0.60107421875,0.658203125,0.599853515625,0.0625,0.609375,1.0,0.599853515625,0.0,0.625,1.0
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6005859375,0.64892578125,0.599853515625,0.1171875,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48291015625,0.60009765625,0.70654296875,0.5999581813812256,0.0078125,0.609375,0.984375,0.5999581813812256,0.0,0.625,1.0
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48291015625,0.60107421875,0.68408203125,0.599853515625,0.0234375,0.625,0.9765625,0.599853515625,0.0625,0.625,1.0
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49072265625,0.60205078125,0.6845703125,0.599853515625,0.0078125,0.625,0.8984375,0.599853515625,0.0625,0.625,1.0
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.50537109375,0.6005859375,0.67626953125,0.599853515625,0.0703125,0.609375,0.9921875,0.599853515625,0.0625,0.625,1.0
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.41748046875,0.5927734375,0.798828125,0.599853515625,0.0234375,0.6171875,0.9921875,0.599853515625,0.0,0.625,1.0
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51904296875,0.6015625,0.64892578125,0.599853515625,0.046875,0.609375,0.8203125,0.599853515625,0.0,0.625,1.0
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.529296875,0.6005859375,0.650390625,0.599853515625,0.140625,0.6015625,0.8125,0.599853515625,0.0,0.625,1.0
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.49072265625,0.60107421875,0.693359375,0.5999581813812256,0.0078125,0.609375,0.9921875,0.5999581813812256,0.0,0.625,1.0
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48193359375,0.6005859375,0.70654296875,0.599853515625,0.015625,0.6171875,0.9296875,0.599853515625,0.0,0.625,1.0
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48193359375,0.60107421875,0.68701171875,0.599853515625,0.0,0.625,0.953125,0.599853515625,0.0625,0.625,1.0
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.50537109375,0.6005859375,0.666015625,0.599853515625,0.0390625,0.609375,0.9921875,0.599853515625,0.0,0.625,1.0
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.31689453125,0.6064453125,0.83056640625,0.599853515625,0.03125,0.609375,0.9609375,0.599853515625,0.0,0.625,1.0
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5166015625,0.6015625,0.6494140625,0.599853515625,0.03125,0.609375,1.0,0.599853515625,0.0,0.625,1.0
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5205078125,0.60107421875,0.65234375,0.599853515625,0.1015625,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4775390625,0.6005859375,0.705078125,0.5999581813812256,0.0390625,0.609375,0.953125,0.5999581813812256,0.0,0.625,1.0
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4814453125,0.60107421875,0.6943359375,0.599853515625,0.015625,0.6171875,0.9375,0.599853515625,0.0625,0.625,1.0
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47607421875,0.6005859375,0.68701171875,0.599853515625,0.0078125,0.6171875,0.953125,0.599853515625,0.0625,0.625,1.0
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.5146484375,0.60009765625,0.67529296875,0.599853515625,0.0703125,0.6015625,1.0,0.599853515625,0.125,0.625,1.0
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.337890625,0.60888671875,0.8427734375,0.599853515625,0.0625,0.6171875,0.9453125,0.599853515625,0.0,0.625,1.0
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6015625,0.65576171875,0.599853515625,0.0234375,0.609375,0.8515625,0.599853515625,0.0,0.625,1.0
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.60107421875,0.65185546875,0.599853515625,0.0859375,0.609375,0.8359375,0.599853515625,0.0625,0.625,1.0
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.47705078125,0.60009765625,0.70703125,0.5999581813812256,0.0078125,0.609375,0.9609375,0.5999581813812256,0.0,0.625,1.0
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.490234375,0.6005859375,0.6982421875,0.599853515625,0.015625,0.6171875,0.953125,0.599853515625,0.0625,0.625,1.0
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4755859375,0.6005859375,0.68701171875,0.599853515625,0.0,0.625,0.9453125,0.599853515625,0.0625,0.625,1.0
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49951171875,0.60009765625,0.6640625,0.599853515625,0.0859375,0.609375,0.9921875,0.599853515625,0.0625,0.625,1.0
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2880859375,0.6279296875,0.84130859375,0.599853515625,0.015625,0.625,0.96875,0.599853515625,0.0,0.625,1.0
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52001953125,0.6015625,0.65625,0.599853515625,0.03125,0.609375,0.8359375,0.599853515625,0.0,0.625,1.0
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.521484375,0.6005859375,0.65576171875,0.599853515625,0.1171875,0.6015625,0.828125,0.599853515625,0.0,0.625,1.0
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.47412109375,0.6005859375,0.69921875,0.5999581813812256,0.0078125,0.6171875,0.953125,0.5999581813812256,0.0,0.625,1.0
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48046875,0.6005859375,0.673828125,0.599853515625,0.015625,0.609375,0.953125,0.599853515625,0.0625,0.625,1.0
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4892578125,0.60107421875,0.67529296875,0.599853515625,0.015625,0.6171875,0.9765625,0.599853515625,0.0625,0.625,1.0
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.50439453125,0.60009765625,0.67578125,0.599853515625,0.1171875,0.609375,1.0,0.599853515625,0.0625,0.625,1.0
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3662109375,0.59521484375,0.88818359375,0.599853515625,0.03125,0.6171875,0.984375,0.599853515625,0.0,0.625,1.0
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.517578125,0.60107421875,0.658203125,0.599853515625,0.015625,0.609375,0.828125,0.599853515625,0.0,0.625,1.0
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52880859375,0.6005859375,0.65576171875,0.599853515625,0.109375,0.609375,0.8515625,0.599853515625,0.0,0.625,1.0
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.47607421875,0.60009765625,0.72412109375,0.5999581813812256,0.0,0.6171875,0.96875,0.5999581813812256,0.0,0.625,1.0
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4853515625,0.6015625,0.68701171875,0.599853515625,0.015625,0.609375,0.953125,0.599853515625,0.0,0.625,1.0
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47900390625,0.6015625,0.6767578125,0.599853515625,0.0234375,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49609375,0.60009765625,0.66796875,0.599853515625,0.0703125,0.6015625,0.9921875,0.599853515625,0.0625,0.625,1.0
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.35009765625,0.60498046875,0.8291015625,0.599853515625,0.046875,0.6171875,0.9765625,0.599853515625,0.0,0.625,1.0
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5107421875,0.6015625,0.65869140625,0.599853515625,0.0390625,0.609375,0.828125,0.599853515625,0.0625,0.625,1.0
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51708984375,0.6005859375,0.65576171875,0.599853515625,0.09375,0.609375,0.8203125,0.599853515625,0.0,0.625,1.0
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4736328125,0.6015625,0.728515625,0.5999581813812256,0.015625,0.6171875,0.984375,0.5999581813812256,0.0625,0.625,1.0
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.50244140625,0.60009765625,0.68115234375,0.599853515625,0.0078125,0.609375,0.984375,0.599853515625,0.0625,0.625,1.0
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49755859375,0.6015625,0.6748046875,0.599853515625,0.0234375,0.6171875,0.9921875,0.599853515625,0.0625,0.625,1.0
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.52587890625,0.60009765625,0.6708984375,0.599853515625,0.1015625,0.609375,0.9921875,0.599853515625,0.0,0.625,1.0
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.33837890625,0.6083984375,0.80615234375,0.599853515625,0.046875,0.625,0.9296875,0.599853515625,0.0,0.625,1.0
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52001953125,0.6015625,0.6650390625,0.599853515625,0.03125,0.609375,0.8125,0.599853515625,0.0625,0.625,1.0
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.53076171875,0.6005859375,0.65478515625,0.599853515625,0.109375,0.609375,0.8203125,0.599853515625,0.0625,0.625,1.0
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.474609375,0.60107421875,0.72216796875,0.5999581813812256,0.0,0.609375,0.9765625,0.5999581813812256,0.0625,0.625,1.0
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49169921875,0.60107421875,0.68505859375,0.599853515625,0.015625,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4931640625,0.6005859375,0.67333984375,0.599853515625,0.015625,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4736328125,0.60107421875,0.69775390625,0.599853515625,0.03125,0.6171875,0.9921875,0.599853515625,0.0625,0.625,1.0
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.36328125,0.61328125,0.8388671875,0.599853515625,0.0234375,0.609375,0.96875,0.599853515625,0.0,0.625,1.0
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5146484375,0.60107421875,0.66650390625,0.599853515625,0.03125,0.6171875,0.8359375,0.599853515625,0.0,0.625,1.0
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.525390625,0.6005859375,0.6591796875,0.599853515625,0.1015625,0.609375,0.8359375,0.599853515625,0.0,0.625,1.0
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4267578125,0.6015625,0.71484375,0.5999581813812256,0.0390625,0.6171875,0.9765625,0.5999581813812256,0.0,0.625,1.0
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49853515625,0.6015625,0.6767578125,0.599853515625,0.0078125,0.6171875,0.984375,0.599853515625,0.0625,0.625,1.0
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.5009765625,0.6005859375,0.6708984375,0.599853515625,0.015625,0.625,0.984375,0.599853515625,0.0625,0.625,1.0
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48291015625,0.6015625,0.6767578125,0.599853515625,0.03125,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.32373046875,0.58447265625,0.9599609375,0.599853515625,0.046875,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51171875,0.60107421875,0.6640625,0.599853515625,0.015625,0.6171875,0.84375,0.599853515625,0.0,0.625,1.0
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5244140625,0.6005859375,0.66259765625,0.599853515625,0.0703125,0.609375,0.84375,0.599853515625,0.0,0.625,1.0
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4580078125,0.60009765625,0.72412109375,0.5999581813812256,0.0390625,0.609375,0.9921875,0.5999581813812256,0.0,0.625,1.0
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4853515625,0.60205078125,0.6884765625,0.599853515625,0.015625,0.625,0.9609375,0.599853515625,0.0,0.625,1.0
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.490234375,0.60205078125,0.68212890625,0.599853515625,0.0,0.625,0.96875,0.599853515625,0.0625,0.625,1.0
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46240234375,0.6015625,0.6962890625,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2744140625,0.5703125,0.99462890625,0.599853515625,0.0390625,0.5859375,1.0,0.599853515625,0.0,0.5625,1.0
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51416015625,0.60107421875,0.669921875,0.599853515625,0.0,0.6171875,0.8671875,0.599853515625,0.0,0.625,1.0
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.6005859375,0.65869140625,0.599853515625,0.046875,0.609375,0.875,0.599853515625,0.0,0.625,1.0
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4609375,0.60107421875,0.72607421875,0.5999581813812256,0.015625,0.609375,0.9765625,0.5999581813812256,0.0625,0.625,1.0
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49853515625,0.6015625,0.67626953125,0.599853515625,0.015625,0.625,0.890625,0.599853515625,0.0625,0.625,1.0
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49365234375,0.6015625,0.68115234375,0.599853515625,0.015625,0.6328125,0.953125,0.599853515625,0.0625,0.625,1.0
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49560546875,0.6005859375,0.6728515625,0.599853515625,0.0859375,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.31689453125,0.564453125,0.99560546875,0.599853515625,0.0625,0.578125,1.0,0.599853515625,0.0,0.5625,1.0
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5126953125,0.6005859375,0.662109375,0.599853515625,0.0234375,0.6171875,0.875,0.599853515625,0.0,0.625,1.0
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51904296875,0.6005859375,0.66455078125,0.599853515625,0.046875,0.609375,0.8828125,0.599853515625,0.0,0.625,1.0
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48291015625,0.60009765625,0.70849609375,0.5999581813812256,0.03125,0.6171875,0.9921875,0.5999581813812256,0.0625,0.625,1.0
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49853515625,0.60107421875,0.68798828125,0.599853515625,0.0234375,0.625,0.96875,0.599853515625,0.0625,0.625,1.0
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.60107421875,0.70361328125,0.599853515625,0.0078125,0.6328125,0.96875,0.599853515625,0.0625,0.625,1.0
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46630859375,0.6015625,0.69775390625,0.599853515625,0.03125,0.625,1.0,0.599853515625,0.0625,0.625,1.0
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.23779296875,0.55126953125,1.0,0.599853515625,0.0234375,0.5703125,1.0,0.599853515625,0.0,0.5625,1.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51123046875,0.60107421875,0.6640625,0.599853515625,0.015625,0.6171875,0.859375,0.599853515625,0.0,0.625,1.0
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52099609375,0.6005859375,0.66552734375,0.599853515625,0.0625,0.609375,0.9296875,0.599853515625,0.0,0.625,1.0
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46923828125,0.60009765625,0.70703125,0.5999581813812256,0.015625,0.6171875,0.984375,0.5999581813812256,0.0,0.625,1.0
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48486328125,0.60205078125,0.6884765625,0.599853515625,0.015625,0.625,0.8671875,0.599853515625,0.0,0.625,1.0
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4775390625,0.60205078125,0.71533203125,0.599853515625,0.0078125,0.6328125,0.9609375,0.599853515625,0.0625,0.625,1.0
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.6015625,0.697265625,0.599853515625,0.03125,0.625,1.0,0.599853515625,0.0625,0.625,1.0
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.27978515625,0.4970703125,1.0,0.599853515625,0.0234375,0.5390625,1.0,0.599853515625,0.0,0.5625,1.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51171875,0.60107421875,0.66162109375,0.599853515625,0.015625,0.609375,0.875,0.599853515625,0.0,0.625,1.0
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5244140625,0.6005859375,0.66064453125,0.599853515625,0.0703125,0.609375,0.9140625,0.599853515625,0.0,0.625,1.0
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46533203125,0.6005859375,0.71630859375,0.5999581813812256,0.015625,0.625,0.96875,0.5999581813812256,0.0,0.625,1.0
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.478515625,0.6015625,0.69287109375,0.599853515625,0.015625,0.625,0.875,0.599853515625,0.0625,0.625,1.0
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48095703125,0.60205078125,0.693359375,0.599853515625,0.0078125,0.6328125,0.9609375,0.599853515625,0.0625,0.625,1.0
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46875,0.60107421875,0.705078125,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.28125,0.56591796875,0.99365234375,0.599853515625,0.03125,0.5625,1.0,0.599853515625,0.0,0.5625,1.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.50927734375,0.60107421875,0.6630859375,0.599853515625,0.015625,0.6171875,0.875,0.599853515625,0.0,0.625,1.0
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51953125,0.6005859375,0.66064453125,0.599853515625,0.0625,0.609375,0.9296875,0.599853515625,0.0,0.625,1.0
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.48193359375,0.6005859375,0.71826171875,0.5999581813812256,0.015625,0.625,0.9609375,0.5999581813812256,0.0625,0.625,1.0
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48388671875,0.60205078125,0.685546875,0.599853515625,0.0078125,0.625,0.8671875,0.599853515625,0.0625,0.625,1.0
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48583984375,0.60205078125,0.68994140625,0.599853515625,0.0,0.6328125,0.8984375,0.599853515625,0.0625,0.625,1.0
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47705078125,0.60107421875,0.68701171875,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3125,0.564453125,1.0,0.599853515625,0.0078125,0.578125,1.0,0.599853515625,0.0,0.5625,1.0
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.513671875,0.6005859375,0.66748046875,0.599853515625,0.0,0.6171875,0.8984375,0.599853515625,0.0,0.625,1.0
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51171875,0.6005859375,0.65966796875,0.599853515625,0.0625,0.609375,0.9453125,0.599853515625,0.0,0.625,1.0
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.44482421875,0.60107421875,0.708984375,0.5999581813812256,0.0234375,0.625,0.953125,0.5999581813812256,0.0625,0.625,1.0
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47705078125,0.60107421875,0.6943359375,0.599853515625,0.0078125,0.625,0.859375,0.599853515625,0.0,0.625,1.0
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4873046875,0.6015625,0.69482421875,0.599853515625,0.0078125,0.6328125,0.90625,0.599853515625,0.0625,0.625,1.0
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48681640625,0.60107421875,0.70263671875,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.263671875,0.525390625,1.0,0.599853515625,0.03125,0.546875,1.0,0.599853515625,0.0,0.5625,1.0
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.50927734375,0.60107421875,0.66064453125,0.599853515625,0.015625,0.609375,0.8671875,0.599853515625,0.0,0.625,1.0
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.50927734375,0.6005859375,0.66015625,0.599853515625,0.046875,0.609375,0.9375,0.599853515625,0.0,0.625,1.0
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.462890625,0.60107421875,0.71484375,0.5999581813812256,0.015625,0.6328125,0.9375,0.5999581813812256,0.0,0.625,1.0
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47705078125,0.60205078125,0.69140625,0.599853515625,0.0,0.625,0.8671875,0.599853515625,0.0,0.625,1.0
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4775390625,0.6015625,0.69287109375,0.599853515625,0.0,0.6328125,0.9140625,0.599853515625,0.0625,0.625,1.0
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.60107421875,0.69140625,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.3017578125,0.56884765625,0.99951171875,0.599853515625,0.015625,0.578125,1.0,0.599853515625,0.0,0.5625,1.0
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.513671875,0.60107421875,0.66015625,0.599853515625,0.0078125,0.609375,0.875,0.599853515625,0.0,0.625,1.0
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5146484375,0.60107421875,0.662109375,0.599853515625,0.046875,0.609375,0.921875,0.599853515625,0.0,0.625,1.0
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46044921875,0.60205078125,0.708984375,0.5999581813812256,0.0078125,0.6328125,0.9140625,0.5999581813812256,0.0,0.625,1.0
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4755859375,0.6015625,0.685546875,0.599853515625,0.0078125,0.6328125,0.859375,0.599853515625,0.0625,0.625,1.0
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4912109375,0.60205078125,0.7080078125,0.599853515625,0.0,0.6328125,0.890625,0.599853515625,0.0625,0.625,1.0
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.474609375,0.6015625,0.705078125,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.26171875,0.55029296875,1.0,0.599853515625,0.0234375,0.5625,1.0,0.599853515625,0.0,0.5625,1.0
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.515625,0.6005859375,0.6640625,0.599853515625,0.0078125,0.609375,0.890625,0.599853515625,0.0,0.625,1.0
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52392578125,0.60107421875,0.65966796875,0.599853515625,0.046875,0.609375,0.9296875,0.599853515625,0.0625,0.625,1.0
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4423828125,0.60205078125,0.71435546875,0.5999581813812256,0.0,0.640625,0.9140625,0.5999581813812256,0.0625,0.625,1.0
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.49365234375,0.60107421875,0.69921875,0.599853515625,0.0,0.6328125,0.8671875,0.599853515625,0.0625,0.625,1.0
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4794921875,0.60205078125,0.72607421875,0.599853515625,0.0,0.6328125,0.8984375,0.599853515625,0.0,0.625,1.0
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46337890625,0.6005859375,0.69482421875,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.125,0.625,1.0
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2294921875,0.62451171875,1.0,0.599853515625,0.0234375,0.609375,1.0,0.599853515625,0.0,0.625,1.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52197265625,0.60107421875,0.6591796875,0.599853515625,0.015625,0.609375,0.8984375,0.599853515625,0.0,0.625,1.0
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.52099609375,0.60107421875,0.658203125,0.599853515625,0.046875,0.609375,0.90625,0.599853515625,0.0625,0.625,1.0
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.46484375,0.60205078125,0.72265625,0.5999581813812256,0.0078125,0.640625,0.90625,0.5999581813812256,0.0,0.625,1.0
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4833984375,0.6015625,0.68408203125,0.599853515625,0.0,0.6328125,0.890625,0.599853515625,0.0625,0.625,1.0
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.46728515625,0.60205078125,0.71142578125,0.599853515625,0.0,0.6328125,0.90625,0.599853515625,0.0625,0.625,1.0
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.45947265625,0.6015625,0.6875,0.599853515625,0.015625,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2255859375,0.541015625,1.0,0.599853515625,0.015625,0.5390625,1.0,0.599853515625,0.0,0.5625,1.0
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5185546875,0.60107421875,0.66650390625,0.599853515625,0.0078125,0.609375,0.8984375,0.599853515625,0.0,0.625,1.0
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51806640625,0.60107421875,0.6591796875,0.599853515625,0.03125,0.609375,0.921875,0.599853515625,0.0,0.625,1.0
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.4052734375,0.60107421875,0.7265625,0.5999581813812256,0.0078125,0.6484375,0.921875,0.5999581813812256,0.0,0.625,1.0
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48291015625,0.6015625,0.69677734375,0.599853515625,0.0,0.6328125,1.0,0.599853515625,0.0625,0.625,1.0
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.48486328125,0.60205078125,0.71630859375,0.599853515625,0.0,0.6328125,1.0,0.599853515625,0.0625,0.625,1.0
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47705078125,0.60205078125,0.70458984375,0.599853515625,0.0234375,0.625,1.0,0.599853515625,0.0625,0.625,1.0
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.18310546875,0.5771484375,1.0,0.599853515625,0.0078125,0.578125,1.0,0.599853515625,0.0,0.5625,1.0
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5126953125,0.60107421875,0.6630859375,0.599853515625,0.015625,0.609375,0.8828125,0.599853515625,0.0,0.625,1.0
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51708984375,0.6015625,0.66552734375,0.599853515625,0.046875,0.609375,0.8984375,0.599853515625,0.0,0.625,1.0
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.44091796875,0.60107421875,0.72412109375,0.5999581813812256,0.0,0.65625,0.921875,0.5999581813812256,0.0,0.625,1.0
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.4794921875,0.6015625,0.693359375,0.599853515625,0.0,0.6328125,0.875,0.599853515625,0.0,0.625,1.0
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47607421875,0.6015625,0.6923828125,0.599853515625,0.0,0.6328125,0.90625,0.599853515625,0.0625,0.625,1.0
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.49365234375,0.6005859375,0.6943359375,0.599853515625,0.0234375,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.2392578125,0.587890625,0.9990234375,0.599853515625,0.03125,0.6015625,1.0,0.599853515625,0.0,0.625,1.0
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6015625,0.6767578125,0.599853515625,0.015625,0.6171875,0.859375,0.599853515625,0.0625,0.625,1.0
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.6015625,0.66259765625,0.599853515625,0.03125,0.609375,0.859375,0.599853515625,0.0625,0.625,1.0
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.41943359375,0.60302734375,0.74169921875,0.5999581813812256,0.0078125,0.6640625,0.9296875,0.5999581813812256,0.0625,0.625,1.0
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.48681640625,0.6015625,0.69775390625,0.599853515625,0.0,0.625,1.0,0.599853515625,0.0625,0.625,1.0
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4970703125,0.60205078125,0.69384765625,0.599853515625,0.0,0.625,1.0,0.599853515625,0.125,0.625,1.0
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.5,0.60107421875,0.68310546875,0.599853515625,0.015625,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.134765625,0.57470703125,0.9921875,0.599853515625,0.015625,0.5703125,1.0,0.599853515625,0.0,0.625,1.0
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51025390625,0.6015625,0.66943359375,0.599853515625,0.0078125,0.6171875,0.8828125,0.599853515625,0.0,0.625,1.0
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.5029296875,0.6015625,0.66748046875,0.599853515625,0.015625,0.6171875,0.8671875,0.599853515625,0.0625,0.625,1.0
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.44189453125,0.603515625,0.73583984375,0.5999581813812256,0.0,0.6640625,0.921875,0.5999581813812256,0.0,0.625,1.0
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.45263671875,0.60107421875,0.69091796875,0.599853515625,0.0,0.6328125,0.8984375,0.599853515625,0.0625,0.625,1.0
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47412109375,0.60107421875,0.69677734375,0.599853515625,0.0,0.640625,0.921875,0.599853515625,0.0625,0.625,1.0
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4755859375,0.6015625,0.69384765625,0.599853515625,0.0234375,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.23876953125,0.5595703125,0.99462890625,0.599853515625,0.0546875,0.5546875,1.0,0.599853515625,0.0,0.5625,1.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.60107421875,0.67041015625,0.599853515625,0.015625,0.6171875,0.9609375,0.599853515625,0.0,0.625,1.0
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51318359375,0.60107421875,0.67138671875,0.599853515625,0.03125,0.6171875,0.953125,0.599853515625,0.0625,0.625,1.0
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.408203125,0.6025390625,0.7666015625,0.5999581813812256,0.0,0.671875,0.96875,0.5999581813812256,0.0,0.625,1.0
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.47607421875,0.60009765625,0.6943359375,0.599853515625,0.0,0.625,1.0,0.599853515625,0.0625,0.625,1.0
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.4775390625,0.60107421875,0.6884765625,0.599853515625,0.0078125,0.625,0.9765625,0.599853515625,0.0625,0.625,1.0
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,1678336,0.599853515625,"(128, 16)",8 x 256,2048,0.599853515625,0.47412109375,0.60009765625,0.69873046875,0.599853515625,0.0078125,0.6171875,1.0,0.599853515625,0.0625,0.625,1.0
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,6713344,0.599853515625,"(128, 16)",32 x 256,8192,0.599853515625,0.10791015625,0.603515625,1.0,0.599853515625,0.0,0.609375,1.0,0.599853515625,0.0,0.625,1.0
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51220703125,0.60009765625,0.67138671875,0.599853515625,0.0703125,0.6171875,0.9609375,0.599853515625,0.0625,0.625,1.0
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,23496704,0.599853515625,"(128, 16)",112 x 256,28672,0.599853515625,0.51513671875,0.60009765625,0.67529296875,0.599853515625,0.0703125,0.6171875,0.9296875,0.599853515625,0.0,0.625,1.0
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,23490560,0.5999581217765808,"(128, 16)",32 x 896,28672,0.5999581813812256,0.37353515625,0.59912109375,0.77978515625,0.5999581813812256,0.0,0.6953125,1.0,0.5999581813812256,0.0,0.625,1.0
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
sparsity_report_Meta-Llama-3.1-8B-wanda-unstructured-0.7.csv ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ row,layer_id,short_id,layer_type,param_type,shape,nparam,nnz,sparsity,tile_shape,n_tile,n_tile_total,tile_avg,tile_min,tile_med,tile_max,col_avg,col_min,col_med,col_max,row_avg,row_min,row_med,row_max
2
+ 0,model.layers.0.self_attn.q_proj,tx.0.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.380859375,0.70458984375,0.94970703125,0.699951171875,0.0,0.9453125,1.0,0.699951171875,0.125,0.6875,1.0
3
+ 1,model.layers.0.self_attn.k_proj,tx.0.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.4150390625,0.70263671875,0.9375,0.699951171875,0.0,0.9296875,1.0,0.699951171875,0.1875,0.6875,1.0
4
+ 2,model.layers.0.self_attn.v_proj,tx.0.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.44287109375,0.701171875,0.90771484375,0.699951171875,0.0078125,0.7734375,1.0,0.699951171875,0.125,0.6875,1.0
5
+ 3,model.layers.0.self_attn.o_proj,tx.0.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.1572265625,0.775390625,1.0,0.699951171875,0.015625,0.78125,1.0,0.699951171875,0.0,0.8125,1.0
6
+ 4,model.layers.0.mlp.gate_proj,tx.0.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.607421875,0.701171875,0.76318359375,0.6999512314796448,0.125,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
7
+ 5,model.layers.0.mlp.up_proj,tx.0.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62646484375,0.7001953125,0.76513671875,0.6999512314796448,0.1171875,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
8
+ 6,model.layers.0.mlp.down_proj,tx.0.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5146484375,0.70166015625,0.81689453125,0.6999861001968384,0.0625,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
9
+ 7,model.layers.1.self_attn.q_proj,tx.1.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.4599609375,0.703125,0.8779296875,0.699951171875,0.0,0.7890625,1.0,0.699951171875,0.125,0.6875,1.0
10
+ 8,model.layers.1.self_attn.k_proj,tx.1.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.4560546875,0.70263671875,0.86865234375,0.699951171875,0.0,0.796875,1.0,0.699951171875,0.1875,0.6875,1.0
11
+ 9,model.layers.1.self_attn.v_proj,tx.1.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.48046875,0.7021484375,0.83349609375,0.699951171875,0.03125,0.765625,1.0,0.699951171875,0.1875,0.6875,1.0
12
+ 10,model.layers.1.self_attn.o_proj,tx.1.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.39208984375,0.73291015625,0.9912109375,0.699951171875,0.0625,0.7109375,1.0,0.699951171875,0.0,0.75,1.0
13
+ 11,model.layers.1.mlp.gate_proj,tx.1.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.61865234375,0.70068359375,0.76806640625,0.6999512314796448,0.1953125,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
14
+ 12,model.layers.1.mlp.up_proj,tx.1.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62548828125,0.7001953125,0.77001953125,0.6999512314796448,0.15625,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
15
+ 13,model.layers.1.mlp.down_proj,tx.1.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.58740234375,0.7021484375,0.81005859375,0.6999861001968384,0.0078125,0.7421875,0.9609375,0.6999861001968384,0.125,0.6875,1.0
16
+ 14,model.layers.2.self_attn.q_proj,tx.2.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5400390625,0.703125,0.8369140625,0.699951171875,0.0078125,0.765625,1.0,0.699951171875,0.125,0.6875,1.0
17
+ 15,model.layers.2.self_attn.k_proj,tx.2.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.51904296875,0.7041015625,0.849609375,0.699951171875,0.015625,0.7734375,1.0,0.699951171875,0.125,0.6875,1.0
18
+ 16,model.layers.2.self_attn.v_proj,tx.2.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5751953125,0.703125,0.80615234375,0.699951171875,0.078125,0.75,1.0,0.699951171875,0.1875,0.6875,1.0
19
+ 17,model.layers.2.self_attn.o_proj,tx.2.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.37841796875,0.673828125,0.99755859375,0.699951171875,0.0234375,0.6875,1.0,0.699951171875,0.0,0.6875,1.0
20
+ 18,model.layers.2.mlp.gate_proj,tx.2.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.626953125,0.701171875,0.7607421875,0.6999512314796448,0.171875,0.7109375,1.0,0.6999512314796448,0.0625,0.6875,1.0
21
+ 19,model.layers.2.mlp.up_proj,tx.2.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6318359375,0.70068359375,0.75439453125,0.6999512314796448,0.21875,0.703125,1.0,0.6999512314796448,0.125,0.6875,1.0
22
+ 20,model.layers.2.mlp.down_proj,tx.2.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.560546875,0.70166015625,0.83056640625,0.6999861001968384,0.0078125,0.734375,0.984375,0.6999861001968384,0.0625,0.6875,1.0
23
+ 21,model.layers.3.self_attn.q_proj,tx.3.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57275390625,0.70166015625,0.8076171875,0.699951171875,0.0234375,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
24
+ 22,model.layers.3.self_attn.k_proj,tx.3.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55615234375,0.70263671875,0.80712890625,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.125,0.6875,1.0
25
+ 23,model.layers.3.self_attn.v_proj,tx.3.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58837890625,0.70166015625,0.79443359375,0.699951171875,0.0859375,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
26
+ 24,model.layers.3.self_attn.o_proj,tx.3.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.40673828125,0.673828125,0.9990234375,0.699951171875,0.0703125,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
27
+ 25,model.layers.3.mlp.gate_proj,tx.3.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62353515625,0.701171875,0.763671875,0.6999512314796448,0.140625,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
28
+ 26,model.layers.3.mlp.up_proj,tx.3.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6416015625,0.7001953125,0.76708984375,0.6999512314796448,0.234375,0.7109375,1.0,0.6999512314796448,0.0625,0.6875,1.0
29
+ 27,model.layers.3.mlp.down_proj,tx.3.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.576171875,0.701171875,0.81103515625,0.6999861001968384,0.0078125,0.7265625,0.9921875,0.6999861001968384,0.0625,0.6875,1.0
30
+ 28,model.layers.4.self_attn.q_proj,tx.4.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.55419921875,0.701171875,0.81884765625,0.699951171875,0.0078125,0.75,1.0,0.699951171875,0.125,0.6875,1.0
31
+ 29,model.layers.4.self_attn.k_proj,tx.4.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5556640625,0.701171875,0.8232421875,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.125,0.6875,1.0
32
+ 30,model.layers.4.self_attn.v_proj,tx.4.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58251953125,0.701171875,0.8134765625,0.699951171875,0.0703125,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
33
+ 31,model.layers.4.self_attn.o_proj,tx.4.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.29248046875,0.6943359375,0.99755859375,0.699951171875,0.0234375,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
34
+ 32,model.layers.4.mlp.gate_proj,tx.4.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6123046875,0.70068359375,0.7685546875,0.6999512314796448,0.1328125,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
35
+ 33,model.layers.4.mlp.up_proj,tx.4.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6259765625,0.70068359375,0.77099609375,0.6999512314796448,0.2109375,0.7109375,1.0,0.6999512314796448,0.125,0.6875,1.0
36
+ 34,model.layers.4.mlp.down_proj,tx.4.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53955078125,0.7021484375,0.8134765625,0.6999861001968384,0.0,0.71875,1.0,0.6999861001968384,0.125,0.6875,1.0
37
+ 35,model.layers.5.self_attn.q_proj,tx.5.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.55419921875,0.70166015625,0.82177734375,0.699951171875,0.015625,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
38
+ 36,model.layers.5.self_attn.k_proj,tx.5.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55029296875,0.7021484375,0.8115234375,0.699951171875,0.015625,0.75,0.984375,0.699951171875,0.1875,0.6875,1.0
39
+ 37,model.layers.5.self_attn.v_proj,tx.5.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.583984375,0.701171875,0.81689453125,0.699951171875,0.09375,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
40
+ 38,model.layers.5.self_attn.o_proj,tx.5.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.30322265625,0.7255859375,0.9775390625,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.0,0.75,1.0
41
+ 39,model.layers.5.mlp.gate_proj,tx.5.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59423828125,0.701171875,0.77978515625,0.6999512314796448,0.0703125,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
42
+ 40,model.layers.5.mlp.up_proj,tx.5.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59033203125,0.70068359375,0.77392578125,0.6999512314796448,0.2109375,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
43
+ 41,model.layers.5.mlp.down_proj,tx.5.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5673828125,0.701171875,0.8076171875,0.6999861001968384,0.0,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
44
+ 42,model.layers.6.self_attn.q_proj,tx.6.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.56591796875,0.70166015625,0.82958984375,0.699951171875,0.03125,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
45
+ 43,model.layers.6.self_attn.k_proj,tx.6.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.54443359375,0.7021484375,0.828125,0.699951171875,0.015625,0.7421875,0.984375,0.699951171875,0.125,0.6875,1.0
46
+ 44,model.layers.6.self_attn.v_proj,tx.6.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58642578125,0.701171875,0.802734375,0.699951171875,0.0703125,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
47
+ 45,model.layers.6.self_attn.o_proj,tx.6.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.33203125,0.70703125,0.9892578125,0.699951171875,0.0390625,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
48
+ 46,model.layers.6.mlp.gate_proj,tx.6.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60546875,0.70166015625,0.77734375,0.6999512314796448,0.0859375,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
49
+ 47,model.layers.6.mlp.up_proj,tx.6.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60693359375,0.701171875,0.77294921875,0.6999512314796448,0.1796875,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
50
+ 48,model.layers.6.mlp.down_proj,tx.6.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53564453125,0.70068359375,0.8017578125,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
51
+ 49,model.layers.7.self_attn.q_proj,tx.7.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.58056640625,0.701171875,0.80908203125,0.699951171875,0.0234375,0.7265625,1.0,0.699951171875,0.0625,0.6875,1.0
52
+ 50,model.layers.7.self_attn.k_proj,tx.7.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58203125,0.701171875,0.82080078125,0.699951171875,0.015625,0.734375,0.984375,0.699951171875,0.1875,0.6875,1.0
53
+ 51,model.layers.7.self_attn.v_proj,tx.7.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.60400390625,0.69970703125,0.80615234375,0.699951171875,0.1015625,0.71875,0.9921875,0.699951171875,0.125,0.6875,1.0
54
+ 52,model.layers.7.self_attn.o_proj,tx.7.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.39306640625,0.70947265625,0.94873046875,0.699951171875,0.0234375,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
55
+ 53,model.layers.7.mlp.gate_proj,tx.7.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60595703125,0.70068359375,0.76904296875,0.6999512314796448,0.078125,0.71875,0.9296875,0.6999512314796448,0.0,0.6875,1.0
56
+ 54,model.layers.7.mlp.up_proj,tx.7.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.62255859375,0.70068359375,0.77392578125,0.6999512314796448,0.140625,0.71875,0.9375,0.6999512314796448,0.125,0.6875,1.0
57
+ 55,model.layers.7.mlp.down_proj,tx.7.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.54052734375,0.69970703125,0.83349609375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
58
+ 56,model.layers.8.self_attn.q_proj,tx.8.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.56884765625,0.7021484375,0.79931640625,0.699951171875,0.0234375,0.7265625,0.9921875,0.699951171875,0.125,0.6875,1.0
59
+ 57,model.layers.8.self_attn.k_proj,tx.8.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.564453125,0.703125,0.79052734375,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
60
+ 58,model.layers.8.self_attn.v_proj,tx.8.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58642578125,0.701171875,0.775390625,0.699951171875,0.078125,0.71875,1.0,0.699951171875,0.1875,0.6875,1.0
61
+ 59,model.layers.8.self_attn.o_proj,tx.8.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.306640625,0.7109375,0.9765625,0.699951171875,0.015625,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
62
+ 60,model.layers.8.mlp.gate_proj,tx.8.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.7021484375,0.77978515625,0.6999512314796448,0.0390625,0.71875,1.0,0.6999512314796448,0.0625,0.6875,1.0
63
+ 61,model.layers.8.mlp.up_proj,tx.8.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60546875,0.701171875,0.77001953125,0.6999512314796448,0.1640625,0.71875,1.0,0.6999512314796448,0.125,0.6875,1.0
64
+ 62,model.layers.8.mlp.down_proj,tx.8.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53955078125,0.70263671875,0.84619140625,0.6999861001968384,0.015625,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
65
+ 63,model.layers.9.self_attn.q_proj,tx.9.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.583984375,0.70068359375,0.8125,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
66
+ 64,model.layers.9.self_attn.k_proj,tx.9.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57763671875,0.7001953125,0.80322265625,0.699951171875,0.0234375,0.7265625,0.984375,0.699951171875,0.1875,0.6875,1.0
67
+ 65,model.layers.9.self_attn.v_proj,tx.9.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5927734375,0.69921875,0.78515625,0.699951171875,0.0859375,0.7109375,1.0,0.699951171875,0.1875,0.6875,1.0
68
+ 66,model.layers.9.self_attn.o_proj,tx.9.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.3671875,0.6826171875,0.97998046875,0.699951171875,0.078125,0.71875,1.0,0.699951171875,0.0,0.6875,1.0
69
+ 67,model.layers.9.mlp.gate_proj,tx.9.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.578125,0.70166015625,0.779296875,0.6999512314796448,0.015625,0.71875,0.9453125,0.6999512314796448,0.125,0.6875,1.0
70
+ 68,model.layers.9.mlp.up_proj,tx.9.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.70068359375,0.7734375,0.6999512314796448,0.1484375,0.71875,0.9375,0.6999512314796448,0.0625,0.6875,1.0
71
+ 69,model.layers.9.mlp.down_proj,tx.9.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.50244140625,0.7001953125,0.86474609375,0.6999861001968384,0.0078125,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
72
+ 70,model.layers.10.self_attn.q_proj,tx.10.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57275390625,0.70263671875,0.80078125,0.699951171875,0.0234375,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
73
+ 71,model.layers.10.self_attn.k_proj,tx.10.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5830078125,0.703125,0.79443359375,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
74
+ 72,model.layers.10.self_attn.v_proj,tx.10.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.6005859375,0.7021484375,0.7900390625,0.699951171875,0.1015625,0.71875,0.9921875,0.699951171875,0.125,0.6875,1.0
75
+ 73,model.layers.10.self_attn.o_proj,tx.10.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.28076171875,0.75634765625,0.96875,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.0,0.75,1.0
76
+ 74,model.layers.10.mlp.gate_proj,tx.10.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6083984375,0.70166015625,0.791015625,0.6999512314796448,0.0234375,0.71875,0.9375,0.6999512314796448,0.0625,0.6875,1.0
77
+ 75,model.layers.10.mlp.up_proj,tx.10.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.61376953125,0.70166015625,0.775390625,0.6999512314796448,0.125,0.71875,0.9296875,0.6999512314796448,0.0625,0.6875,1.0
78
+ 76,model.layers.10.mlp.down_proj,tx.10.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.533203125,0.701171875,0.85888671875,0.6999861001968384,0.0078125,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
79
+ 77,model.layers.11.self_attn.q_proj,tx.11.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57373046875,0.7021484375,0.79345703125,0.699951171875,0.015625,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
80
+ 78,model.layers.11.self_attn.k_proj,tx.11.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57958984375,0.703125,0.7841796875,0.699951171875,0.03125,0.7265625,0.9921875,0.699951171875,0.125,0.6875,1.0
81
+ 79,model.layers.11.self_attn.v_proj,tx.11.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.59326171875,0.70166015625,0.78515625,0.699951171875,0.125,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
82
+ 80,model.layers.11.self_attn.o_proj,tx.11.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.3916015625,0.7158203125,0.9892578125,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.0,0.6875,1.0
83
+ 81,model.layers.11.mlp.gate_proj,tx.11.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60205078125,0.7021484375,0.7890625,0.6999512314796448,0.015625,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
84
+ 82,model.layers.11.mlp.up_proj,tx.11.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5986328125,0.701171875,0.787109375,0.6999512314796448,0.09375,0.71875,0.953125,0.6999512314796448,0.125,0.6875,1.0
85
+ 83,model.layers.11.mlp.down_proj,tx.11.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.51025390625,0.70068359375,0.865234375,0.6999861001968384,0.0,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
86
+ 84,model.layers.12.self_attn.q_proj,tx.12.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54638671875,0.701171875,0.7958984375,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
87
+ 85,model.layers.12.self_attn.k_proj,tx.12.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57421875,0.701171875,0.79345703125,0.699951171875,0.0234375,0.7265625,0.9921875,0.699951171875,0.125,0.6875,1.0
88
+ 86,model.layers.12.self_attn.v_proj,tx.12.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5751953125,0.7001953125,0.78564453125,0.699951171875,0.0859375,0.71875,1.0,0.699951171875,0.1875,0.6875,1.0
89
+ 87,model.layers.12.self_attn.o_proj,tx.12.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.384765625,0.7080078125,0.9140625,0.699951171875,0.109375,0.71875,1.0,0.699951171875,0.0,0.6875,1.0
90
+ 88,model.layers.12.mlp.gate_proj,tx.12.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58837890625,0.703125,0.7734375,0.6999512314796448,0.0390625,0.71875,0.9453125,0.6999512314796448,0.125,0.6875,1.0
91
+ 89,model.layers.12.mlp.up_proj,tx.12.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5947265625,0.70263671875,0.76806640625,0.6999512314796448,0.109375,0.71875,0.9453125,0.6999512314796448,0.125,0.6875,1.0
92
+ 90,model.layers.12.mlp.down_proj,tx.12.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.50439453125,0.70068359375,0.85498046875,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
93
+ 91,model.layers.13.self_attn.q_proj,tx.13.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.587890625,0.69873046875,0.79638671875,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
94
+ 92,model.layers.13.self_attn.k_proj,tx.13.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.59130859375,0.69970703125,0.79345703125,0.699951171875,0.0078125,0.7265625,0.9921875,0.699951171875,0.1875,0.6875,1.0
95
+ 93,model.layers.13.self_attn.v_proj,tx.13.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.61669921875,0.69921875,0.775390625,0.699951171875,0.1015625,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
96
+ 94,model.layers.13.self_attn.o_proj,tx.13.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.32958984375,0.70068359375,0.99462890625,0.699951171875,0.046875,0.71875,1.0,0.699951171875,0.0,0.6875,1.0
97
+ 95,model.layers.13.mlp.gate_proj,tx.13.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5830078125,0.701171875,0.7900390625,0.6999512314796448,0.03125,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
98
+ 96,model.layers.13.mlp.up_proj,tx.13.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.591796875,0.701171875,0.7822265625,0.6999512314796448,0.109375,0.71875,0.9453125,0.6999512314796448,0.0625,0.6875,1.0
99
+ 97,model.layers.13.mlp.down_proj,tx.13.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53564453125,0.69921875,0.84912109375,0.6999861001968384,0.0,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
100
+ 98,model.layers.14.self_attn.q_proj,tx.14.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.576171875,0.7001953125,0.80078125,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.125,0.6875,1.0
101
+ 99,model.layers.14.self_attn.k_proj,tx.14.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57421875,0.70166015625,0.81005859375,0.699951171875,0.015625,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
102
+ 100,model.layers.14.self_attn.v_proj,tx.14.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56787109375,0.7001953125,0.81494140625,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.1875,0.6875,1.0
103
+ 101,model.layers.14.self_attn.o_proj,tx.14.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.3203125,0.7138671875,0.9765625,0.699951171875,0.0390625,0.7265625,1.0,0.699951171875,0.0,0.75,1.0
104
+ 102,model.layers.14.mlp.gate_proj,tx.14.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59375,0.701171875,0.7822265625,0.6999512314796448,0.03125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
105
+ 103,model.layers.14.mlp.up_proj,tx.14.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59765625,0.701171875,0.7822265625,0.6999512314796448,0.109375,0.71875,0.953125,0.6999512314796448,0.0625,0.6875,1.0
106
+ 104,model.layers.14.mlp.down_proj,tx.14.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.47802734375,0.701171875,0.8837890625,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
107
+ 105,model.layers.15.self_attn.q_proj,tx.15.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.59521484375,0.7001953125,0.79931640625,0.699951171875,0.03125,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
108
+ 106,model.layers.15.self_attn.k_proj,tx.15.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.59326171875,0.70068359375,0.7978515625,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
109
+ 107,model.layers.15.self_attn.v_proj,tx.15.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.591796875,0.69970703125,0.7939453125,0.699951171875,0.0546875,0.7265625,1.0,0.699951171875,0.125,0.6875,1.0
110
+ 108,model.layers.15.self_attn.o_proj,tx.15.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.279296875,0.71044921875,1.0,0.699951171875,0.0703125,0.7109375,1.0,0.699951171875,0.0,0.6875,1.0
111
+ 109,model.layers.15.mlp.gate_proj,tx.15.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.70166015625,0.810546875,0.6999512314796448,0.0234375,0.7265625,0.96875,0.6999512314796448,0.0625,0.6875,1.0
112
+ 110,model.layers.15.mlp.up_proj,tx.15.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5927734375,0.70166015625,0.80126953125,0.6999512314796448,0.125,0.71875,0.9609375,0.6999512314796448,0.125,0.6875,1.0
113
+ 111,model.layers.15.mlp.down_proj,tx.15.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.52099609375,0.70068359375,0.87353515625,0.6999861001968384,0.0234375,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
114
+ 112,model.layers.16.self_attn.q_proj,tx.16.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.59326171875,0.7021484375,0.80712890625,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
115
+ 113,model.layers.16.self_attn.k_proj,tx.16.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.6005859375,0.7021484375,0.802734375,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
116
+ 114,model.layers.16.self_attn.v_proj,tx.16.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58203125,0.7001953125,0.81494140625,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
117
+ 115,model.layers.16.self_attn.o_proj,tx.16.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.30126953125,0.712890625,1.0,0.699951171875,0.0390625,0.71875,1.0,0.699951171875,0.0,0.75,1.0
118
+ 116,model.layers.16.mlp.gate_proj,tx.16.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60791015625,0.7001953125,0.796875,0.6999512314796448,0.0,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
119
+ 117,model.layers.16.mlp.up_proj,tx.16.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6103515625,0.69970703125,0.79052734375,0.6999512314796448,0.09375,0.71875,0.984375,0.6999512314796448,0.125,0.6875,1.0
120
+ 118,model.layers.16.mlp.down_proj,tx.16.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.53466796875,0.69873046875,0.86865234375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
121
+ 119,model.layers.17.self_attn.q_proj,tx.17.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57763671875,0.70166015625,0.8212890625,0.699951171875,0.015625,0.734375,0.9921875,0.699951171875,0.125,0.6875,1.0
122
+ 120,model.layers.17.self_attn.k_proj,tx.17.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58056640625,0.703125,0.8203125,0.699951171875,0.015625,0.7421875,0.984375,0.699951171875,0.1875,0.6875,1.0
123
+ 121,model.layers.17.self_attn.v_proj,tx.17.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58642578125,0.701171875,0.80224609375,0.699951171875,0.0703125,0.7265625,1.0,0.699951171875,0.1875,0.6875,1.0
124
+ 122,model.layers.17.self_attn.o_proj,tx.17.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.357421875,0.6708984375,1.0,0.699951171875,0.0625,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
125
+ 123,model.layers.17.mlp.gate_proj,tx.17.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.583984375,0.7001953125,0.78662109375,0.6999512314796448,0.046875,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
126
+ 124,model.layers.17.mlp.up_proj,tx.17.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5908203125,0.70068359375,0.78369140625,0.6999512314796448,0.0859375,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
127
+ 125,model.layers.17.mlp.down_proj,tx.17.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5498046875,0.701171875,0.8662109375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.125,0.6875,1.0
128
+ 126,model.layers.18.self_attn.q_proj,tx.18.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5810546875,0.70361328125,0.8017578125,0.699951171875,0.0234375,0.734375,0.9921875,0.699951171875,0.0625,0.6875,1.0
129
+ 127,model.layers.18.self_attn.k_proj,tx.18.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.58740234375,0.703125,0.802734375,0.699951171875,0.0078125,0.7421875,0.984375,0.699951171875,0.125,0.6875,1.0
130
+ 128,model.layers.18.self_attn.v_proj,tx.18.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56787109375,0.7021484375,0.81494140625,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
131
+ 129,model.layers.18.self_attn.o_proj,tx.18.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.22705078125,0.71533203125,1.0,0.699951171875,0.0234375,0.71875,1.0,0.699951171875,0.0,0.75,1.0
132
+ 130,model.layers.18.mlp.gate_proj,tx.18.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.57373046875,0.70166015625,0.78515625,0.6999512314796448,0.03125,0.7265625,0.96875,0.6999512314796448,0.125,0.6875,1.0
133
+ 131,model.layers.18.mlp.up_proj,tx.18.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58837890625,0.70166015625,0.775390625,0.6999512314796448,0.0703125,0.71875,0.984375,0.6999512314796448,0.125,0.6875,1.0
134
+ 132,model.layers.18.mlp.down_proj,tx.18.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.52783203125,0.70166015625,0.880859375,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
135
+ 133,model.layers.19.self_attn.q_proj,tx.19.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.576171875,0.7021484375,0.80712890625,0.699951171875,0.015625,0.734375,0.96875,0.699951171875,0.125,0.6875,1.0
136
+ 134,model.layers.19.self_attn.k_proj,tx.19.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56787109375,0.70166015625,0.80419921875,0.699951171875,0.0078125,0.7421875,0.9765625,0.699951171875,0.1875,0.6875,1.0
137
+ 135,model.layers.19.self_attn.v_proj,tx.19.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56884765625,0.70166015625,0.80908203125,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
138
+ 136,model.layers.19.self_attn.o_proj,tx.19.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.25537109375,0.75390625,1.0,0.699951171875,0.0234375,0.75,1.0,0.699951171875,0.0,0.75,1.0
139
+ 137,model.layers.19.mlp.gate_proj,tx.19.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5537109375,0.70263671875,0.7861328125,0.6999512314796448,0.015625,0.7265625,0.96875,0.6999512314796448,0.125,0.6875,1.0
140
+ 138,model.layers.19.mlp.up_proj,tx.19.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5693359375,0.7021484375,0.79150390625,0.6999512314796448,0.078125,0.7265625,0.984375,0.6999512314796448,0.0625,0.6875,1.0
141
+ 139,model.layers.19.mlp.down_proj,tx.19.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.49755859375,0.70166015625,0.85888671875,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
142
+ 140,model.layers.20.self_attn.q_proj,tx.20.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.56689453125,0.7021484375,0.81103515625,0.699951171875,0.015625,0.7421875,0.9921875,0.699951171875,0.125,0.6875,1.0
143
+ 141,model.layers.20.self_attn.k_proj,tx.20.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56884765625,0.70361328125,0.8115234375,0.699951171875,0.0078125,0.75,0.984375,0.699951171875,0.125,0.6875,1.0
144
+ 142,model.layers.20.self_attn.v_proj,tx.20.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55322265625,0.70361328125,0.8037109375,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
145
+ 143,model.layers.20.self_attn.o_proj,tx.20.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.330078125,0.70703125,0.99951171875,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.0,0.75,1.0
146
+ 144,model.layers.20.mlp.gate_proj,tx.20.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.57470703125,0.7021484375,0.787109375,0.6999512314796448,0.0234375,0.7265625,0.96875,0.6999512314796448,0.0625,0.6875,1.0
147
+ 145,model.layers.20.mlp.up_proj,tx.20.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59375,0.7021484375,0.787109375,0.6999512314796448,0.0859375,0.7265625,0.9921875,0.6999512314796448,0.125,0.6875,1.0
148
+ 146,model.layers.20.mlp.down_proj,tx.20.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5576171875,0.7001953125,0.85498046875,0.6999861001968384,0.0078125,0.734375,1.0,0.6999861001968384,0.0625,0.6875,1.0
149
+ 147,model.layers.21.self_attn.q_proj,tx.21.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57080078125,0.70263671875,0.798828125,0.699951171875,0.0078125,0.7421875,0.9765625,0.699951171875,0.125,0.6875,1.0
150
+ 148,model.layers.21.self_attn.k_proj,tx.21.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5732421875,0.70263671875,0.79736328125,0.699951171875,0.0078125,0.7421875,0.9765625,0.699951171875,0.1875,0.6875,1.0
151
+ 149,model.layers.21.self_attn.v_proj,tx.21.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5615234375,0.70263671875,0.80126953125,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
152
+ 150,model.layers.21.self_attn.o_proj,tx.21.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.34375,0.66845703125,1.0,0.699951171875,0.0078125,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
153
+ 151,model.layers.21.mlp.gate_proj,tx.21.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59619140625,0.701171875,0.7880859375,0.6999512314796448,0.0,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
154
+ 152,model.layers.21.mlp.up_proj,tx.21.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59033203125,0.701171875,0.78759765625,0.6999512314796448,0.0546875,0.7265625,0.96875,0.6999512314796448,0.0625,0.6875,1.0
155
+ 153,model.layers.21.mlp.down_proj,tx.21.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.49365234375,0.701171875,0.83447265625,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.0625,0.6875,1.0
156
+ 154,model.layers.22.self_attn.q_proj,tx.22.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5654296875,0.703125,0.8017578125,0.699951171875,0.0078125,0.7421875,0.96875,0.699951171875,0.0625,0.6875,1.0
157
+ 155,model.layers.22.self_attn.k_proj,tx.22.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.564453125,0.70361328125,0.796875,0.699951171875,0.0078125,0.75,0.984375,0.699951171875,0.125,0.6875,1.0
158
+ 156,model.layers.22.self_attn.v_proj,tx.22.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5703125,0.70263671875,0.7900390625,0.699951171875,0.0390625,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
159
+ 157,model.layers.22.self_attn.o_proj,tx.22.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.23828125,0.72314453125,1.0,0.699951171875,0.0078125,0.7265625,1.0,0.699951171875,0.0,0.75,1.0
160
+ 158,model.layers.22.mlp.gate_proj,tx.22.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.599609375,0.7021484375,0.783203125,0.6999512314796448,0.0078125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
161
+ 159,model.layers.22.mlp.up_proj,tx.22.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.60595703125,0.70166015625,0.77880859375,0.6999512314796448,0.0703125,0.7265625,0.9765625,0.6999512314796448,0.0625,0.6875,1.0
162
+ 160,model.layers.22.mlp.down_proj,tx.22.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.54736328125,0.701171875,0.83544921875,0.6999861001968384,0.0078125,0.7265625,1.0,0.6999861001968384,0.0625,0.6875,1.0
163
+ 161,model.layers.23.self_attn.q_proj,tx.23.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.564453125,0.703125,0.802734375,0.699951171875,0.0,0.7421875,0.96875,0.699951171875,0.125,0.6875,1.0
164
+ 162,model.layers.23.self_attn.k_proj,tx.23.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55712890625,0.70361328125,0.8115234375,0.699951171875,0.0078125,0.75,0.96875,0.699951171875,0.125,0.6875,1.0
165
+ 163,model.layers.23.self_attn.v_proj,tx.23.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.57275390625,0.7021484375,0.80517578125,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
166
+ 164,model.layers.23.self_attn.o_proj,tx.23.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.28076171875,0.736328125,1.0,0.699951171875,0.046875,0.7421875,1.0,0.699951171875,0.0,0.75,1.0
167
+ 165,model.layers.23.mlp.gate_proj,tx.23.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.595703125,0.70263671875,0.7861328125,0.6999512314796448,0.015625,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
168
+ 166,model.layers.23.mlp.up_proj,tx.23.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.6005859375,0.7021484375,0.78857421875,0.6999512314796448,0.0625,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
169
+ 167,model.layers.23.mlp.down_proj,tx.23.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5439453125,0.70263671875,0.8251953125,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
170
+ 168,model.layers.24.self_attn.q_proj,tx.24.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54833984375,0.7021484375,0.81396484375,0.699951171875,0.0,0.7421875,0.9765625,0.699951171875,0.125,0.6875,1.0
171
+ 169,model.layers.24.self_attn.k_proj,tx.24.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55419921875,0.7021484375,0.8251953125,0.699951171875,0.0,0.75,0.9765625,0.699951171875,0.1875,0.6875,1.0
172
+ 170,model.layers.24.self_attn.v_proj,tx.24.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5576171875,0.70068359375,0.8125,0.699951171875,0.015625,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
173
+ 171,model.layers.24.self_attn.o_proj,tx.24.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.27099609375,0.716796875,1.0,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.0,0.75,1.0
174
+ 172,model.layers.24.mlp.gate_proj,tx.24.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5966796875,0.70166015625,0.7890625,0.6999512314796448,0.03125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
175
+ 173,model.layers.24.mlp.up_proj,tx.24.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.599609375,0.70166015625,0.78564453125,0.6999512314796448,0.0703125,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
176
+ 174,model.layers.24.mlp.down_proj,tx.24.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.48388671875,0.701171875,0.83837890625,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
177
+ 175,model.layers.25.self_attn.q_proj,tx.25.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.57421875,0.70263671875,0.8056640625,0.699951171875,0.0,0.7421875,0.9765625,0.699951171875,0.125,0.6875,1.0
178
+ 176,model.layers.25.self_attn.k_proj,tx.25.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55419921875,0.7021484375,0.83935546875,0.699951171875,0.0,0.75,0.9921875,0.699951171875,0.125,0.6875,1.0
179
+ 177,model.layers.25.self_attn.v_proj,tx.25.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.54248046875,0.70166015625,0.8037109375,0.699951171875,0.03125,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
180
+ 178,model.layers.25.self_attn.o_proj,tx.25.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.26220703125,0.79541015625,1.0,0.699951171875,0.015625,0.796875,1.0,0.699951171875,0.0,0.8125,1.0
181
+ 179,model.layers.25.mlp.gate_proj,tx.25.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59375,0.70166015625,0.78369140625,0.6999512314796448,0.03125,0.7265625,0.953125,0.6999512314796448,0.0625,0.6875,1.0
182
+ 180,model.layers.25.mlp.up_proj,tx.25.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59130859375,0.70166015625,0.7822265625,0.6999512314796448,0.0546875,0.7265625,0.9609375,0.6999512314796448,0.125,0.6875,1.0
183
+ 181,model.layers.25.mlp.down_proj,tx.25.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.52294921875,0.7021484375,0.84033203125,0.6999861001968384,0.0,0.734375,1.0,0.6999861001968384,0.125,0.6875,1.0
184
+ 182,model.layers.26.self_attn.q_proj,tx.26.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.55419921875,0.703125,0.8056640625,0.699951171875,0.0,0.7421875,0.9921875,0.699951171875,0.125,0.6875,1.0
185
+ 183,model.layers.26.self_attn.k_proj,tx.26.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5478515625,0.70361328125,0.82421875,0.699951171875,0.0,0.75,0.9765625,0.699951171875,0.125,0.6875,1.0
186
+ 184,model.layers.26.self_attn.v_proj,tx.26.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55029296875,0.70166015625,0.80126953125,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
187
+ 185,model.layers.26.self_attn.o_proj,tx.26.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.24609375,0.6591796875,1.0,0.699951171875,0.015625,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
188
+ 186,model.layers.26.mlp.gate_proj,tx.26.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58056640625,0.701171875,0.79248046875,0.6999512314796448,0.0390625,0.7265625,0.9453125,0.6999512314796448,0.125,0.6875,1.0
189
+ 187,model.layers.26.mlp.up_proj,tx.26.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.58251953125,0.701171875,0.78955078125,0.6999512314796448,0.078125,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
190
+ 188,model.layers.26.mlp.down_proj,tx.26.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.443359375,0.70263671875,0.8466796875,0.6999861001968384,0.0078125,0.7421875,1.0,0.6999861001968384,0.0625,0.6875,1.0
191
+ 189,model.layers.27.self_attn.q_proj,tx.27.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.5283203125,0.70361328125,0.80322265625,0.699951171875,0.0,0.75,1.0,0.699951171875,0.125,0.6875,1.0
192
+ 190,model.layers.27.self_attn.k_proj,tx.27.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.541015625,0.7041015625,0.8251953125,0.699951171875,0.0,0.75,1.0,0.699951171875,0.125,0.6875,1.0
193
+ 191,model.layers.27.self_attn.v_proj,tx.27.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55615234375,0.7021484375,0.80322265625,0.699951171875,0.0234375,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
194
+ 192,model.layers.27.self_attn.o_proj,tx.27.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.2080078125,0.736328125,1.0,0.699951171875,0.0078125,0.7890625,1.0,0.699951171875,0.0,0.75,1.0
195
+ 193,model.layers.27.mlp.gate_proj,tx.27.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.55712890625,0.70263671875,0.78515625,0.6999512314796448,0.0234375,0.7265625,0.9453125,0.6999512314796448,0.0625,0.6875,1.0
196
+ 194,model.layers.27.mlp.up_proj,tx.27.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.55908203125,0.7021484375,0.77978515625,0.6999512314796448,0.0546875,0.7265625,0.953125,0.6999512314796448,0.0625,0.6875,1.0
197
+ 195,model.layers.27.mlp.down_proj,tx.27.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.5302734375,0.69970703125,0.83203125,0.6999861001968384,0.0,0.75,1.0,0.6999861001968384,0.125,0.6875,1.0
198
+ 196,model.layers.28.self_attn.q_proj,tx.28.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.53173828125,0.70361328125,0.81298828125,0.699951171875,0.0,0.7421875,0.984375,0.699951171875,0.125,0.6875,1.0
199
+ 197,model.layers.28.self_attn.k_proj,tx.28.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.537109375,0.70361328125,0.818359375,0.699951171875,0.0,0.75,0.96875,0.699951171875,0.1875,0.6875,1.0
200
+ 198,model.layers.28.self_attn.v_proj,tx.28.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.556640625,0.70166015625,0.822265625,0.699951171875,0.0234375,0.734375,1.0,0.699951171875,0.0625,0.6875,1.0
201
+ 199,model.layers.28.self_attn.o_proj,tx.28.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.30224609375,0.728515625,1.0,0.699951171875,0.0390625,0.734375,1.0,0.699951171875,0.0,0.75,1.0
202
+ 200,model.layers.28.mlp.gate_proj,tx.28.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5693359375,0.70263671875,0.77978515625,0.6999512314796448,0.0234375,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
203
+ 201,model.layers.28.mlp.up_proj,tx.28.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5693359375,0.70263671875,0.7822265625,0.6999512314796448,0.0546875,0.7265625,0.9453125,0.6999512314796448,0.0625,0.6875,1.0
204
+ 202,model.layers.28.mlp.down_proj,tx.28.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.474609375,0.703125,0.8681640625,0.6999861001968384,0.0078125,0.7578125,1.0,0.6999861001968384,0.125,0.6875,1.0
205
+ 203,model.layers.29.self_attn.q_proj,tx.29.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54345703125,0.701171875,0.8125,0.699951171875,0.0,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
206
+ 204,model.layers.29.self_attn.k_proj,tx.29.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.56298828125,0.70166015625,0.79931640625,0.699951171875,0.0,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
207
+ 205,model.layers.29.self_attn.v_proj,tx.29.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.55615234375,0.70166015625,0.8017578125,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
208
+ 206,model.layers.29.self_attn.o_proj,tx.29.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.17431640625,0.73291015625,1.0,0.699951171875,0.015625,0.7734375,1.0,0.699951171875,0.0,0.75,1.0
209
+ 207,model.layers.29.mlp.gate_proj,tx.29.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.5732421875,0.70166015625,0.79296875,0.6999512314796448,0.03125,0.734375,0.9609375,0.6999512314796448,0.125,0.6875,1.0
210
+ 208,model.layers.29.mlp.up_proj,tx.29.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.57958984375,0.70166015625,0.7861328125,0.6999512314796448,0.0390625,0.7265625,0.953125,0.6999512314796448,0.125,0.6875,1.0
211
+ 209,model.layers.29.mlp.down_proj,tx.29.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.43017578125,0.70458984375,0.86669921875,0.6999861001968384,0.0,0.7734375,1.0,0.6999861001968384,0.125,0.6875,1.0
212
+ 210,model.layers.30.self_attn.q_proj,tx.30.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.541015625,0.70166015625,0.81982421875,0.699951171875,0.0,0.75,0.984375,0.699951171875,0.0625,0.6875,1.0
213
+ 211,model.layers.30.self_attn.k_proj,tx.30.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.54541015625,0.701171875,0.82763671875,0.699951171875,0.0,0.75,0.984375,0.699951171875,0.125,0.6875,1.0
214
+ 212,model.layers.30.self_attn.v_proj,tx.30.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5244140625,0.70068359375,0.818359375,0.699951171875,0.03125,0.734375,1.0,0.699951171875,0.1875,0.6875,1.0
215
+ 213,model.layers.30.self_attn.o_proj,tx.30.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.31591796875,0.6845703125,1.0,0.699951171875,0.0625,0.703125,1.0,0.699951171875,0.0,0.6875,1.0
216
+ 214,model.layers.30.mlp.gate_proj,tx.30.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59228515625,0.69921875,0.79638671875,0.6999512314796448,0.0546875,0.7265625,1.0,0.6999512314796448,0.125,0.6875,1.0
217
+ 215,model.layers.30.mlp.up_proj,tx.30.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59228515625,0.69921875,0.7958984375,0.6999512314796448,0.0625,0.7265625,0.984375,0.6999512314796448,0.125,0.6875,1.0
218
+ 216,model.layers.30.mlp.down_proj,tx.30.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.47607421875,0.703125,0.87646484375,0.6999861001968384,0.0,0.796875,1.0,0.6999861001968384,0.0625,0.6875,1.0
219
+ 217,model.layers.31.self_attn.q_proj,tx.31.attn.q,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.54296875,0.701171875,0.82177734375,0.699951171875,0.0,0.7421875,1.0,0.699951171875,0.1875,0.6875,1.0
220
+ 218,model.layers.31.self_attn.k_proj,tx.31.attn.k,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.5185546875,0.70068359375,0.83935546875,0.699951171875,0.0078125,0.7421875,1.0,0.699951171875,0.125,0.6875,1.0
221
+ 219,model.layers.31.self_attn.v_proj,tx.31.attn.v,Linear,weight,"[1024, 4096]",4194304,1258496,0.699951171875,"(128, 16)",8 x 256,2048,0.699951171875,0.515625,0.701171875,0.82421875,0.699951171875,0.015625,0.734375,1.0,0.699951171875,0.125,0.6875,1.0
222
+ 220,model.layers.31.self_attn.o_proj,tx.31.attn.o,Linear,weight,"[4096, 4096]",16777216,5033984,0.699951171875,"(128, 16)",32 x 256,8192,0.699951171875,0.17138671875,0.7275390625,1.0,0.699951171875,0.0078125,0.7265625,1.0,0.699951171875,0.0,0.75,1.0
223
+ 221,model.layers.31.mlp.gate_proj,tx.31.mlp.gate,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.59521484375,0.70068359375,0.796875,0.6999512314796448,0.0703125,0.7265625,0.9765625,0.6999512314796448,0.125,0.6875,1.0
224
+ 222,model.layers.31.mlp.up_proj,tx.31.mlp.up,Linear,weight,"[14336, 4096]",58720256,17618944,0.699951171875,"(128, 16)",112 x 256,28672,0.6999512314796448,0.595703125,0.70068359375,0.79443359375,0.6999512314796448,0.0703125,0.7265625,0.984375,0.6999512314796448,0.125,0.6875,1.0
225
+ 223,model.layers.31.mlp.down_proj,tx.31.mlp.down,Linear,weight,"[4096, 14336]",58720256,17616896,0.6999860405921936,"(128, 16)",32 x 896,28672,0.6999861001968384,0.4501953125,0.69921875,0.92431640625,0.6999861001968384,0.0,0.828125,1.0,0.6999861001968384,0.125,0.6875,1.0
226
+ 224,lm_head,lm_head,Linear,weight,"[128256, 4096]",525336576,525336576,0.0,"(128, 16)",1002 x 256,256512,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0