Upload 29 files
Browse files- model_acts/.DS_Store +0 -0
- model_acts/tied_per_target_skip/.DS_Store +0 -0
- model_acts/tied_per_target_skip/layer_0.npz +3 -0
- model_acts/tied_per_target_skip/layer_1.npz +3 -0
- model_acts/tied_per_target_skip/layer_10.npz +3 -0
- model_acts/tied_per_target_skip/layer_11.npz +3 -0
- model_acts/tied_per_target_skip/layer_2.npz +3 -0
- model_acts/tied_per_target_skip/layer_3.npz +3 -0
- model_acts/tied_per_target_skip/layer_4.npz +3 -0
- model_acts/tied_per_target_skip/layer_5.npz +3 -0
- model_acts/tied_per_target_skip/layer_6.npz +3 -0
- model_acts/tied_per_target_skip/layer_7.npz +3 -0
- model_acts/tied_per_target_skip/layer_8.npz +3 -0
- model_acts/tied_per_target_skip/layer_9.npz +3 -0
- model_acts/tied_per_target_skip/summary.json +79 -0
- model_acts/untied_batchtopk/.DS_Store +0 -0
- model_acts/untied_batchtopk/layer_0.npz +3 -0
- model_acts/untied_batchtopk/layer_1.npz +3 -0
- model_acts/untied_batchtopk/layer_10.npz +3 -0
- model_acts/untied_batchtopk/layer_11.npz +3 -0
- model_acts/untied_batchtopk/layer_2.npz +3 -0
- model_acts/untied_batchtopk/layer_3.npz +3 -0
- model_acts/untied_batchtopk/layer_4.npz +3 -0
- model_acts/untied_batchtopk/layer_5.npz +3 -0
- model_acts/untied_batchtopk/layer_6.npz +3 -0
- model_acts/untied_batchtopk/layer_7.npz +3 -0
- model_acts/untied_batchtopk/layer_8.npz +3 -0
- model_acts/untied_batchtopk/layer_9.npz +3 -0
- model_acts/untied_batchtopk/summary.json +79 -0
model_acts/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
model_acts/tied_per_target_skip/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
model_acts/tied_per_target_skip/layer_0.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d3353a378c70bff4038350ca71a4c22ce889e2a95b8eb766d4955556a8b45e2
|
| 3 |
+
size 494337217
|
model_acts/tied_per_target_skip/layer_1.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1245d6596f7b7960be2e76b93763ca1fe5afbd99dbef625079f83680490a8d7
|
| 3 |
+
size 489508799
|
model_acts/tied_per_target_skip/layer_10.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a4156899cce8b78fcb0737f2a7ee40c8f8401e531b3e996f54421d0999119f8
|
| 3 |
+
size 489802191
|
model_acts/tied_per_target_skip/layer_11.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77b1202667b27619eb416881f74f5a39385e6eba0162ed2480a6b770018e318f
|
| 3 |
+
size 504547759
|
model_acts/tied_per_target_skip/layer_2.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfcf5fba291b0ec9255646f605e6a9735e4eb4a3a415326d74d2a2987e6549be
|
| 3 |
+
size 485271365
|
model_acts/tied_per_target_skip/layer_3.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba2f8ceee17e3a50882338106742a000244b803c0788e0596bc0fe159f0c4e5b
|
| 3 |
+
size 475999817
|
model_acts/tied_per_target_skip/layer_4.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b63d5dae34c085ece13d453b5feec845e3c882ce733b01d5387bab3bff1cf348
|
| 3 |
+
size 470775020
|
model_acts/tied_per_target_skip/layer_5.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:520a129c598423f412df36c060c58663d394ff3f05f672ca860ee779f4f507f2
|
| 3 |
+
size 468067069
|
model_acts/tied_per_target_skip/layer_6.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6badc8c86c62504583a2825cb344597ba678d97401d4553e17486b4bd07fad65
|
| 3 |
+
size 468245655
|
model_acts/tied_per_target_skip/layer_7.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb313a78d6b8b4ed3d54e9ed1ae03565959fc5750ec86dc12e73e7d0b1261c98
|
| 3 |
+
size 470199246
|
model_acts/tied_per_target_skip/layer_8.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fbb1dfae32e48f7b09c4249bd109c9a14e2d52f14b8a1cd146c829af572b8cd
|
| 3 |
+
size 473898103
|
model_acts/tied_per_target_skip/layer_9.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8919692d98e664eb3640ef781ff97db803849126f55fbc0908641a1ffedde740
|
| 3 |
+
size 480251933
|
model_acts/tied_per_target_skip/summary.json
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_path": "trained_gpt2_clts/tied-per-target-skip/tied_per_target_skip_global_batchtopk_jumprelu",
|
| 3 |
+
"config": {
|
| 4 |
+
"num_features": 122880,
|
| 5 |
+
"num_layers": 12,
|
| 6 |
+
"d_model": 768,
|
| 7 |
+
"model_name": null,
|
| 8 |
+
"normalization_method": "mean_std",
|
| 9 |
+
"activation_fn": "jumprelu",
|
| 10 |
+
"jumprelu_threshold": 0.0,
|
| 11 |
+
"batchtopk_k": null,
|
| 12 |
+
"batchtopk_straight_through": false,
|
| 13 |
+
"topk_k": null,
|
| 14 |
+
"topk_straight_through": true,
|
| 15 |
+
"topk_mode": "global",
|
| 16 |
+
"two_stage_batchtopk": false,
|
| 17 |
+
"two_stage_topk": false,
|
| 18 |
+
"clt_dtype": null,
|
| 19 |
+
"expected_input_dtype": null,
|
| 20 |
+
"mlp_input_template": null,
|
| 21 |
+
"mlp_output_template": null,
|
| 22 |
+
"tl_input_template": null,
|
| 23 |
+
"tl_output_template": null,
|
| 24 |
+
"decoder_tying": "per_target",
|
| 25 |
+
"enable_feature_offset": false,
|
| 26 |
+
"enable_feature_scale": false,
|
| 27 |
+
"skip_connection": true
|
| 28 |
+
},
|
| 29 |
+
"layer_stats": {
|
| 30 |
+
"0": {
|
| 31 |
+
"avg_l0": 4.6845703125,
|
| 32 |
+
"max_l0": 45.0
|
| 33 |
+
},
|
| 34 |
+
"1": {
|
| 35 |
+
"avg_l0": 3.1630859375,
|
| 36 |
+
"max_l0": 56.0
|
| 37 |
+
},
|
| 38 |
+
"2": {
|
| 39 |
+
"avg_l0": 6.1865234375,
|
| 40 |
+
"max_l0": 232.0
|
| 41 |
+
},
|
| 42 |
+
"3": {
|
| 43 |
+
"avg_l0": 4.947265625,
|
| 44 |
+
"max_l0": 26.0
|
| 45 |
+
},
|
| 46 |
+
"4": {
|
| 47 |
+
"avg_l0": 5.6328125,
|
| 48 |
+
"max_l0": 83.0
|
| 49 |
+
},
|
| 50 |
+
"5": {
|
| 51 |
+
"avg_l0": 4.9423828125,
|
| 52 |
+
"max_l0": 189.0
|
| 53 |
+
},
|
| 54 |
+
"6": {
|
| 55 |
+
"avg_l0": 8.0615234375,
|
| 56 |
+
"max_l0": 240.0
|
| 57 |
+
},
|
| 58 |
+
"7": {
|
| 59 |
+
"avg_l0": 9.8701171875,
|
| 60 |
+
"max_l0": 210.0
|
| 61 |
+
},
|
| 62 |
+
"8": {
|
| 63 |
+
"avg_l0": 12.326171875,
|
| 64 |
+
"max_l0": 332.0
|
| 65 |
+
},
|
| 66 |
+
"9": {
|
| 67 |
+
"avg_l0": 21.96875,
|
| 68 |
+
"max_l0": 624.0
|
| 69 |
+
},
|
| 70 |
+
"10": {
|
| 71 |
+
"avg_l0": 32.90234375,
|
| 72 |
+
"max_l0": 306.0
|
| 73 |
+
},
|
| 74 |
+
"11": {
|
| 75 |
+
"avg_l0": 44.5126953125,
|
| 76 |
+
"max_l0": 521.0
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
}
|
model_acts/untied_batchtopk/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
model_acts/untied_batchtopk/layer_0.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83389d695c2dd9d07d56ccb1cf2656b99c680c5604cd3f6f6e2079fa0d6085ee
|
| 3 |
+
size 137572646
|
model_acts/untied_batchtopk/layer_1.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77b138646d0718814a4c0d0e639053ff8a6a666142a35b823aaf4392f80768ce
|
| 3 |
+
size 138485586
|
model_acts/untied_batchtopk/layer_10.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c29582939fdb05468684438f909154c0d19da08d1a549a5878753dcabcfe7b7e
|
| 3 |
+
size 141088709
|
model_acts/untied_batchtopk/layer_11.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76f2c85ca2c72e0e2194c69aba50621b2b60e013e23cb05a95a8ef19cf6c42af
|
| 3 |
+
size 141985132
|
model_acts/untied_batchtopk/layer_2.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:adb5cde2c96825bde7926fe423605fd4eeb52060ea2e05e3855a5647d10b0dd0
|
| 3 |
+
size 141012590
|
model_acts/untied_batchtopk/layer_3.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60b3f8b07c9a6f03e9d2fb058be3c84fa87a3907ef7aa4dc5a2523ba62ba164d
|
| 3 |
+
size 139652495
|
model_acts/untied_batchtopk/layer_4.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f17cb00904be3442f3b74400efa26028db87f4686b27e946c919a57a7985160a
|
| 3 |
+
size 139469523
|
model_acts/untied_batchtopk/layer_5.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5e683c0acdd92f0ce536b9edb1f7c41a856f04c582b034a2fbbaa3f241d17c0
|
| 3 |
+
size 139326223
|
model_acts/untied_batchtopk/layer_6.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eff45531db388be36092bc8920527de785600ddb09bca1b3c03a6bbe45b8ea99
|
| 3 |
+
size 139405298
|
model_acts/untied_batchtopk/layer_7.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0c0ccde584199f0b5418513fb1979fb65941f85383c9404b0958f8938523877
|
| 3 |
+
size 139476702
|
model_acts/untied_batchtopk/layer_8.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be904e1b04517ac30ab45ced760c2ce0463b4c45275ce0beac179e8b08fcea4c
|
| 3 |
+
size 139567834
|
model_acts/untied_batchtopk/layer_9.npz
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:457a914fa4690cbf4ba2741c5900f2b88bc813cf9c4e857617444e959ff77fce
|
| 3 |
+
size 139921003
|
model_acts/untied_batchtopk/summary.json
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_path": "trained_gpt2_clts/untied-batchtopk/untied_global_batchtopk_jumprelu",
|
| 3 |
+
"config": {
|
| 4 |
+
"num_features": 32768,
|
| 5 |
+
"num_layers": 12,
|
| 6 |
+
"d_model": 768,
|
| 7 |
+
"model_name": null,
|
| 8 |
+
"normalization_method": "mean_std",
|
| 9 |
+
"activation_fn": "jumprelu",
|
| 10 |
+
"jumprelu_threshold": 0.0,
|
| 11 |
+
"batchtopk_k": null,
|
| 12 |
+
"batchtopk_straight_through": false,
|
| 13 |
+
"topk_k": null,
|
| 14 |
+
"topk_straight_through": true,
|
| 15 |
+
"topk_mode": "global",
|
| 16 |
+
"two_stage_batchtopk": false,
|
| 17 |
+
"two_stage_topk": false,
|
| 18 |
+
"clt_dtype": null,
|
| 19 |
+
"expected_input_dtype": null,
|
| 20 |
+
"mlp_input_template": null,
|
| 21 |
+
"mlp_output_template": null,
|
| 22 |
+
"tl_input_template": null,
|
| 23 |
+
"tl_output_template": null,
|
| 24 |
+
"decoder_tying": "none",
|
| 25 |
+
"enable_feature_offset": false,
|
| 26 |
+
"enable_feature_scale": false,
|
| 27 |
+
"skip_connection": false
|
| 28 |
+
},
|
| 29 |
+
"layer_stats": {
|
| 30 |
+
"0": {
|
| 31 |
+
"avg_l0": 6.1376953125,
|
| 32 |
+
"max_l0": 36.0
|
| 33 |
+
},
|
| 34 |
+
"1": {
|
| 35 |
+
"avg_l0": 3.8759765625,
|
| 36 |
+
"max_l0": 51.0
|
| 37 |
+
},
|
| 38 |
+
"2": {
|
| 39 |
+
"avg_l0": 6.123046875,
|
| 40 |
+
"max_l0": 116.0
|
| 41 |
+
},
|
| 42 |
+
"3": {
|
| 43 |
+
"avg_l0": 5.4716796875,
|
| 44 |
+
"max_l0": 36.0
|
| 45 |
+
},
|
| 46 |
+
"4": {
|
| 47 |
+
"avg_l0": 7.1689453125,
|
| 48 |
+
"max_l0": 31.0
|
| 49 |
+
},
|
| 50 |
+
"5": {
|
| 51 |
+
"avg_l0": 8.97265625,
|
| 52 |
+
"max_l0": 52.0
|
| 53 |
+
},
|
| 54 |
+
"6": {
|
| 55 |
+
"avg_l0": 14.6796875,
|
| 56 |
+
"max_l0": 147.0
|
| 57 |
+
},
|
| 58 |
+
"7": {
|
| 59 |
+
"avg_l0": 18.650390625,
|
| 60 |
+
"max_l0": 116.0
|
| 61 |
+
},
|
| 62 |
+
"8": {
|
| 63 |
+
"avg_l0": 24.76171875,
|
| 64 |
+
"max_l0": 130.0
|
| 65 |
+
},
|
| 66 |
+
"9": {
|
| 67 |
+
"avg_l0": 23.3310546875,
|
| 68 |
+
"max_l0": 81.0
|
| 69 |
+
},
|
| 70 |
+
"10": {
|
| 71 |
+
"avg_l0": 28.69140625,
|
| 72 |
+
"max_l0": 130.0
|
| 73 |
+
},
|
| 74 |
+
"11": {
|
| 75 |
+
"avg_l0": 40.8994140625,
|
| 76 |
+
"max_l0": 330.0
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
}
|