adamkarvonen
committed on
Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_10/ae.pt +3 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_10/config.json +32 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_10/eval_results.json +1 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_11/ae.pt +3 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_11/config.json +32 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_11/eval_results.json +1 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_6/ae.pt +3 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_6/config.json +32 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_6/eval_results.json +1 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_7/ae.pt +3 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_7/config.json +32 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_7/eval_results.json +1 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_8/ae.pt +3 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_8/config.json +32 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_8/eval_results.json +1 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_9/ae.pt +3 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_9/config.json +32 -0
- BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_9/eval_results.json +1 -0
- GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt +3 -0
- GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt +3 -0
- GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt +3 -0
- GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt +3 -0
- GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt +3 -0
- GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt +3 -0
- JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt +3 -0
- JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt +3 -0
- JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt +3 -0
- JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt +3 -0
- JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt +3 -0
- JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt +3 -0
- PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt +3 -0
- PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt +3 -0
- PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt +3 -0
- PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt +3 -0
- PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt +3 -0
- PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_122/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_244/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_2441/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_24414/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_386/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_772/ae.pt +3 -0
- StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_7720/ae.pt +3 -0
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_10/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7e56dca54623be6b87ef79fa7675fa038455848308fa0bf7b3615c24f868732
|
3 |
+
size 75525142
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_10/config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "BatchTopKTrainer",
|
4 |
+
"dict_class": "BatchTopKSAE",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"top_k_aux": 1152,
|
13 |
+
"seed": 0,
|
14 |
+
"activation_dim": 2304,
|
15 |
+
"dict_size": 4096,
|
16 |
+
"k": 320,
|
17 |
+
"device": "cuda:0",
|
18 |
+
"layer": 12,
|
19 |
+
"lm_name": "google/gemma-2-2b",
|
20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_10",
|
21 |
+
"submodule_name": "resid_post_layer_12"
|
22 |
+
},
|
23 |
+
"buffer": {
|
24 |
+
"d_submodule": 2304,
|
25 |
+
"io": "out",
|
26 |
+
"n_ctxs": 244,
|
27 |
+
"ctx_len": 1024,
|
28 |
+
"refresh_batch_size": 4,
|
29 |
+
"out_batch_size": 2048,
|
30 |
+
"device": "cuda:0"
|
31 |
+
}
|
32 |
+
}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_10/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 48.85125, "l1_loss": 1669.4, "l0": 321.7892822265625, "frac_variance_explained": 0.8830859375, "cossim": 0.93751953125, "l2_ratio": 0.9375, "relative_reconstruction_bias": 1.0002734375, "loss_original": 2.152919921875, "loss_reconstructed": 2.195830078125, "loss_zero": 12.4375, "frac_recovered": 0.99533203125, "frac_alive": 0.999755859375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_11/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:788072b25d7b0eee3e51ea5615ef25aaca4482e9af5f3ea33b62e4e743960335
|
3 |
+
size 75525142
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_11/config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "BatchTopKTrainer",
|
4 |
+
"dict_class": "BatchTopKSAE",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"top_k_aux": 1152,
|
13 |
+
"seed": 0,
|
14 |
+
"activation_dim": 2304,
|
15 |
+
"dict_size": 4096,
|
16 |
+
"k": 640,
|
17 |
+
"device": "cuda:0",
|
18 |
+
"layer": 12,
|
19 |
+
"lm_name": "google/gemma-2-2b",
|
20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_11",
|
21 |
+
"submodule_name": "resid_post_layer_12"
|
22 |
+
},
|
23 |
+
"buffer": {
|
24 |
+
"d_submodule": 2304,
|
25 |
+
"io": "out",
|
26 |
+
"n_ctxs": 244,
|
27 |
+
"ctx_len": 1024,
|
28 |
+
"refresh_batch_size": 4,
|
29 |
+
"out_batch_size": 2048,
|
30 |
+
"device": "cuda:0"
|
31 |
+
}
|
32 |
+
}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_11/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 38.55, "l1_loss": 2861.84, "l0": 643.082568359375, "frac_variance_explained": 0.9279296875, "cossim": 0.9608984375, "l2_ratio": 0.96087890625, "relative_reconstruction_bias": 1.00046875, "loss_original": 2.152919921875, "loss_reconstructed": 2.16953125, "loss_zero": 12.4375, "frac_recovered": 0.99826171875, "frac_alive": 0.971435546875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_6/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd123d1f222b07a7153d5fc3871b40c79944d6a21592a5aaf18cdf6448ea082f
|
3 |
+
size 75525142
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_6/config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "BatchTopKTrainer",
|
4 |
+
"dict_class": "BatchTopKSAE",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"top_k_aux": 1152,
|
13 |
+
"seed": 0,
|
14 |
+
"activation_dim": 2304,
|
15 |
+
"dict_size": 4096,
|
16 |
+
"k": 20,
|
17 |
+
"device": "cuda:0",
|
18 |
+
"layer": 12,
|
19 |
+
"lm_name": "google/gemma-2-2b",
|
20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_6",
|
21 |
+
"submodule_name": "resid_post_layer_12"
|
22 |
+
},
|
23 |
+
"buffer": {
|
24 |
+
"d_submodule": 2304,
|
25 |
+
"io": "out",
|
26 |
+
"n_ctxs": 244,
|
27 |
+
"ctx_len": 1024,
|
28 |
+
"refresh_batch_size": 4,
|
29 |
+
"out_batch_size": 2048,
|
30 |
+
"device": "cuda:0"
|
31 |
+
}
|
32 |
+
}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_6/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 71.1775, "l1_loss": 287.09, "l0": 19.922880859375, "frac_variance_explained": 0.74666015625, "cossim": 0.865703125, "l2_ratio": 0.8649609375, "relative_reconstruction_bias": 1.0004296875, "loss_original": 2.152919921875, "loss_reconstructed": 2.55654296875, "loss_zero": 12.4375, "frac_recovered": 0.9603515625, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_7/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4040775dd6a9059c152c05d266fe006157b31297152227a072b4d51408c53809
|
3 |
+
size 75525142
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_7/config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "BatchTopKTrainer",
|
4 |
+
"dict_class": "BatchTopKSAE",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"top_k_aux": 1152,
|
13 |
+
"seed": 0,
|
14 |
+
"activation_dim": 2304,
|
15 |
+
"dict_size": 4096,
|
16 |
+
"k": 40,
|
17 |
+
"device": "cuda:0",
|
18 |
+
"layer": 12,
|
19 |
+
"lm_name": "google/gemma-2-2b",
|
20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_7",
|
21 |
+
"submodule_name": "resid_post_layer_12"
|
22 |
+
},
|
23 |
+
"buffer": {
|
24 |
+
"d_submodule": 2304,
|
25 |
+
"io": "out",
|
26 |
+
"n_ctxs": 244,
|
27 |
+
"ctx_len": 1024,
|
28 |
+
"refresh_batch_size": 4,
|
29 |
+
"out_batch_size": 2048,
|
30 |
+
"device": "cuda:0"
|
31 |
+
}
|
32 |
+
}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_7/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 66.54, "l1_loss": 402.34, "l0": 39.944716796875, "frac_variance_explained": 0.77919921875, "cossim": 0.88330078125, "l2_ratio": 0.88310546875, "relative_reconstruction_bias": 1.00033203125, "loss_original": 2.152919921875, "loss_reconstructed": 2.396982421875, "loss_zero": 12.4375, "frac_recovered": 0.97587890625, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_8/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e609d9ad8b04890412881b51171fd7f30e6f7aeb781bf68d1037ace553e8ff3
|
3 |
+
size 75525142
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_8/config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "BatchTopKTrainer",
|
4 |
+
"dict_class": "BatchTopKSAE",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"top_k_aux": 1152,
|
13 |
+
"seed": 0,
|
14 |
+
"activation_dim": 2304,
|
15 |
+
"dict_size": 4096,
|
16 |
+
"k": 80,
|
17 |
+
"device": "cuda:0",
|
18 |
+
"layer": 12,
|
19 |
+
"lm_name": "google/gemma-2-2b",
|
20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_8",
|
21 |
+
"submodule_name": "resid_post_layer_12"
|
22 |
+
},
|
23 |
+
"buffer": {
|
24 |
+
"d_submodule": 2304,
|
25 |
+
"io": "out",
|
26 |
+
"n_ctxs": 244,
|
27 |
+
"ctx_len": 1024,
|
28 |
+
"refresh_batch_size": 4,
|
29 |
+
"out_batch_size": 2048,
|
30 |
+
"device": "cuda:0"
|
31 |
+
}
|
32 |
+
}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_8/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 61.62375, "l1_loss": 577.64, "l0": 79.8656640625, "frac_variance_explained": 0.81333984375, "cossim": 0.90095703125, "l2_ratio": 0.9005859375, "relative_reconstruction_bias": 1.0005859375, "loss_original": 2.152919921875, "loss_reconstructed": 2.291015625, "loss_zero": 12.4375, "frac_recovered": 0.98591796875, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_9/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a241b985f09e2d3f5541cb3d127950ccc9d6237f533c9e8d107799af1259afa
|
3 |
+
size 75525142
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_9/config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "BatchTopKTrainer",
|
4 |
+
"dict_class": "BatchTopKSAE",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"top_k_aux": 1152,
|
13 |
+
"seed": 0,
|
14 |
+
"activation_dim": 2304,
|
15 |
+
"dict_size": 4096,
|
16 |
+
"k": 160,
|
17 |
+
"device": "cuda:0",
|
18 |
+
"layer": 12,
|
19 |
+
"lm_name": "google/gemma-2-2b",
|
20 |
+
"wandb_name": "BatchTopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_9",
|
21 |
+
"submodule_name": "resid_post_layer_12"
|
22 |
+
},
|
23 |
+
"buffer": {
|
24 |
+
"d_submodule": 2304,
|
25 |
+
"io": "out",
|
26 |
+
"n_ctxs": 244,
|
27 |
+
"ctx_len": 1024,
|
28 |
+
"refresh_batch_size": 4,
|
29 |
+
"out_batch_size": 2048,
|
30 |
+
"device": "cuda:0"
|
31 |
+
}
|
32 |
+
}
|
BatchTopKTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_9/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 56.02125, "l1_loss": 883.4, "l0": 159.4246337890625, "frac_variance_explained": 0.847890625, "cossim": 0.91810546875, "l2_ratio": 0.91796875, "relative_reconstruction_bias": 1.00001953125, "loss_original": 2.152919921875, "loss_reconstructed": 2.2319140625, "loss_zero": 12.4375, "frac_recovered": 0.99177734375, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba233fc8ba9e4e02fdda1fdd6409da877dfccaa586204e8838f16ab284bbf405
|
3 |
+
size 75557910
|
GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:887ed43cda633a2935ae7e8a93690b024b4db61eba86cd31f4860177fb7d74bf
|
3 |
+
size 75557910
|
GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3a1ccf2baa4e2b0beb1067dac78261662b1cae85c812b93e6ca52609a3f8c46
|
3 |
+
size 75557910
|
GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf6049a877d63d090059682d7f31d4f9abeb567e8363adb2c8f1c0947b0fcdc4
|
3 |
+
size 75557910
|
GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09f96ba30e4bf8f6b590c69e87dd7b8ae0d7be6e280eee717a9bdaca691fa3ec
|
3 |
+
size 75557910
|
GatedSAETrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8f6ce284848acb59d9229414d93493a81ce64222fd7ecb3d0ca65c5c7986ba6
|
3 |
+
size 75557910
|
JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a17abc5a1e2602b158758d8fda172bb308618a2b29a82b9e429f41306532e81
|
3 |
+
size 75541279
|
JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7bda0e5d0830cd179667d094b99d53dd44c49830f942806414a37426f0df7622
|
3 |
+
size 75541279
|
JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e79b0318df990fbc84587edd10612be6b8cbb84638e3bc4cb5780c31833ea8ea
|
3 |
+
size 75541279
|
JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:004eb54008f8506586a93893c6338e054e297ec3c2ff20abecb80cb98fd5a164
|
3 |
+
size 75541279
|
JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:189175ea1c34fe55de1b96480590e8b3dfb79963ec561f961efb0b5892774997
|
3 |
+
size 75541279
|
JumpReluTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:522fd8e36dab064fbfa848f4e0d36ad7ee463b95382eba68282a851b80211ff2
|
3 |
+
size 75541279
|
PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2f39f941791308e5a38407cc0e6dd9b0d4cb9e51a234a789db6fd43a43f2e7c
|
3 |
+
size 75524776
|
PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc236b61b19d327d6f8afd38662f28d89a459dfb6754f22f62080767b57f61ad
|
3 |
+
size 75524776
|
PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2f120429a8042890ad1810ef5a86d8500c3ba738959a200aeda13910a074d12
|
3 |
+
size 75524776
|
PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69c315b36b04096117a57d910139c9f64be2e28ee15a62be163c7b897bf6768b
|
3 |
+
size 75524776
|
PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e97e13f77ae26ca96a6fbf2ded67661da894acaff5fd9b81869e740fe3d3b10b
|
3 |
+
size 75524776
|
PAnnealTrainer_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2347792806f7bdb1be9925e34fc43abbeb1a3de7bc2b3451fc8fea16db797af5
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cfc0683229ea8a10530761fe8924042b2e169afda357e89cf195e5e4c230813
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4410a2aefd77dcf7c9e3ccb2c3860712fbc5f4c05c0ab0038f3e0ffd01e7161
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76009c54e89f0e0cd6a8cdd3dbb96dbefdf2203d67c1493260e812c6ae0c8687
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2f89b968bf24a929123d9eb876bc683d5e127465008912eec2eaca26f3b75c1
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c58ab3f9c6023a99832bd9d44e1af6e2975d16014f6b9c1e3d24b7ee7132036
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ed200814789373a94bcce1b3d95b8d98f79041d726910f972669b0b46201b50
|
3 |
+
size 75524776
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bbd6a47e1b24bf9267b3a3e38b8055ea373a50f45e1ef446742faf65987454f0
|
3 |
+
size 75524792
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_122/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcab681719adf3e2bd225cc03a7ef8fde7053d447eda80e7807396f4b24b1d5b
|
3 |
+
size 75524808
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_244/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57f38fd757927e096ab982173169215966c60fe84bba113a877e6696c2bd4e1d
|
3 |
+
size 75524808
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_2441/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c634d8a50c960690b2191e8a161322b8561dc168a60f8afd34b2f3c7449f8dd6
|
3 |
+
size 75524880
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_24414/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:897dad9d76414b44a70f7fdabca7e56036956fdac36ffe4f1208ccb79edec791
|
3 |
+
size 75525080
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_386/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42ba721d3e5bf5d480d3455043773b67f890ccc6e194614bfce824a5f8c39b43
|
3 |
+
size 75524808
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_772/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cc92e67df96094a7037230ef7427e8d077972707e7060b396e6585e558e9bea
|
3 |
+
size 75524808
|
StandardTrainerAprilUpdate_google_gemma-2-2b_ctx1024_0108/resid_post_layer_12_checkpoints/trainer_0_step_7720/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1686c1e40fd958abd1357111c7ba1f158cc0e21fbe0c79c6ada356d697cb2196
|
3 |
+
size 75524880
|