marco-molinari's picture
Upload folder using huggingface_hub
a3dc3f3 verified
{
"sae": {
"expansion_factor": 16,
"normalize_decoder": true,
"num_latents": 0,
"k": 128,
"multi_topk": false,
"jumprelu": false,
"jumprelu_init_threshold": 0.001,
"jumprelu_bandwidth": 0.001,
"jumprelu_target_l0": null,
"jumprelu_per_layer_l0": false,
"init_enc_as_dec_transpose": true,
"init_b_dec_as_zeros": false
},
"batch_size": 8,
"max_seq_len": 1024,
"num_training_tokens": 1000000000,
"grad_acc_steps": 1,
"micro_acc_steps": 1,
"adam_8bit": false,
"adam_epsilon": 1e-08,
"adam_betas": [
0.9,
0.999
],
"lr": null,
"lr_scheduler_name": "constant",
"lr_warmup_steps": 0.0,
"l1_coefficient": 0.0,
"l1_warmup_steps": 0.0,
"use_l2_loss": false,
"auxk_alpha": 0.03125,
"dead_feature_threshold": 10000000,
"hookpoints": [
"layers.0",
"layers.1",
"layers.2",
"layers.3",
"layers.4",
"layers.5",
"layers.6",
"layers.7",
"layers.8",
"layers.9",
"layers.10",
"layers.11",
"layers.12",
"layers.13",
"layers.14",
"layers.15",
"layers.16",
"layers.17",
"layers.18",
"layers.19",
"layers.20",
"layers.21",
"layers.22",
"layers.23"
],
"layers": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23
],
"layer_stride": 1,
"distribute_modules": true,
"save_every": 100000,
"normalize_activations": 1.0,
"num_norm_estimation_tokens": 5000000,
"clusters": {
"k1-c0": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22
],
"k2-c0": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15
],
"k2-c1": [
16,
17,
18,
19,
20,
21,
22
],
"k3-c0": [
7,
8,
9,
10,
11,
12,
13,
14,
15
],
"k3-c1": [
0,
1,
2,
3,
4,
5,
6
],
"k4-c0": [
11,
12,
13,
14,
15
],
"k4-c1": [
7,
8,
9,
10
],
"k5-c0": [
19,
20,
21,
22
],
"k5-c1": [
16,
17,
18
],
"k6-c0": [
3,
4,
5,
6
],
"k6-c1": [
0,
1,
2
],
"k7-c0": [
13,
14,
15
],
"k7-c1": [
11,
12
],
"k8-c0": [
3,
4
],
"k8-c1": [
5,
6
],
"k9-c0": [
21,
22
],
"k9-c1": [
19,
20
],
"k10-c0": [
7,
8
],
"k10-c1": [
9,
10
],
"k11-c0": [
17,
18
],
"k12-c0": [
14,
15
],
"k13-c0": [
1,
2
],
"layers.0": [
0
],
"layers.1": [
1
],
"layers.2": [
2
],
"layers.3": [
3
],
"layers.4": [
4
],
"layers.5": [
5
],
"layers.6": [
6
],
"layers.7": [
7
],
"layers.8": [
8
],
"layers.9": [
9
],
"layers.10": [
10
],
"layers.11": [
11
],
"layers.12": [
12
],
"layers.13": [
13
],
"layers.14": [
14
],
"layers.15": [
15
],
"layers.16": [
16
],
"layers.17": [
17
],
"layers.18": [
18
],
"layers.19": [
19
],
"layers.20": [
20
],
"layers.21": [
21
],
"layers.22": [
22
],
"layers.23": [
23
]
},
"cluster_hookpoints": {
"k1-c0": [
"layers.0",
"layers.1",
"layers.2",
"layers.3",
"layers.4",
"layers.5",
"layers.6",
"layers.7",
"layers.8",
"layers.9",
"layers.10",
"layers.11",
"layers.12",
"layers.13",
"layers.14",
"layers.15",
"layers.16",
"layers.17",
"layers.18",
"layers.19",
"layers.20",
"layers.21",
"layers.22"
],
"k2-c0": [
"layers.0",
"layers.1",
"layers.2",
"layers.3",
"layers.4",
"layers.5",
"layers.6",
"layers.7",
"layers.8",
"layers.9",
"layers.10",
"layers.11",
"layers.12",
"layers.13",
"layers.14",
"layers.15"
],
"k2-c1": [
"layers.16",
"layers.17",
"layers.18",
"layers.19",
"layers.20",
"layers.21",
"layers.22"
],
"k3-c0": [
"layers.7",
"layers.8",
"layers.9",
"layers.10",
"layers.11",
"layers.12",
"layers.13",
"layers.14",
"layers.15"
],
"k3-c1": [
"layers.0",
"layers.1",
"layers.2",
"layers.3",
"layers.4",
"layers.5",
"layers.6"
],
"k4-c0": [
"layers.11",
"layers.12",
"layers.13",
"layers.14",
"layers.15"
],
"k4-c1": [
"layers.7",
"layers.8",
"layers.9",
"layers.10"
],
"k5-c0": [
"layers.19",
"layers.20",
"layers.21",
"layers.22"
],
"k5-c1": [
"layers.16",
"layers.17",
"layers.18"
],
"k6-c0": [
"layers.3",
"layers.4",
"layers.5",
"layers.6"
],
"k6-c1": [
"layers.0",
"layers.1",
"layers.2"
],
"k7-c0": [
"layers.13",
"layers.14",
"layers.15"
],
"k7-c1": [
"layers.11",
"layers.12"
],
"k8-c0": [
"layers.3",
"layers.4"
],
"k8-c1": [
"layers.5",
"layers.6"
],
"k9-c0": [
"layers.21",
"layers.22"
],
"k9-c1": [
"layers.19",
"layers.20"
],
"k10-c0": [
"layers.7",
"layers.8"
],
"k10-c1": [
"layers.9",
"layers.10"
],
"k11-c0": [
"layers.17",
"layers.18"
],
"k12-c0": [
"layers.14",
"layers.15"
],
"k13-c0": [
"layers.1",
"layers.2"
],
"layers.0": [
"layers.0"
],
"layers.1": [
"layers.1"
],
"layers.2": [
"layers.2"
],
"layers.3": [
"layers.3"
],
"layers.4": [
"layers.4"
],
"layers.5": [
"layers.5"
],
"layers.6": [
"layers.6"
],
"layers.7": [
"layers.7"
],
"layers.8": [
"layers.8"
],
"layers.9": [
"layers.9"
],
"layers.10": [
"layers.10"
],
"layers.11": [
"layers.11"
],
"layers.12": [
"layers.12"
],
"layers.13": [
"layers.13"
],
"layers.14": [
"layers.14"
],
"layers.15": [
"layers.15"
],
"layers.16": [
"layers.16"
],
"layers.17": [
"layers.17"
],
"layers.18": [
"layers.18"
],
"layers.19": [
"layers.19"
],
"layers.20": [
"layers.20"
],
"layers.21": [
"layers.21"
],
"layers.22": [
"layers.22"
],
"layers.23": [
"layers.23"
]
},
"hook": null,
"keep_last_n_checkpoints": 4,
"resume_from": null,
"log_to_wandb": true,
"run_name": "checkpoints-clusters/pythia-410m-topk",
"wandb_log_frequency": 1
}