tim-lawson committed on
Commit
8439b50
·
verified ·
1 Parent(s): 395a1b6

Push model using huggingface_hub.

Browse files
Files changed (3) hide show
  1. README.md +5 -18
  2. config.json +4 -3
  3. model.safetensors +1 -1
README.md CHANGED
@@ -3,23 +3,10 @@ language: en
3
  library_name: mlsae
4
  license: mit
5
  tags:
6
- - model_hub_mixin
7
- - pytorch_model_hub_mixin
8
- datasets:
9
- - monology/pile-uncopyrighted
10
  ---
11
 
12
- # mlsae-pythia-160m-deduped-x64-k32-tfm
13
-
14
- A Multi-Layer Sparse Autoencoder (MLSAE) trained on the residual stream
15
- activation vectors from every layer of
16
- [EleutherAI/pythia-160m-deduped](https://huggingface.co/EleutherAI/pythia-160m-deduped)
17
- with an expansion factor of 64 and k = 32, over 1 billion tokens from
18
- [monology/pile-uncopyrighted](https://huggingface.co/datasets/monology/pile-uncopyrighted).
19
- This model includes the underlying transformer.
20
-
21
- For more details, see:
22
-
23
- - Paper: <https://arxiv.org/abs/2409.04185>
24
- - GitHub repository: <https://github.com/tim-lawson/mlsae>
25
- - Weights & Biases project: <https://wandb.ai/timlawson-/mlsae>
 
3
  library_name: mlsae
4
  license: mit
5
  tags:
6
+ - model_hub_mixin
7
+ - pytorch_model_hub_mixin
 
 
8
  ---
9
 
10
+ This model has been pushed to the Hub using the [PyTorchModelHubMixin](https://huggingface.co/docs/huggingface_hub/package_reference/mixins#huggingface_hub.PyTorchModelHubMixin) integration:
11
+ - Library: https://github.com/tim-lawson/mlsae
12
+ - Docs: [More Information Needed]
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "accumulate_grad_batches": 64,
3
- "autoencoder": null,
4
  "auxk": 256,
5
  "auxk_coef": 0.03125,
6
  "batch_size": 1,
@@ -9,11 +8,13 @@
9
  "dead_tokens_threshold": 10000000,
10
  "expansion_factor": 64,
11
  "k": 32,
12
- "layers": null,
 
 
13
  "lr": 0.0001,
14
  "max_length": 2048,
15
  "model_name": "EleutherAI/pythia-160m-deduped",
16
  "skip_special_tokens": true,
17
  "standardize": true,
18
- "transformer": null
19
  }
 
1
  {
2
  "accumulate_grad_batches": 64,
 
3
  "auxk": 256,
4
  "auxk_coef": 0.03125,
5
  "batch_size": 1,
 
8
  "dead_tokens_threshold": 10000000,
9
  "expansion_factor": 64,
10
  "k": 32,
11
+ "layers": [
12
+ 0
13
+ ],
14
  "lr": 0.0001,
15
  "max_length": 2048,
16
  "model_name": "EleutherAI/pythia-160m-deduped",
17
  "skip_special_tokens": true,
18
  "standardize": true,
19
+ "tuned_lens": false
20
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:652833f14442dcd780c647324391df1f54b50e63d044ce7ff9a70d12ddaf14e8
3
  size 951304624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea3db6cdc86c4175f155f5381bec1bf8d2d1be353c81159126565001c1accd74
3
  size 951304624