Commit
·
fc42dc0
1
Parent(s):
35a30a4
add readme and correct pad token
Browse files- README.md +1 -0
- special_tokens_map.json +1 -1
README.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
WARNING: this adds the Redwood 2L Attn-Only tokenizer for compatibility with TransformerLens. I believe this implementation is correct, but please verify it against the reference tokenizer here: https://github.com/redwoodresearch/rust_circuit_public/blob/42c3fcbffbc367897d3a810b20c12d7c1c99a00d/python/rust_circuit/module_library.py#L1012
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": {"content": "[BEGIN]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "[END]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "
|
|
|
1 |
+
{"bos_token": {"content": "[BEGIN]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "[END]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": "[END]"}
|