weathon committed on
Commit
776ee5f
·
verified ·
1 Parent(s): 3c50afe

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +29 -47
tokenizer_config.json CHANGED
@@ -1,48 +1,30 @@
1
  {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "\u0000",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "57344": {
13
- "content": "\ue000",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "57345": {
21
- "content": "\ue001",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "57347": {
29
- "content": "\ue003",
30
- "lstrip": true,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- }
36
- },
37
- "bos_token": "\ue000",
38
- "clean_up_tokenization_spaces": false,
39
- "cls_token": "\ue000",
40
- "eos_token": "\ue001",
41
- "extra_special_tokens": {},
42
- "mask_token": "\ue003",
43
- "model_max_length": 2048,
44
- "pad_token": "\u0000",
45
- "processor_class": "GitProcessor",
46
- "sep_token": "\ue001",
47
- "tokenizer_class": "CanineTokenizer"
48
- }
 
1
  {
2
+ "char_ords": [
3
+ 108,
4
+ 35,
5
+ 41,
6
+ 61,
7
+ 51,
8
+ 70,
9
+ 40,
10
+ 93,
11
+ 52,
12
+ 43,
13
+ 79,
14
+ 49,
15
+ 54,
16
+ 78,
17
+ 66,
18
+ 83,
19
+ 46,
20
+ 50,
21
+ 72,
22
+ 45,
23
+ 114,
24
+ 67,
25
+ 80,
26
+ 91,
27
+ 53
28
+ ],
29
+ "model_max_length": 1024
30
+ }