amenon committed · verified
Commit 775fe68 · 1 Parent(s): 5686bc0

Upload folder using huggingface_hub
special_tokens_map.json CHANGED
@@ -13,5 +13,5 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|end▁of▁sentence|>"
+  "pad_token": "ç²ĺèĨľ"
 }
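The updated pad_token is no longer the reserved <|end▁of▁sentence|> marker but a byte-level surface string, so it resolves to whatever ordinary vocabulary entry spells those bytes. A minimal sketch for checking how it loads, assuming the commit's files sit in a local directory ./checkpoint (the path is a placeholder, not part of the commit):

```python
# Sketch: inspect the pad/eos tokens after this commit.
# "./checkpoint" is a placeholder for wherever these files are downloaded.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")

print(repr(tok.pad_token), tok.pad_token_id)   # the new byte-level pad string and its id
print(repr(tok.eos_token), tok.eos_token_id)   # eos itself is unchanged (id 100001 per tokenizer.json)
```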
tokenizer.json CHANGED
@@ -95,6 +95,12 @@
         "id": "A",
         "type_id": 0
       }
+    },
+    {
+      "SpecialToken": {
+        "id": "<|end▁of▁sentence|>",
+        "type_id": 0
+      }
     }
   ],
   "pair": [
@@ -110,6 +116,12 @@
         "type_id": 0
       }
     },
+    {
+      "SpecialToken": {
+        "id": "<|end▁of▁sentence|>",
+        "type_id": 0
+      }
+    },
     {
       "SpecialToken": {
         "id": "<|begin▁of▁sentence|>",
@@ -121,6 +133,12 @@
         "id": "B",
         "type_id": 1
       }
+    },
+    {
+      "SpecialToken": {
+        "id": "<|end▁of▁sentence|>",
+        "type_id": 1
+      }
     }
   ],
   "special_tokens": {
@@ -132,6 +150,15 @@
         "tokens": [
           "<|begin▁of▁sentence|>"
         ]
+      },
+      "<|end▁of▁sentence|>": {
+        "id": "<|end▁of▁sentence|>",
+        "ids": [
+          100001
+        ],
+        "tokens": [
+          "<|end▁of▁sentence|>"
+        ]
       }
     }
   },
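The tokenizer.json hunks extend the TemplateProcessing post-processor: <|end▁of▁sentence|> (id 100001) is now appended after the single-sequence template and after each segment of the pair template, alongside the existing <|begin▁of▁sentence|> entries. Roughly, the resulting templates correspond to the following tokenizers-library construction (the type_id of the pre-existing second <|begin▁of▁sentence|> in the pair template is assumed to be 1, matching the $B segment):

```python
# Rough equivalent of the post-processor after this commit, expressed with the
# `tokenizers` library; ids 100000/100001 are taken from the diff above.
from tokenizers.processors import TemplateProcessing

post_processor = TemplateProcessing(
    single="<|begin▁of▁sentence|> $A <|end▁of▁sentence|>",
    pair=(
        "<|begin▁of▁sentence|> $A <|end▁of▁sentence|> "
        "<|begin▁of▁sentence|>:1 $B:1 <|end▁of▁sentence|>:1"
    ),
    special_tokens=[
        ("<|begin▁of▁sentence|>", 100000),
        ("<|end▁of▁sentence|>", 100001),
    ],
)
# Attach to a tokenizers.Tokenizer instance, e.g.: tokenizer.post_processor = post_processor
```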
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "add_bos_token": true,
-  "add_eos_token": false,
+  "add_eos_token": true,
   "added_tokens_decoder": {
     "100000": {
       "content": "<|begin▁of▁sentence|>",
@@ -25,7 +25,7 @@
   "eos_token": "<|end▁of▁sentence|>",
   "legacy": true,
   "model_max_length": 4096,
-  "pad_token": "<|end▁of▁sentence|>",
+  "pad_token": "ç²ĺèĨľ",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
   "unk_token": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5120bec8217e83e3938f5e5ddff89f9ffa5db97b8b162c881090d4f0f0c773ba
+oid sha256:96b5e60328a492f940d5a5530e6a4a6c73cc104813fb13145d6561e52f95c1dc
 size 4984