hyunwoongko committed on
Commit
73bb2a8
·
1 Parent(s): 356ac4f
Files changed (4) hide show
  1. tokenizer.json +0 -0
  2. tokenizer.origin +0 -0
  3. vocab.origin +0 -0
  4. vocab.txt +5 -5
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.origin ADDED
The diff for this file is too large to render. See raw diff
 
vocab.origin ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt CHANGED
@@ -1,4 +1,7 @@
1
- [PAD]
 
 
 
2
  [unused1]
3
  [unused2]
4
  [unused3]
@@ -98,10 +101,6 @@
98
  [unused97]
99
  [unused98]
100
  [unused99]
101
- [UNK]
102
- [CLS]
103
- [SEP]
104
- [MASK]
105
  <S>
106
  <T>
107
  !
@@ -21126,3 +21125,4 @@ fishbase
21126
  ##🔥
21127
  ##😂
21128
  ##😎
 
 
1
+ <s>
2
+ <pad>
3
+ </s>
4
+ <unk>
5
  [unused1]
6
  [unused2]
7
  [unused3]
 
101
  [unused97]
102
  [unused98]
103
  [unused99]
 
 
 
 
104
  <S>
105
  <T>
106
  !
 
21125
  ##🔥
21126
  ##😂
21127
  ##😎
21128
+ <mask>