charlesxsh committed on
Commit
504dd41
·
1 Parent(s): 4af520c
Files changed (1) hide show
  1. custom_tokenizer.py +5 -1
custom_tokenizer.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from transformers import PreTrainedTokenizer, AddedToken
2
 
3
  class CustomTokenizer(PreTrainedTokenizer):
@@ -7,4 +8,7 @@ class CustomTokenizer(PreTrainedTokenizer):
7
 
8
  def tokenize(self, text):
9
  print("Tokenizing text", text)
10
- return text.split()
 
 
 
 
1
+ from typing import Dict
2
  from transformers import PreTrainedTokenizer, AddedToken
3
 
4
  class CustomTokenizer(PreTrainedTokenizer):
 
8
 
9
  def tokenize(self, text):
10
  print("Tokenizing text", text)
11
+ return text.split()
12
+
13
+ def get_vocab(self) -> Dict[str, int]:
14
+ return {}