Commit
·
7b2146d
1
Parent(s):
20f6f96
Update envibert_tokenizer.py
Browse files
- envibert_tokenizer.py +4 -0
envibert_tokenizer.py
CHANGED
@@ -50,6 +50,10 @@ class RobertaTokenizer(PreTrainedTokenizer):
|
|
50 |
|
51 |
def _tokenize(self, text):
|
52 |
return self.sp_model.EncodeAsPieces(text)
|
|
|
|
|
|
|
|
|
53 |
|
54 |
def _convert_token_to_id(self, token):
|
55 |
""" Converts a token (str) in an id using the vocab. """
|
|
|
50 |
|
51 |
def _tokenize(self, text):
|
52 |
return self.sp_model.EncodeAsPieces(text)
|
53 |
+
|
54 |
+
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
|
55 |
+
#TODO
|
56 |
+
return "", ""
|
57 |
|
58 |
def _convert_token_to_id(self, token):
|
59 |
""" Converts a token (str) in an id using the vocab. """
|