Update train.py
train.py CHANGED
@@ -36,7 +36,7 @@ def create_tokenizer(training_corpus):
     tokenizer = ByteLevelBPETokenizer()
     special_tokens = ["<s>", "<pad>", "</s>", "<unk>", "<mask>"]
     if INSTRUCT_FINETUNE_BOOL:
-        special_tokens.append("<|user|>", "<|bot|>", "<|end|>")
+        special_tokens.append(["<|user|>", "<|bot|>", "<|end|>"])
     tokenizer.train_from_iterator(
         training_corpus,
         vocab_size=VOCAB_SIZE,
@@ -50,7 +50,7 @@ def load_tokenizer(training_corpus):
 def load_tokenizer(training_corpus):
     tokenizer = AutoTokenizer.from_pretrained(OUTPUT_REPO)
     special_tokens = ["<s>", "<pad>", "</s>", "<unk>", "<mask>"]
-    special_tokens.append("<|user|>", "<|bot|>", "<|end|>")
+    special_tokens.append(["<|user|>", "<|bot|>", "<|end|>"])
     tokenizer.train_from_iterator(
         training_corpus,
         vocab_size=VOCAB_SIZE,
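For context on what the changed call does: list.append takes exactly one argument, so the pre-patch call raised a TypeError, while the patched call appends the three chat tokens as a single nested list rather than as three individual strings. A minimal standalone sketch of that behavior in plain Python, independent of train.py:

    special_tokens = ["<s>", "<pad>", "</s>", "<unk>", "<mask>"]

    # Pre-patch: append() takes exactly one argument, so this raises TypeError.
    # special_tokens.append("<|user|>", "<|bot|>", "<|end|>")

    # Post-patch: valid Python, but it appends one nested list, not three strings.
    special_tokens.append(["<|user|>", "<|bot|>", "<|end|>"])
    print(special_tokens[-1])   # ['<|user|>', '<|bot|>', '<|end|>']

    # If the intent is three flat string tokens, extend() does that instead.
    special_tokens = ["<s>", "<pad>", "</s>", "<unk>", "<mask>"]
    special_tokens.extend(["<|user|>", "<|bot|>", "<|end|>"])
    print(special_tokens[-3:])  # ['<|user|>', '<|bot|>', '<|end|>']

If special_tokens is later passed to train_from_iterator(special_tokens=...), the tokenizers library expects a flat list of strings (or AddedToken objects), so the extend variant is likely closer to the intended effect; that part of the call is not visible in these hunks, so this is an assumption about the surrounding code rather than a statement about the commit itself.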