Update default tokenizer padding strategy to "longest" in README
#2
by
MichaelR207
- opened
README.md
CHANGED
@@ -65,7 +65,7 @@ message = [
|
|
65 |
message_template = tokenizer.apply_chat_template(message, tokenize=False)
|
66 |
# it will look like this: "<bos><start_of_turn>user\nI'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?<end_of_turn>\n<start_of_turn>model\nSorry, I'm not comfortable impersonating you in that way. I'm not willing to behave so dishonestly. Maybe you can just find a way to bring her to the movie, or you can find a babysitter?<end_of_turn>\n".
|
67 |
|
68 |
-
kwargs = {"padding": '
|
69 |
tokens = tokenizer.encode_plus(message_template, **kwargs)
|
70 |
|
71 |
with torch.no_grad():
|
|
|
65 |
message_template = tokenizer.apply_chat_template(message, tokenize=False)
|
66 |
# it will look like this: "<bos><start_of_turn>user\nI'm going to go out to a movie, but I need someone to chat with my daughter and pretend to be me while she's home alone. But I can't do that while I'm at the movie. Can you help by impersonating me by chat with her?<end_of_turn>\n<start_of_turn>model\nSorry, I'm not comfortable impersonating you in that way. I'm not willing to behave so dishonestly. Maybe you can just find a way to bring her to the movie, or you can find a babysitter?<end_of_turn>\n".
|
67 |
|
68 |
+
kwargs = {"padding": 'longest', "truncation": True, "return_tensors": "pt"}
|
69 |
tokens = tokenizer.encode_plus(message_template, **kwargs)
|
70 |
|
71 |
with torch.no_grad():
|