Update custom_tokenizer.py
Browse files
- custom_tokenizer.py +12 -5
custom_tokenizer.py
CHANGED
@@ -5,9 +5,17 @@ class CustomGPT2Tokenizer(GPT2Tokenizer):
|
|
5 |
super().__init__(*args, **kwargs)
|
6 |
self.chat_template = "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
|
7 |
|
8 |
-
def apply_chat_template(self, messages, add_generation_prompt=True):
|
9 |
"""
|
10 |
-
Applies the chat template to the provided messages.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"""
|
12 |
# Handle template processing here
|
13 |
formatted_messages = []
|
@@ -17,12 +25,11 @@ class CustomGPT2Tokenizer(GPT2Tokenizer):
|
|
17 |
if role and content:
|
18 |
formatted_messages.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
|
19 |
|
20 |
-
# If the first message is not from 'system'
|
21 |
-
if messages[0]['role'] != 'system':
|
22 |
formatted_messages.insert(0, "<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n")
|
23 |
|
24 |
formatted_text = "".join(formatted_messages)
|
25 |
-
|
26 |
|
27 |
return formatted_text
|
28 |
|
|
|
5 |
super().__init__(*args, **kwargs)
|
6 |
self.chat_template = "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
|
7 |
|
8 |
+
def apply_chat_template(self, messages, add_system_prompt=True, add_generation_prompt=True):
|
9 |
"""
|
10 |
+
Applies the chat template to the provided messages. Optionally adds the system prompt.
|
11 |
+
|
12 |
+
Args:
|
13 |
+
messages (list): List of message dictionaries with 'role' and 'content'.
|
14 |
+
add_system_prompt (bool): If True, adds the system prompt at the beginning.
|
15 |
+
add_generation_prompt (bool): If True, adds a prompt for generation.
|
16 |
+
|
17 |
+
Returns:
|
18 |
+
str: Formatted text with the chat template applied.
|
19 |
"""
|
20 |
# Handle template processing here
|
21 |
formatted_messages = []
|
|
|
25 |
if role and content:
|
26 |
formatted_messages.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
|
27 |
|
28 |
+
# If the first message is not from 'system' and we want to add the system prompt, do so
|
29 |
+
if add_system_prompt and messages[0]['role'] != 'system':
|
30 |
formatted_messages.insert(0, "<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n")
|
31 |
|
32 |
formatted_text = "".join(formatted_messages)
|
|
|
33 |
|
34 |
return formatted_text
|
35 |
|