suriya7 committed on
Commit
66cf356
·
verified ·
1 Parent(s): 6f8c6fa

Update custom_tokenizer.py

Browse files
Files changed (1) hide show
  1. custom_tokenizer.py +12 -5
custom_tokenizer.py CHANGED
@@ -5,9 +5,17 @@ class CustomGPT2Tokenizer(GPT2Tokenizer):
5
  super().__init__(*args, **kwargs)
6
  self.chat_template = "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
7
 
8
- def apply_chat_template(self, messages, add_generation_prompt=True):
9
  """
10
- Applies the chat template to the provided messages.
 
 
 
 
 
 
 
 
11
  """
12
  # Handle template processing here
13
  formatted_messages = []
@@ -17,12 +25,11 @@ class CustomGPT2Tokenizer(GPT2Tokenizer):
17
  if role and content:
18
  formatted_messages.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
19
 
20
- # If the first message is not from 'system', insert system message
21
- if messages[0]['role'] != 'system':
22
  formatted_messages.insert(0, "<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n")
23
 
24
  formatted_text = "".join(formatted_messages)
25
-
26
 
27
  return formatted_text
28
 
 
5
  super().__init__(*args, **kwargs)
6
  self.chat_template = "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
7
 
8
+ def apply_chat_template(self, messages, add_system_prompt=True, add_generation_prompt=True):
9
  """
10
+ Applies the chat template to the provided messages. Optionally adds the system prompt.
11
+
12
+ Args:
13
+ messages (list): List of message dictionaries with 'role' and 'content'.
14
+ add_system_prompt (bool): If True, adds the system prompt at the beginning.
15
+ add_generation_prompt (bool): If True, adds a prompt for generation.
16
+
17
+ Returns:
18
+ str: Formatted text with the chat template applied.
19
  """
20
  # Handle template processing here
21
  formatted_messages = []
 
25
  if role and content:
26
  formatted_messages.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
27
 
28
+ # If the first message is not from 'system' and we want to add the system prompt, do so
29
+ if add_system_prompt and messages[0]['role'] != 'system':
30
  formatted_messages.insert(0, "<|im_start|>system\nYou are a helpful AI assistant named Securitron, trained by Aquilax.<|im_end|>\n")
31
 
32
  formatted_text = "".join(formatted_messages)
 
33
 
34
  return formatted_text
35