avemio-digital committed (verified)
Commit aad89fc · 1 Parent(s): dbb07aa

Update tokenizer_config.json

Files changed (1):
  tokenizer_config.json (+4 −7)
tokenizer_config.json CHANGED
@@ -779,13 +779,10 @@
     }
   },
   "bos_token": "<|endoftext|>",
- "chat_template": "{% if 'role' in messages[0] %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|User|>' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{{ '<|Assistant|>' + message['content'] + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}{% else %}{{ bos_token }}{% for message in messages %}{% if message['from'] == 'system' %}{{ message['value'] + '\n' }}{% elif message['from'] == 'user' %}{{ '<|User|>' + message['value'] + '\n' }}{% elif message['from'] == 'gpt' %}{{ '<|Assistant|>' + message['value'] + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|Assistant|>' }}{% endif %}{% endif %}",
+ "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
- "extra_special_tokens": {},
- "model_max_length": 35000,
- "pad_token": "<|dummy_87|>",
- "padding_side": "left",
- "tokenizer_class": "GPT2Tokenizer",
- "unk_token": "�"
+ "model_max_length": 16384,
+ "pad_token": "<|dummy_85|>",
+ "tokenizer_class": "GPT2Tokenizer"
   }
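In short, the commit replaces the old <|User|>/<|Assistant|> chat template (which accepted both role/content and ShareGPT-style from/value messages) with a ChatML-style template built on <|im_start|>, <|im_sep|>, and <|im_end|>; it also lowers model_max_length from 35000 to 16384, changes the pad token from <|dummy_87|> to <|dummy_85|>, and drops the extra_special_tokens, padding_side, and unk_token entries.

Below is a minimal sketch of how the new template renders a conversation, using plain jinja2 as a stand-in for transformers' tokenizer.apply_chat_template (which renders this same string, from a sandboxed Jinja environment, once the updated tokenizer_config.json is loaded). The template is copied verbatim from the diff above; the example messages are made up.

    # Render the updated chat template directly with jinja2.
    from jinja2 import Template

    # Copied verbatim from the new tokenizer_config.json.
    CHAT_TEMPLATE = (
        "{% for message in messages %}"
        "{% if (message['role'] == 'system') %}"
        "{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}"
        "{% elif (message['role'] == 'user') %}"
        "{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}"
        "{% elif (message['role'] == 'assistant') %}"
        "{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}"
        "{% endif %}{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}"
    )

    # Hypothetical example conversation.
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What changed in this commit?"},
    ]

    prompt = Template(CHAT_TEMPLATE).render(
        messages=messages, add_generation_prompt=True
    )
    print(prompt)
    # Output (wrapped here for readability; the rendered string is one line):
    # <|im_start|>system<|im_sep|>You are a helpful assistant.<|im_end|>
    # <|im_start|>user<|im_sep|>What changed in this commit?<|im_end|>
    # <|im_start|>assistant<|im_sep|>

Note that the rendered prompt contains no newline characters: the <|im_end|> token alone delimits turns, consistent with the eos_token setting above.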