kahou1234 commited on
Commit
550c29a
·
verified ·
1 Parent(s): ba08738

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +1 -7
  2. tokenizer_config.json +4 -3
special_tokens_map.json CHANGED
@@ -13,11 +13,5 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": {
17
- "content": "<|reserved_special_token_250|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "<|eot_id|>"
 
 
 
 
 
 
17
  }
tokenizer_config.json CHANGED
@@ -2050,15 +2050,16 @@
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
- "model_max_length": 8192,
2061
- "pad_token": "<|reserved_special_token_250|>",
2062
  "padding_side": "left",
 
2063
  "tokenizer_class": "PreTrainedTokenizerFast"
2064
  }
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% set system_message = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '### Instruction:\n' + content + '\n\n### Response:\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|eot_id|>' + '\n\n' }}{% endif %}{% endfor %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
+ "model_max_length": 1000000000000000019884624838656,
2061
+ "pad_token": "<|eot_id|>",
2062
  "padding_side": "left",
2063
+ "split_special_tokens": false,
2064
  "tokenizer_class": "PreTrainedTokenizerFast"
2065
  }