Update chat_template (#4)
Browse files
- Update chat_template (44bf2153ffe7ee35988c1096b2cb3cd9690158bb)
Co-authored-by: Joshua <[email protected]>
- README.md +2 -3
- tokenizer.json +2 -2
- tokenizer_config.json +4 -3
README.md
CHANGED
@@ -3,9 +3,8 @@ license: gemma
|
|
3 |
library_name: transformers
|
4 |
pipeline_tag: text-generation
|
5 |
extra_gated_heading: Access Gemma on Hugging Face
|
6 |
-
extra_gated_prompt:
|
7 |
-
|
8 |
-
Google’s usage license. To do this, please ensure you’re logged in to Hugging
|
9 |
Face and click below. Requests are processed immediately.
|
10 |
extra_gated_button_content: Acknowledge license
|
11 |
base_model: google/gemma-3-1b-pt
|
|
|
3 |
library_name: transformers
|
4 |
pipeline_tag: text-generation
|
5 |
extra_gated_heading: Access Gemma on Hugging Face
|
6 |
+
extra_gated_prompt: To access Gemma on Hugging Face, you’re required to review and
|
7 |
+
agree to Google’s usage license. To do this, please ensure you’re logged in to Hugging
|
|
|
8 |
Face and click below. Requests are processed immediately.
|
9 |
extra_gated_button_content: Acknowledge license
|
10 |
base_model: google/gemma-3-1b-pt
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
|
3 |
+
size 33384568
|
tokenizer_config.json
CHANGED
@@ -2160,7 +2160,7 @@
|
|
2160 |
"normalized": false,
|
2161 |
"rstrip": false,
|
2162 |
"single_word": false,
|
2163 |
-
"special": false
|
2164 |
},
|
2165 |
"256000": {
|
2166 |
"content": "<end_of_image>",
|
@@ -2168,7 +2168,7 @@
|
|
2168 |
"normalized": false,
|
2169 |
"rstrip": false,
|
2170 |
"single_word": false,
|
2171 |
-
"special": false
|
2172 |
},
|
2173 |
"256001": {
|
2174 |
"content": "<unused99>",
|
@@ -51325,7 +51325,7 @@
|
|
51325 |
},
|
51326 |
"boi_token": "<start_of_image>",
|
51327 |
"bos_token": "<bos>",
|
51328 |
-
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
|
51329 |
"clean_up_tokenization_spaces": false,
|
51330 |
"eoi_token": "<end_of_image>",
|
51331 |
"eos_token": "<eos>",
|
@@ -51337,6 +51337,7 @@
|
|
51337 |
"image_token": "<image_soft_token>",
|
51338 |
"model_max_length": 1000000000000000019884624838656,
|
51339 |
"pad_token": "<pad>",
|
|
|
51340 |
"sp_model_kwargs": null,
|
51341 |
"spaces_between_special_tokens": false,
|
51342 |
"tokenizer_class": "GemmaTokenizer",
|
|
|
2160 |
"normalized": false,
|
2161 |
"rstrip": false,
|
2162 |
"single_word": false,
|
2163 |
+
"special": true
|
2164 |
},
|
2165 |
"256000": {
|
2166 |
"content": "<end_of_image>",
|
|
|
2168 |
"normalized": false,
|
2169 |
"rstrip": false,
|
2170 |
"single_word": false,
|
2171 |
+
"special": true
|
2172 |
},
|
2173 |
"256001": {
|
2174 |
"content": "<unused99>",
|
|
|
51325 |
},
|
51326 |
"boi_token": "<start_of_image>",
|
51327 |
"bos_token": "<bos>",
|
51328 |
+
"chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
|
51329 |
"clean_up_tokenization_spaces": false,
|
51330 |
"eoi_token": "<end_of_image>",
|
51331 |
"eos_token": "<eos>",
|
|
|
51337 |
"image_token": "<image_soft_token>",
|
51338 |
"model_max_length": 1000000000000000019884624838656,
|
51339 |
"pad_token": "<pad>",
|
51340 |
+
"processor_class": "Gemma3Processor",
|
51341 |
"sp_model_kwargs": null,
|
51342 |
"spaces_between_special_tokens": false,
|
51343 |
"tokenizer_class": "GemmaTokenizer",
|