SorenDreano committed on
Commit
489efed
·
verified ·
1 Parent(s): fe5b2f0

Upload processor

Browse files
added_tokens.json CHANGED
@@ -1,7 +1,14 @@
1
  {
 
 
2
  "<|box_end|>": 151649,
3
  "<|box_start|>": 151648,
4
  "<|endoftext|>": 151643,
 
 
 
 
 
5
  "<|im_end|>": 151645,
6
  "<|im_start|>": 151644,
7
  "<|image_pad|>": 151655,
@@ -9,6 +16,7 @@
9
  "<|object_ref_start|>": 151646,
10
  "<|quad_end|>": 151651,
11
  "<|quad_start|>": 151650,
 
12
  "<|video_pad|>": 151656,
13
  "<|vision_end|>": 151653,
14
  "<|vision_pad|>": 151654,
 
1
  {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
  "<|box_end|>": 151649,
5
  "<|box_start|>": 151648,
6
  "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
  "<|im_end|>": 151645,
13
  "<|im_start|>": 151644,
14
  "<|image_pad|>": 151655,
 
16
  "<|object_ref_start|>": 151646,
17
  "<|quad_end|>": 151651,
18
  "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
  "<|video_pad|>": 151656,
21
  "<|vision_end|>": 151653,
22
  "<|vision_pad|>": 151654,
chat_template.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "chat_template": "{% set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' %}\n{% for message in messages %}\n {#--- Handle User Messages with Template and Examples ---#}\n {%- if message['role'] == 'user' and template -%}\n {% if loop.first and message['role'] != 'system' %}\n {{- '<|im_start|>system\nYou are NuExtract, an information extraction tool created by NuMind.<|im_end|>' }}\n {% endif %}\n \n {{- '<|im_start|>' + message['role'] -}}\n \n {#--- Template Section ---#}\n {{ '\n# Template:' }}\n {{- '\n' + template + '\n' }}\n \n {#--- Examples Section (if provided) ---#}\n {% if examples -%}\n {{- '# Examples:' }}\n {% for example in examples %}\n {{- '## Input:\n' }}\n {#--- Handle image examples ---#}\n {% if example['input'] is mapping and example['input']['type'] == 'image' %}\n {{- image_placeholder | trim -}}\n {% elif example['input'] == '<image>' %}\n {{- image_placeholder | trim -}}\n {% else %}\n {{- example['input'] -}}\n {% endif %}\n {{- '\n## Output:\n' ~ example['output'] }}\n {% endfor %}\n {%- endif %}\n \n {#--- Context Section: Handle various content types ---#}\n {{- '# Context:\n' }}\n {%- if message['content'] is string -%}\n {#--- Simple string content ---#}\n {{- message['content'] | trim -}}\n {%- elif message['content'] is mapping and message['content']['type'] == 'image' -%}\n {#--- Single image document ---#}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- List of content items (mixed text/images) ---#}\n {#--- First, determine what the actual input content is (not ICL images) ---#}\n {%- set ns = namespace(has_text_input=false, text_content='') -%}\n \n {#--- Count content types and identify actual input document ---#}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'text' -%}\n {%- if content.get('text') != '<image>' -%}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content['text'] -%}\n {%- endif -%}\n {%- elif content is string 
-%}\n {%- if content != '<image>' -%}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content -%}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n \n {#--- Determine what to output based on actual input type ---#}\n {%- if ns.has_text_input -%}\n {#--- Main input is text, so output the text content ---#}\n {{- ns.text_content | trim -}}\n {%- else -%}\n {#--- Main input is image or <image> placeholder ---#}\n {%- set ns2 = namespace(found_image=false) -%}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'image' and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is string and content == '<image>' and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{- '<|im_end|>\n'}}\n \n {#--- Handle All Other Messages (Assistant, System, etc.) 
---#}\n {% else %}\n {% if loop.first and message['role'] != 'system' %}\n {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>' }}\n {% endif %}\n \n {{- '<|im_start|>' + message['role'] + '\n' }}\n \n {#--- Same content handling logic as above but without template/examples ---#}\n {%- if message['content'] is string -%}\n {{- message['content'] | trim }}\n {%- elif message['content'] is mapping and message['content']['type'] == 'image' -%}\n {{- image_placeholder | trim }}\n {%- else -%}\n {%- for content in message['content'] -%}\n {%- if content is string -%}\n {{- content | trim -}}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' -%}\n {{- image_placeholder | trim }}\n {%- elif content is mapping and content.get('type') == 'text' -%}\n {{- content['text'] | trim -}}\n {%- elif content is mapping and content.get('type') == 'image' -%}\n {# Skip adding image placeholder - it's already in the text #}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {{- '<|im_end|>'}}\n {% endif %}\n{% endfor -%}\n{#--- Add Generation Prompt if Requested ---#}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant' }}\n{% endif -%}"
3
  }
 
1
  {
2
+ "chat_template": "{%- set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'user' -%}\n {%- if loop.first and message['role'] != 'system' -%}\n {{- '<|im_start|>system\n' -}}\n {%- if template -%}\n {#--- If template, extraction task ---#}\n {{- 'You are NuExtract, an information extraction tool created by NuMind.' -}}\n {%- else -%}\n {#--- Else, template generation task ---#}\n {{- 'You are a helpful assistant.' -}}\n {%- endif -%}\n {{ '<|im_end|>\n' }}\n {%- endif -%}\n {{- '<|im_start|>' + message['role'] + '\n' -}}\n {%- if template -%}\n {#--- Template Section ---#}\n {{- '# Template:\n' -}}\n {{- template -}}\n {{- '\n' -}}\n \n {%- if examples -%}\n {#--- Examples can only exist in the extraction task ---#}\n {{- '# Examples:\n' -}}\n {%- for example in examples -%}\n {{- '## Input:\n' -}}\n {%- if example['input'] is mapping and (example['input']['type'] == 'image' or example['input']['type'] == 'image_url') -%}\n {{- image_placeholder | trim -}}\n {%- elif example['input'] == '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- Text input example ---#}\n {{- example['input'] -}}\n {%- endif -%}\n {{- '\n' -}}\n {{- '## Output:\n' -}}\n {{- example['output'] -}}\n {{- '\n' -}}\n {%- endfor -%}\n {%- endif -%}\n {{- '# Context:\n' -}}\n {%- endif -%}\n \n {%- if message['content'] is string -%}\n {#--- Simple string content ---#}\n {{- message['content'] | trim -}}\n {%- elif message['content'] is mapping and (message['content']['type'] == 'image' or message['content']['type'] == 'image_url') -%}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- List of content items (mixed text/images) ---#}\n {#--- First, determine what the actual input content is (not ICL images) ---#}\n {%- set ns = namespace(has_text_input=false, text_content='') -%}\n \n {#--- Count content types and identify actual input 
document ---#}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'text' -%}\n {%- if content.get('text') != '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content['text'] -%}\n {%- endif -%}\n {%- elif content is string -%}\n {%- if content != '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content -%}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n \n {#--- Determine what to output based on actual input type ---#}\n {%- if ns.has_text_input -%}\n {#--- Main input is text, so output the text content ---#}\n {{- ns.text_content | trim -}}\n {%- else -%}\n {#--- Main input is image or <image> placeholder ---#}\n {%- set ns2 = namespace(found_image=false) -%}\n {%- for content in message['content'] -%}\n {%- if content is mapping and (content.get('type') == 'image' or content.get('type') == 'image_url') and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' and not ns2.found_image -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is string and content == '<image>' and not ns2.found_image -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{- '<|im_end|>\n'}}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- '<|im_start|>assistant\n' -}}\n{%- endif -%}"
3
  }
preprocessor_config.json CHANGED
@@ -14,16 +14,16 @@
14
  0.26130258,
15
  0.27577711
16
  ],
17
- "max_pixels": 2352000,
18
  "merge_size": 2,
19
  "min_pixels": 200704,
20
  "patch_size": 14,
21
- "processor_class": "Qwen2VLProcessor",
22
  "resample": 3,
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
- "longest_edge": 12845056,
26
- "shortest_edge": 3136
27
  },
28
  "temporal_patch_size": 2
29
  }
 
14
  0.26130258,
15
  0.27577711
16
  ],
17
+ "max_pixels": 23000000,
18
  "merge_size": 2,
19
  "min_pixels": 200704,
20
  "patch_size": 14,
21
+ "processor_class": "Qwen2_5_VLProcessor",
22
  "resample": 3,
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
+ "longest_edge": 23000000,
26
+ "shortest_edge": 200704
27
  },
28
  "temporal_patch_size": 2
29
  }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:091aa7594dc2fcfbfa06b9e3c22a5f0562ac14f30375c13af7309407a0e67b8a
3
- size 11420371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba0c439f7be467bf47d12a7e6f9adc6116201056fc60c67f431c679b7c16afc8
3
+ size 11422064
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "151643": {
@@ -112,6 +114,70 @@
112
  "rstrip": false,
113
  "single_word": false,
114
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  }
116
  },
117
  "additional_special_tokens": [
@@ -130,17 +196,20 @@
130
  "<|video_pad|>"
131
  ],
132
  "bos_token": null,
133
- "chat_template": "{% set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' %}\n{% for message in messages %}\n {#--- Handle User Messages with Template and Examples ---#}\n {%- if message['role'] == 'user' and template -%}\n {% if loop.first and message['role'] != 'system' %}\n {{- '<|im_start|>system\nYou are NuExtract, an information extraction tool created by NuMind.<|im_end|>' }}\n {% endif %}\n \n {{- '<|im_start|>' + message['role'] -}}\n \n {#--- Template Section ---#}\n {{ '\n# Template:' }}\n {{- '\n' + template + '\n' }}\n \n {#--- Examples Section (if provided) ---#}\n {% if examples -%}\n {{- '# Examples:' }}\n {% for example in examples %}\n {{- '## Input:\n' }}\n {#--- Handle image examples ---#}\n {% if example['input'] is mapping and example['input']['type'] == 'image' %}\n {{- image_placeholder | trim -}}\n {% elif example['input'] == '<image>' %}\n {{- image_placeholder | trim -}}\n {% else %}\n {{- example['input'] -}}\n {% endif %}\n {{- '\n## Output:\n' ~ example['output'] }}\n {% endfor %}\n {%- endif %}\n \n {#--- Context Section: Handle various content types ---#}\n {{- '# Context:\n' }}\n {%- if message['content'] is string -%}\n {#--- Simple string content ---#}\n {{- message['content'] | trim -}}\n {%- elif message['content'] is mapping and message['content']['type'] == 'image' -%}\n {#--- Single image document ---#}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- List of content items (mixed text/images) ---#}\n {#--- First, determine what the actual input content is (not ICL images) ---#}\n {%- set ns = namespace(has_text_input=false, text_content='') -%}\n \n {#--- Count content types and identify actual input document ---#}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'text' -%}\n {%- if content.get('text') != '<image>' -%}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content['text'] -%}\n {%- endif -%}\n {%- elif content is string 
-%}\n {%- if content != '<image>' -%}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content -%}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n \n {#--- Determine what to output based on actual input type ---#}\n {%- if ns.has_text_input -%}\n {#--- Main input is text, so output the text content ---#}\n {{- ns.text_content | trim -}}\n {%- else -%}\n {#--- Main input is image or <image> placeholder ---#}\n {%- set ns2 = namespace(found_image=false) -%}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'image' and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is string and content == '<image>' and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{- '<|im_end|>\n'}}\n \n {#--- Handle All Other Messages (Assistant, System, etc.) 
---#}\n {% else %}\n {% if loop.first and message['role'] != 'system' %}\n {{- '<|im_start|>system\nYou are a helpful assistant.<|im_end|>' }}\n {% endif %}\n \n {{- '<|im_start|>' + message['role'] + '\n' }}\n \n {#--- Same content handling logic as above but without template/examples ---#}\n {%- if message['content'] is string -%}\n {{- message['content'] | trim }}\n {%- elif message['content'] is mapping and message['content']['type'] == 'image' -%}\n {{- image_placeholder | trim }}\n {%- else -%}\n {%- for content in message['content'] -%}\n {%- if content is string -%}\n {{- content | trim -}}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' -%}\n {{- image_placeholder | trim }}\n {%- elif content is mapping and content.get('type') == 'text' -%}\n {{- content['text'] | trim -}}\n {%- elif content is mapping and content.get('type') == 'image' -%}\n {# Skip adding image placeholder - it's already in the text #}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {{- '<|im_end|>'}}\n {% endif %}\n{% endfor -%}\n{#--- Add Generation Prompt if Requested ---#}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant' }}\n{% endif -%}",
134
  "clean_up_tokenization_spaces": false,
135
  "eos_token": "<|im_end|>",
136
  "errors": "replace",
137
  "extra_special_tokens": {},
138
- "max_pixels": 2352000,
 
139
  "min_pixels": 200704,
140
- "model_max_length": 32768,
 
141
  "pad_token": "<|endoftext|>",
142
- "padding_side": "left",
143
- "processor_class": "Qwen2VLProcessor",
 
144
  "split_special_tokens": false,
145
  "tokenizer_class": "Qwen2Tokenizer",
146
  "unk_token": null
 
1
  {
2
+ "_commit_hash": null,
3
+ "add_bos_token": false,
4
  "add_prefix_space": false,
5
  "added_tokens_decoder": {
6
  "151643": {
 
114
  "rstrip": false,
115
  "single_word": false,
116
  "special": true
117
+ },
118
+ "151657": {
119
+ "content": "<tool_call>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": false
125
+ },
126
+ "151658": {
127
+ "content": "</tool_call>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": false
133
+ },
134
+ "151659": {
135
+ "content": "<|fim_prefix|>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": false
141
+ },
142
+ "151660": {
143
+ "content": "<|fim_middle|>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": false
149
+ },
150
+ "151661": {
151
+ "content": "<|fim_suffix|>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": false
157
+ },
158
+ "151662": {
159
+ "content": "<|fim_pad|>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": false
165
+ },
166
+ "151663": {
167
+ "content": "<|repo_name|>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": false
173
+ },
174
+ "151664": {
175
+ "content": "<|file_sep|>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": false
181
  }
182
  },
183
  "additional_special_tokens": [
 
196
  "<|video_pad|>"
197
  ],
198
  "bos_token": null,
199
+ "chat_template": "{%- set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'user' -%}\n {%- if loop.first and message['role'] != 'system' -%}\n {{- '<|im_start|>system\n' -}}\n {%- if template -%}\n {#--- If template, extraction task ---#}\n {{- 'You are NuExtract, an information extraction tool created by NuMind.' -}}\n {%- else -%}\n {#--- Else, template generation task ---#}\n {{- 'You are a helpful assistant.' -}}\n {%- endif -%}\n {{ '<|im_end|>\n' }}\n {%- endif -%}\n {{- '<|im_start|>' + message['role'] + '\n' -}}\n {%- if template -%}\n {#--- Template Section ---#}\n {{- '# Template:\n' -}}\n {{- template -}}\n {{- '\n' -}}\n \n {%- if examples -%}\n {#--- Examples can only exist in the extraction task ---#}\n {{- '# Examples:\n' -}}\n {%- for example in examples -%}\n {{- '## Input:\n' -}}\n {%- if example['input'] is mapping and (example['input']['type'] == 'image' or example['input']['type'] == 'image_url') -%}\n {{- image_placeholder | trim -}}\n {%- elif example['input'] == '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- Text input example ---#}\n {{- example['input'] -}}\n {%- endif -%}\n {{- '\n' -}}\n {{- '## Output:\n' -}}\n {{- example['output'] -}}\n {{- '\n' -}}\n {%- endfor -%}\n {%- endif -%}\n {{- '# Context:\n' -}}\n {%- endif -%}\n \n {%- if message['content'] is string -%}\n {#--- Simple string content ---#}\n {{- message['content'] | trim -}}\n {%- elif message['content'] is mapping and (message['content']['type'] == 'image' or message['content']['type'] == 'image_url') -%}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- List of content items (mixed text/images) ---#}\n {#--- First, determine what the actual input content is (not ICL images) ---#}\n {%- set ns = namespace(has_text_input=false, text_content='') -%}\n \n {#--- Count content types and identify actual input 
document ---#}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'text' -%}\n {%- if content.get('text') != '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content['text'] -%}\n {%- endif -%}\n {%- elif content is string -%}\n {%- if content != '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content -%}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n \n {#--- Determine what to output based on actual input type ---#}\n {%- if ns.has_text_input -%}\n {#--- Main input is text, so output the text content ---#}\n {{- ns.text_content | trim -}}\n {%- else -%}\n {#--- Main input is image or <image> placeholder ---#}\n {%- set ns2 = namespace(found_image=false) -%}\n {%- for content in message['content'] -%}\n {%- if content is mapping and (content.get('type') == 'image' or content.get('type') == 'image_url') and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' and not ns2.found_image -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is string and content == '<image>' and not ns2.found_image -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{- '<|im_end|>\n'}}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- '<|im_start|>assistant\n' -}}\n{%- endif -%}",
200
  "clean_up_tokenization_spaces": false,
201
  "eos_token": "<|im_end|>",
202
  "errors": "replace",
203
  "extra_special_tokens": {},
204
+ "max_length": null,
205
+ "max_pixels": 23000000,
206
  "min_pixels": 200704,
207
+ "model_max_length": 131072,
208
+ "pad_to_multiple_of": null,
209
  "pad_token": "<|endoftext|>",
210
+ "pad_token_type_id": 0,
211
+ "padding_side": "right",
212
+ "processor_class": "Qwen2_5_VLProcessor",
213
  "split_special_tokens": false,
214
  "tokenizer_class": "Qwen2Tokenizer",
215
  "unk_token": null