Spaces: Running on CPU Upgrade
fix id model
Browse files
app.py
CHANGED
@@ -59,6 +59,10 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
|
|
59 |
return video_count, non_video_count
|
60 |
|
61 |
|
|
|
|
|
|
|
|
|
62 |
def validate_media_constraints(message: dict) -> bool:
|
63 |
video_count, non_video_count = count_files_in_new_message(message["files"])
|
64 |
if video_count > 1:
|
@@ -189,7 +193,7 @@ def generate(message: dict, history: list[dict], model_id: str, max_new_tokens:
|
|
189 |
n_tokens = inputs["input_ids"].shape[1]
|
190 |
if n_tokens > MAX_INPUT_TOKENS:
|
191 |
gr.Warning(
|
192 |
-
f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid
|
193 |
)
|
194 |
yield ""
|
195 |
return
|
@@ -257,7 +261,7 @@ examples = [
|
|
257 |
model_choices = [
|
258 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
259 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
260 |
-
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq
|
261 |
]
|
262 |
|
263 |
demo = gr.ChatInterface(
|
@@ -276,7 +280,7 @@ demo = gr.ChatInterface(
|
|
276 |
],
|
277 |
stop_btn=False,
|
278 |
title="Fast quantized SmolVLM2 ⚡",
|
279 |
-
description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq
|
280 |
examples=examples,
|
281 |
run_examples_on_click=False,
|
282 |
cache_examples=False,
|
|
|
59 |
return video_count, non_video_count
|
60 |
|
61 |
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
|
66 |
def validate_media_constraints(message: dict) -> bool:
|
67 |
video_count, non_video_count = count_files_in_new_message(message["files"])
|
68 |
if video_count > 1:
|
|
|
193 |
n_tokens = inputs["input_ids"].shape[1]
|
194 |
if n_tokens > MAX_INPUT_TOKENS:
|
195 |
gr.Warning(
|
196 |
+
f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid out-of-memory errors in this Space."
|
197 |
)
|
198 |
yield ""
|
199 |
return
|
|
|
261 |
model_choices = [
|
262 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
|
263 |
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
|
264 |
+
"echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq",
|
265 |
]
|
266 |
|
267 |
demo = gr.ChatInterface(
|
|
|
280 |
],
|
281 |
stop_btn=False,
|
282 |
title="Fast quantized SmolVLM2 ⚡",
|
283 |
+
description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
|
284 |
examples=examples,
|
285 |
run_examples_on_click=False,
|
286 |
cache_examples=False,
|