Spaces:

echarlaix
/

vision-langage-openvino

Running on CPU Upgrade

App Files Files Community

echarlaix HF Staff committed on Jul 16

Commit

407114c

1 Parent(s): 23b0e0e

fix id model

Browse files

Files changed (1) hide show

app.py +7 -3

app.py CHANGED Viewed

@@ -59,6 +59,10 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     return video_count, non_video_count
 def validate_media_constraints(message: dict) -> bool:
     video_count, non_video_count = count_files_in_new_message(message["files"])
     if video_count > 1:
@@ -189,7 +193,7 @@ def generate(message: dict, history: list[dict], model_id: str, max_new_tokens:
     n_tokens = inputs["input_ids"].shape[1]
     if n_tokens > MAX_INPUT_TOKENS:
         gr.Warning(
-            f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid CUDA out-of-memory errors in this Space."
         )
         yield ""
         return
@@ -257,7 +261,7 @@ examples = [
 model_choices = [
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
-    "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
 ]
 demo = gr.ChatInterface(
@@ -276,7 +280,7 @@ demo = gr.ChatInterface(
     ],
     stop_btn=False,
     title="Fast quantized SmolVLM2 ⚡",
-    description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization  using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,

     return video_count, non_video_count
 def validate_media_constraints(message: dict) -> bool:
     video_count, non_video_count = count_files_in_new_message(message["files"])
     if video_count > 1:
     n_tokens = inputs["input_ids"].shape[1]
     if n_tokens > MAX_INPUT_TOKENS:
         gr.Warning(
+            f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid out-of-memory errors in this Space."
         )
         yield ""
         return
 model_choices = [
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
     "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
+    "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq",
 ]
 demo = gr.ChatInterface(
     ],
     stop_btn=False,
     title="Fast quantized SmolVLM2 ⚡",
+    description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization  using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,