echarlaix HF Staff committed on
Commit
407114c
·
1 Parent(s): 23b0e0e

fix id model

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -59,6 +59,10 @@ def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
59
  return video_count, non_video_count
60
 
61
 
 
 
 
 
62
  def validate_media_constraints(message: dict) -> bool:
63
  video_count, non_video_count = count_files_in_new_message(message["files"])
64
  if video_count > 1:
@@ -189,7 +193,7 @@ def generate(message: dict, history: list[dict], model_id: str, max_new_tokens:
189
  n_tokens = inputs["input_ids"].shape[1]
190
  if n_tokens > MAX_INPUT_TOKENS:
191
  gr.Warning(
192
- f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid CUDA out-of-memory errors in this Space."
193
  )
194
  yield ""
195
  return
@@ -257,7 +261,7 @@ examples = [
257
  model_choices = [
258
  "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
259
  "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
260
- "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free",
261
  ]
262
 
263
  demo = gr.ChatInterface(
@@ -276,7 +280,7 @@ demo = gr.ChatInterface(
276
  ],
277
  stop_btn=False,
278
  title="Fast quantized SmolVLM2 ⚡",
279
- description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq-data-free) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
280
  examples=examples,
281
  run_examples_on_click=False,
282
  cache_examples=False,
 
59
  return video_count, non_video_count
60
 
61
 
62
+
63
+
64
+
65
+
66
  def validate_media_constraints(message: dict) -> bool:
67
  video_count, non_video_count = count_files_in_new_message(message["files"])
68
  if video_count > 1:
 
193
  n_tokens = inputs["input_ids"].shape[1]
194
  if n_tokens > MAX_INPUT_TOKENS:
195
  gr.Warning(
196
+ f"Input too long. Max {MAX_INPUT_TOKENS} tokens. Got {n_tokens} tokens. This limit is set to avoid out-of-memory errors in this Space."
197
  )
198
  yield ""
199
  return
 
261
  model_choices = [
262
  "echarlaix/SmolVLM2-500M-Video-Instruct-openvino",
263
  "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static",
264
+ "echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq",
265
  ]
266
 
267
  demo = gr.ChatInterface(
 
280
  ],
281
  stop_btn=False,
282
  title="Fast quantized SmolVLM2 ⚡",
283
+ description="Play with a [SmolVLM2-500M-Video-Instruct](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino) and its quantized variants : [SmolVLM2-500M-Video-Instruct-openvino-8bit-woq](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-woq) and [SmolVLM2-500M-Video-Instruct-openvino-8bit-static](https://huggingface.co/echarlaix/SmolVLM2-500M-Video-Instruct-openvino-8bit-static) both obtained by respectively applying Weight-Only Quantization and Static Quantization using [Optimum Intel](https://github.com/huggingface/optimum-intel) NNCF integration. To get started, upload an image and text or try one of the examples. This demo runs on 4th Generation Intel Xeon (Sapphire Rapids) processors.",
284
  examples=examples,
285
  run_examples_on_click=False,
286
  cache_examples=False,