robot0820 committed on
Commit
d150731
·
verified ·
1 Parent(s): b75d3d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -10
app.py CHANGED
@@ -2,7 +2,6 @@ import torch
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, BitsAndBytesConfig
4
  from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
5
- from deepseek_vl.utils.io import load_pil_images
6
 
7
  # 模型路徑
8
  model_path = "deepseek-ai/deepseek-vl-7b-chat"
@@ -29,22 +28,19 @@ vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
29
  # ==== 單張圖片推理函式 ====
30
  def chat_with_image(image, user_message):
31
  try:
32
- # 建立對話格式
33
  conversation = [
34
  {"role": "User", "content": "<image_placeholder>" + user_message, "images": [image]},
35
  {"role": "Assistant", "content": ""}
36
  ]
37
 
38
- # 輸入處理
39
- pil_images = load_pil_images(conversation)
40
  prepare_inputs = vl_chat_processor(
41
  conversations=conversation,
42
- images=pil_images,
43
  force_batchify=True
44
  ).to(vl_gpt.device)
45
 
46
- # 🚨 正確 dtype 處理
47
- # 只將需要的 tensor 轉 float16,input_ids 必須是 long
48
  new_inputs = {}
49
  for k, v in prepare_inputs.items():
50
  if torch.is_tensor(v):
@@ -56,7 +52,7 @@ def chat_with_image(image, user_message):
56
  new_inputs[k] = v
57
  prepare_inputs = new_inputs
58
 
59
- # 取得輸入 embeddings
60
  inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
61
 
62
  # 生成回答
@@ -66,12 +62,11 @@ def chat_with_image(image, user_message):
66
  pad_token_id=tokenizer.eos_token_id,
67
  bos_token_id=tokenizer.bos_token_id,
68
  eos_token_id=tokenizer.eos_token_id,
69
- max_new_tokens=128, # 降低生成長度以減少記憶體
70
  do_sample=False,
71
  use_cache=True
72
  )
73
 
74
- # 解碼
75
  answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
76
  return answer
77
 
 
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, BitsAndBytesConfig
4
  from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
 
5
 
6
  # 模型路徑
7
  model_path = "deepseek-ai/deepseek-vl-7b-chat"
 
28
  # ==== 單張圖片推理函式 ====
29
  def chat_with_image(image, user_message):
30
  try:
 
31
  conversation = [
32
  {"role": "User", "content": "<image_placeholder>" + user_message, "images": [image]},
33
  {"role": "Assistant", "content": ""}
34
  ]
35
 
36
+ # 直接傳入 PIL.Image,不再使用 load_pil_images
 
37
  prepare_inputs = vl_chat_processor(
38
  conversations=conversation,
39
+ images=[image],
40
  force_batchify=True
41
  ).to(vl_gpt.device)
42
 
43
+ # 正確 dtype 處理
 
44
  new_inputs = {}
45
  for k, v in prepare_inputs.items():
46
  if torch.is_tensor(v):
 
52
  new_inputs[k] = v
53
  prepare_inputs = new_inputs
54
 
55
+ # 取得 embeddings
56
  inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
57
 
58
  # 生成回答
 
62
  pad_token_id=tokenizer.eos_token_id,
63
  bos_token_id=tokenizer.bos_token_id,
64
  eos_token_id=tokenizer.eos_token_id,
65
+ max_new_tokens=128,
66
  do_sample=False,
67
  use_cache=True
68
  )
69
 
 
70
  answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
71
  return answer
72