Update app.py
app.py CHANGED
```diff
@@ -2,7 +2,6 @@ import torch
 import gradio as gr
 from transformers import AutoModelForCausalLM, BitsAndBytesConfig
 from deepseek_vl.models import VLChatProcessor, MultiModalityCausalLM
-from deepseek_vl.utils.io import load_pil_images
 
 # Model path
 model_path = "deepseek-ai/deepseek-vl-7b-chat"
@@ -29,22 +28,19 @@ vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
 # ==== Single-image inference function ====
 def chat_with_image(image, user_message):
     try:
-        # Build the conversation format
         conversation = [
             {"role": "User", "content": "<image_placeholder>" + user_message, "images": [image]},
             {"role": "Assistant", "content": ""}
         ]
 
-        #
-        pil_images = load_pil_images(conversation)
+        # Pass the PIL.Image in directly; load_pil_images is no longer used
         prepare_inputs = vl_chat_processor(
             conversations=conversation,
-            images=pil_images,
+            images=[image],
             force_batchify=True
         ).to(vl_gpt.device)
 
-        #
-        # Only cast the tensors that need it to float16; input_ids must stay long
+        # Correct dtype handling
         new_inputs = {}
         for k, v in prepare_inputs.items():
             if torch.is_tensor(v):
@@ -56,7 +52,7 @@ def chat_with_image(image, user_message):
             new_inputs[k] = v
         prepare_inputs = new_inputs
 
-        #
+        # Get the embeddings
         inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
 
         # Generate the answer
@@ -66,12 +62,11 @@ def chat_with_image(image, user_message):
             pad_token_id=tokenizer.eos_token_id,
             bos_token_id=tokenizer.bos_token_id,
             eos_token_id=tokenizer.eos_token_id,
-            max_new_tokens=128,
+            max_new_tokens=128,
             do_sample=False,
             use_cache=True
         )
 
-        # Decode
         answer = tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True)
         return answer
 
```
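The middle of the dtype loop falls outside the hunk (old lines 51 through 55 are elided), so only its first and last lines are visible. Going by the comment the commit removes, the intent is to cast floating-point tensors such as pixel_values to float16 while integer tensors such as input_ids stay torch.long. A minimal sketch of that logic as a standalone helper; normalize_dtypes is a hypothetical name, not something in app.py:

```python
import torch

def normalize_dtypes(prepare_inputs) -> dict:
    """Hypothetical helper mirroring the elided loop: cast floating tensors
    to float16 and leave integer tensors untouched."""
    new_inputs = {}
    for k, v in prepare_inputs.items():
        if torch.is_tensor(v) and torch.is_floating_point(v):
            new_inputs[k] = v.to(torch.float16)  # e.g. pixel_values
        else:
            new_inputs[k] = v  # input_ids / attention_mask stay torch.long
    return new_inputs
```

The floating-point check is the important part: casting input_ids to float16 would break the embedding lookup inside prepare_inputs_embeds.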
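The opening of the generation call also sits outside the hunk; only its trailing keyword arguments are shown. The upstream DeepSeek-VL chat example drives generation through vl_gpt.language_model.generate with the multimodal inputs_embeds, which is presumably what app.py does as well. A sketch under that assumption, continuing the variables from the diff:

```python
# Sketch following the upstream DeepSeek-VL example; the opening lines of
# this call in app.py are outside the hunk above. `prepare_inputs` is the
# plain dict rebuilt by the dtype loop, so the mask is fetched by key.
outputs = vl_gpt.language_model.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=prepare_inputs["attention_mask"],
    pad_token_id=tokenizer.eos_token_id,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=128,
    do_sample=False,
    use_cache=True,
)
```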
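The substantive fix in this commit is how the image reaches the processor. load_pil_images opens each entry in a message's images field with PIL, so it expects file paths and fails when the conversation already holds a PIL.Image, which is exactly what Gradio delivers when an image input is declared with type="pil". Dropping the helper and handing images=[image] straight to vl_chat_processor avoids that mismatch. The interface definition is not part of this diff; a plausible wiring, with hypothetical labels, would look like:

```python
import gradio as gr

# Hypothetical UI wiring (not shown in this diff): type="pil" makes Gradio
# pass the upload as a PIL.Image, which chat_with_image forwards directly
# to vl_chat_processor via images=[image].
demo = gr.Interface(
    fn=chat_with_image,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Answer"),
    title="DeepSeek-VL 7B Chat",
)
demo.launch()
```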