Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,7 @@ import torchvision.transforms as T
|
|
| 10 |
from PIL import Image
|
| 11 |
from torchvision.transforms.functional import InterpolationMode
|
| 12 |
from transformers import AutoModel, AutoTokenizer
|
|
|
|
| 13 |
|
| 14 |
from threading import Thread
|
| 15 |
import re
|
|
@@ -91,8 +92,31 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
|
|
| 91 |
processed_images.append(thumbnail_img)
|
| 92 |
return processed_images
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
def load_image(image_file, input_size=448, max_num=12):
|
| 95 |
-
image =
|
| 96 |
print("Image size: ", image.size)
|
| 97 |
transform = build_transform(input_size=input_size)
|
| 98 |
images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
|
|
@@ -107,8 +131,7 @@ model = AutoModel.from_pretrained(
|
|
| 107 |
trust_remote_code=True,
|
| 108 |
).eval().cuda()
|
| 109 |
tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
|
| 110 |
-
|
| 111 |
-
|
| 112 |
@spaces.GPU
|
| 113 |
def chat(message, history):
|
| 114 |
print("history",history)
|
|
@@ -133,7 +156,7 @@ We currently only support one image at the start of the context! Please start a
|
|
| 133 |
pixel_values = None
|
| 134 |
|
| 135 |
|
| 136 |
-
generation_config = dict(max_new_tokens=
|
| 137 |
|
| 138 |
if len(history) == 0:
|
| 139 |
if pixel_values is not None:
|
|
|
|
| 10 |
from PIL import Image
|
| 11 |
from torchvision.transforms.functional import InterpolationMode
|
| 12 |
from transformers import AutoModel, AutoTokenizer
|
| 13 |
+
from PIL import Image, ExifTags
|
| 14 |
|
| 15 |
from threading import Thread
|
| 16 |
import re
|
|
|
|
| 92 |
processed_images.append(thumbnail_img)
|
| 93 |
return processed_images
|
| 94 |
|
| 95 |
+
def correct_image_orientation(image_path):
    """Open an image and apply its EXIF orientation so the pixels are upright.

    Args:
        image_path: Value passed directly to ``Image.open`` (e.g. a file path).

    Returns:
        The opened PIL image, transposed according to its EXIF Orientation
        tag when one is present. If the EXIF data cannot be processed, the
        image is returned exactly as opened.
    """
    # Local import keeps this function self-contained regardless of which
    # PIL submodules the module header imports.
    from PIL import ImageOps

    # Open the image.
    image = Image.open(image_path)

    # Apply the EXIF orientation (if any). exif_transpose uses the public
    # EXIF API, so it also works for formats that lack the private
    # _getexif() method, and it covers all eight orientation values,
    # including the mirrored ones (2, 4, 5, 7), not just 3, 6 and 8.
    try:
        image = ImageOps.exif_transpose(image)
    except Exception as e:
        # Best-effort: a malformed EXIF block must not prevent the image
        # from being used, so report the problem and fall through.
        print("Không thể xử lý Exif:", e)

    return image
|
| 117 |
+
|
| 118 |
def load_image(image_file, input_size=448, max_num=12):
|
| 119 |
+
image = correct_image_orientation(image_file).convert('RGB')
|
| 120 |
print("Image size: ", image.size)
|
| 121 |
transform = build_transform(input_size=input_size)
|
| 122 |
images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
|
|
|
|
| 131 |
trust_remote_code=True,
|
| 132 |
).eval().cuda()
|
| 133 |
tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
|
| 134 |
+
|
|
|
|
| 135 |
@spaces.GPU
|
| 136 |
def chat(message, history):
|
| 137 |
print("history",history)
|
|
|
|
| 156 |
pixel_values = None
|
| 157 |
|
| 158 |
|
| 159 |
+
generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.5)
|
| 160 |
|
| 161 |
if len(history) == 0:
|
| 162 |
if pixel_values is not None:
|