# Source: VRIS_vip/.history/mbench_a2d/gpt_a2d_numbered_20250205111640.py
# Hugging Face file-viewer snapshot: uploaded by dianecy
# ("Add files using upload-large-folder tool"), commit 3ec4928 (verified), 2.75 kB.
from datasets import build_dataset
import argparse
import opts
import sys
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import cv2
from io import BytesIO
import base64
from PIL import Image
from openai import OpenAI
def mark_object_and_encode(frame, mask, instance_id, text_query, color_mask=False, label_number=False):
    """Outline one object on a frame and return the result as base64 JPEG text.

    The external contours of ``mask`` are always drawn on ``frame``.
    Optionally the masked pixels are tinted, and ``instance_id`` is written
    on a black box positioned from the centroid of the largest contour.

    Note:
        ``frame`` is drawn on in place, so the caller's array is modified.
        Pass a copy if the original pixels must be preserved.

    Args:
        frame: Image array to annotate. Presumably an H x W x 3 uint8 RGB
            array (3-element colours are written into it and it is saved
            as JPEG) -- TODO confirm against the caller.
        mask: Array whose entries equal to ``1`` mark the object; it is also
            handed to ``cv2.findContours``. Presumably a single-channel
            uint8 array with the same height/width as ``frame`` -- TODO
            confirm.
        instance_id: Value rendered with ``str()`` as the label text.
        text_query: Currently unused by this function.
        color_mask: When truthy, blend the colour ``[255, 0, 0]`` into the
            masked pixels with weight 0.1.
        label_number: When truthy (and at least one contour exists), write
            ``instance_id`` next to the object.

    Returns:
        str: The annotated frame, JPEG-encoded and then base64-encoded.
    """
    # Optionally tint the object pixels.
    if color_mask:
        alpha = 0.1
        object_pixels = mask == 1  # computed once, reused for reads and writes

        colored_mask = np.zeros_like(frame)
        colored_mask[object_pixels] = [255, 0, 0]
        frame[object_pixels] = (
            (1 - alpha) * frame[object_pixels]
            + alpha * colored_mask[object_pixels]
        )

    # Always draw the mask outline.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(frame, contours, -1, [255, 0, 0], 2)

    # Optionally write the instance id; without a contour there is no anchor.
    if label_number and contours:
        largest_contour = max(contours, key=cv2.contourArea)
        moments = cv2.moments(largest_contour)
        if moments["m00"] != 0:
            center_x = int(moments["m10"] / moments["m00"])
            center_y = int(moments["m01"] / moments["m00"])
        else:
            # m00 == 0 would divide by zero; fall back to the image origin.
            center_x, center_y = 0, 0

        font = cv2.FONT_HERSHEY_SIMPLEX
        text = str(instance_id)
        font_scale = 0.6
        thickness = 2
        text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]

        # NOTE(review): `// 1` is a no-op, so the text starts one full text
        # width to the left of the centroid instead of being centred on it
        # (`// 2` would centre it). Kept as-is -- confirm this is intended.
        text_x = center_x - text_size[0] // 1
        text_y = center_y

        # Black background box spanning the text, ending at the baseline.
        rect_start = (text_x - 5, text_y - text_size[1] - 5)  # top-left corner
        rect_end = (text_x + text_size[0] + 5, text_y)
        cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
        cv2.putText(frame, text, (text_x, text_y), font, font_scale, (255, 255, 255), thickness)

    # Encode the annotated frame as JPEG in memory, then as base64 text.
    buffer = BytesIO()
    Image.fromarray(frame).save(buffer, format='jpeg')
    return base64.b64encode(buffer.getvalue()).decode("utf-8")
if __name__ == "__main__":
    # Build the CLI on top of the option parser supplied by the `opts` module.
    base_parser = opts.get_args_parser()
    parser = argparse.ArgumentParser(
        'ReferFormer training and evaluation script',
        parents=[base_parser],
    )
    args = parser.parse_args()

    # Load the 'a2d' training split and grab its text annotations.
    train_dataset = build_dataset('a2d', image_set='train', args=args)
    text_annotations = train_dataset.text_annotations