|
from datasets import build_dataset |
|
import argparse |
|
import opts |
|
|
|
import sys |
|
import os |
|
import time |
|
|
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import cv2 |
|
from io import BytesIO |
|
import base64 |
|
from PIL import Image |
|
|
|
from openai import OpenAI |
|
|
|
def mark_object_and_encode(frame, mask, instance_id, text_query, color_mask=False, label_number=False):
    """Annotate *frame* with an instance mask and return it as a base64 JPEG.

    The frame is modified in place: the mask's external contours are always
    drawn in red; optionally the mask interior is alpha-tinted red
    (``color_mask``) and the instance id is printed near the centroid of the
    largest contour (``label_number``).

    Args:
        frame: H x W x 3 uint8 image array (modified in place).
        mask: H x W binary mask (0/1 values) selecting the object pixels.
            # assumes mask is 2-D single-channel — TODO confirm at call site
        instance_id: value rendered as the label text when ``label_number``.
        text_query: unused here; kept for interface compatibility with callers.
        color_mask: if True, alpha-blend a red tint over the masked pixels.
        label_number: if True, draw ``instance_id`` on a black box near the
            mask centroid.

    Returns:
        str: base64-encoded JPEG bytes of the annotated frame.
    """
    if color_mask:
        alpha = 0.1

        colored_mask = np.zeros_like(frame)
        colored_mask[mask == 1] = [255, 0, 0]
        # Alpha-blend the red tint into the masked region only.
        frame[mask == 1] = (
            (1 - alpha) * frame[mask == 1] +
            alpha * colored_mask[mask == 1]
        )

    # findContours requires a single-channel uint8 input; cast defensively so
    # bool/int64 masks from upstream pipelines don't raise.
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(frame, contours, -1, [255, 0, 0], 2)

    if label_number and contours:
        # Anchor the label at the centroid of the largest contour.
        largest_contour = max(contours, key=cv2.contourArea)
        M = cv2.moments(largest_contour)
        if M["m00"] != 0:
            center_x = int(M["m10"] / M["m00"])
            center_y = int(M["m01"] / M["m00"])
        else:
            # Degenerate (zero-area) contour: fall back to the origin.
            center_x, center_y = 0, 0

        font = cv2.FONT_HERSHEY_SIMPLEX
        text = str(instance_id)
        font_scale = 0.6
        text_size = cv2.getTextSize(text, font, font_scale, 2)[0]
        # BUGFIX: original used "// 1" (a no-op), shifting the label left by
        # its full width; "// 2" centers the text on the centroid.
        text_x = center_x - text_size[0] // 2
        text_y = center_y

        # Filled black rectangle behind the text for readability.
        rect_start = (text_x - 5, text_y - text_size[1] - 5)
        rect_end = (text_x + text_size[0] + 5, text_y)
        cv2.rectangle(frame, rect_start, rect_end, (0, 0, 0), -1)
        cv2.putText(frame, text, (text_x, text_y), font, font_scale, (255, 255, 255), 2)

    # Encode the annotated frame as JPEG, then base64 for transport
    # (e.g. embedding in an API request).
    buffer = BytesIO()
    frame = Image.fromarray(frame)
    frame.save(buffer, format='jpeg')
    buffer.seek(0)
    encoded_frame = base64.b64encode(buffer.read()).decode("utf-8")

    return encoded_frame
|
|
|
|
|
if __name__ == "__main__":
    # Build the CLI from the shared ReferFormer option set so this script
    # accepts the same flags as the main training/evaluation entry points.
    parser = argparse.ArgumentParser('ReferFormer training and evaluation script', parents=[opts.get_args_parser()])
    args = parser.parse_args()

    # Load the A2D training split; text_annotations is presumably the list of
    # referring expressions paired with video instances — verify against the
    # dataset implementation in the datasets package.
    train_dataset = build_dataset('a2d', image_set = 'train', args = args)
    text_annotations = train_dataset.text_annotations