|
import os |
|
import sys |
|
from os import path as osp |
|
from io import BytesIO |
|
|
|
from mbench.ytvos_ref import build as build_ytvos_ref |
|
import argparse |
|
import opts |
|
|
|
import sys |
|
from pathlib import Path |
|
import os |
|
from os import path as osp |
|
import skimage |
|
from io import BytesIO |
|
|
|
import numpy as np |
|
import pandas as pd |
|
import regex as re |
|
import json |
|
|
|
import cv2 |
|
from PIL import Image, ImageDraw |
|
import torch |
|
from torchvision.transforms import functional as F |
|
|
|
from skimage import measure |
|
from shapely.geometry import Polygon, MultiPolygon |
|
|
|
import matplotlib.pyplot as plt |
|
import matplotlib.patches as patches |
|
from matplotlib.collections import PatchCollection |
|
from matplotlib.patches import Rectangle |
|
import textwrap |
|
|
|
|
|
import ipywidgets as widgets |
|
from IPython.display import display, clear_output |
|
|
|
from openai import OpenAI |
|
import base64 |
|
|
|
def _encode_jpeg_base64(image_array):
    """Return *image_array* JPEG-compressed and base64-encoded as a UTF-8 string."""
    buffer = BytesIO()
    Image.fromarray(image_array).save(buffer, format='jpeg')
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def _contour_centroid(contour, fallback):
    """Return the integer (x, y) centroid of *contour*, or *fallback* when m00 is zero."""
    moments = cv2.moments(contour)
    if moments["m00"] == 0:
        return fallback
    return int(moments["m10"] / moments["m00"]), int(moments["m01"] / moments["m00"])


def _draw_id_label(frame, text, origin, box_start, box_end):
    """Draw white *text* at *origin* on top of a filled black box, in place on *frame*."""
    cv2.rectangle(frame, box_start, box_end, (0, 0, 0), -1)
    cv2.putText(frame, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)


def number_objects_and_encode(idx, color_mask=False):
    """Annotate every sampled frame of one video with object IDs and encode it.

    Reads the module-level globals ``metas``, ``train_dataset`` and ``colors``.
    For each category name found in ``metas[idx]['obj_id_cat']`` and each frame
    of ``train_dataset[idx][0]``, the valid objects of that category are
    outlined and labelled with their object id, the annotated frame is shown
    with matplotlib, and the result is stored as a base64 JPEG string.

    Args:
        idx: Index into ``metas`` / ``train_dataset``.
        color_mask: When falsy, a contour is drawn and every contour of the
            object gets a label centred on its centroid. When truthy, the mask
            area is additionally tinted (alpha 0.08), only the largest contour
            is labelled, and the contours are also drawn on a second,
            label-free copy of the frame.

    Returns:
        A tuple ``(encoded_frames, vid_cat_cnts, contoured_frames)`` of dicts
        keyed by category name:
        ``encoded_frames[cat]`` is a list of base64 JPEG strings of the
        annotated frames; ``vid_cat_cnts[cat]`` maps frame name to the number
        of valid objects of that category in the frame;
        ``contoured_frames[cat]`` is a list of base64 JPEG strings of the
        second frame copy (contours only in ``color_mask`` mode, otherwise the
        untouched frame).
    """
    encoded_frames = {}
    contoured_frames = {}
    vid_cat_cnts = {}

    vid_meta = metas[idx]
    vid_data = train_dataset[idx]
    frame_indx = vid_meta['sample_indx']
    cat_names = set(vid_meta['obj_id_cat'].values())
    imgs = vid_data[0]
    font = cv2.FONT_HERSHEY_SIMPLEX

    for cat in cat_names:
        cat_frames = []
        contour_frames = []
        frame_cat_cnts = {}

        for frame_pos in range(imgs.size(0)):
            frame_name = frame_indx[frame_pos]
            # presumably each imgs[k] is (C, H, W); permute makes it channel-last.
            # ndarray.copy(order='C') guarantees a C-contiguous private copy,
            # which OpenCV's in-place drawing functions need (np.copy would
            # keep the permuted memory layout).
            frame = imgs[frame_pos].permute(1, 2, 0).numpy().copy(order='C')
            frame_for_contour = frame.copy()

            frame_data = vid_data[2][frame_name]
            cat_cnt = 0

            for obj_pos, obj_id in enumerate(frame_data.keys()):
                obj_data = frame_data[obj_id]
                if obj_data['category_name'] != cat or not obj_data['valid']:
                    continue
                cat_cnt += 1

                obj_mask = obj_data['mask'].numpy().astype(np.uint8)
                # Wrap around so frames with more objects than palette entries
                # do not raise IndexError.
                color = colors[obj_pos % len(colors)]
                # cv2 text functions require a str.
                label = str(obj_id)

                if not color_mask:
                    contours, _ = cv2.findContours(
                        obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(frame, contours, -1, color, 3)
                    text_w, text_h = cv2.getTextSize(label, font, 1, 2)[0]

                    for contour in contours:
                        # Fall back to the first contour point when the
                        # centroid is undefined; cast to plain ints because
                        # cv2 point tuples may reject NumPy scalars.
                        first_x, first_y = contour[0][0]
                        cx, cy = _contour_centroid(
                            contour, (int(first_x), int(first_y)))
                        _draw_id_label(
                            frame,
                            label,
                            (cx - text_w // 2, cy + text_h // 2),
                            (cx - text_w // 2 - 5, cy - text_h // 2 - 5),
                            (cx + text_w // 2 + 5, cy + text_h // 2 + 5),
                        )
                else:
                    alpha = 0.08
                    region = obj_mask == 1

                    # Blend a faint tint of the object colour into the mask area.
                    colored_obj_mask = np.zeros_like(frame)
                    colored_obj_mask[region] = color
                    frame[region] = (
                        (1 - alpha) * frame[region]
                        + alpha * colored_obj_mask[region]
                    )

                    contours, _ = cv2.findContours(
                        obj_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
                    cv2.drawContours(frame, contours, -1, color, 2)
                    cv2.drawContours(frame_for_contour, contours, -1, color, 2)

                    if len(contours) > 0:
                        largest_contour = max(contours, key=cv2.contourArea)
                        center_x, center_y = _contour_centroid(
                            largest_contour, (0, 0))

                        font_scale = 0.9
                        text_size = cv2.getTextSize(label, font, font_scale, 2)[0]
                        # The label ends at the centroid x (full text width is
                        # subtracted), with its baseline on the centroid y.
                        text_x = center_x - text_size[0]
                        text_y = center_y

                        # NOTE(review): the box is measured at scale 0.9 while
                        # the text is drawn at scale 1 -- kept as in the
                        # existing rendering; confirm whether this is intended.
                        _draw_id_label(
                            frame,
                            label,
                            (text_x, text_y),
                            (text_x - 5, text_y - text_size[1] - 5),
                            (text_x + text_size[0] + 5, text_y),
                        )

            fig = plt.figure(figsize=(12, 8))
            plt.imshow(frame)
            plt.title(f"frame {frame_name}")
            plt.tight_layout()
            plt.axis('off')
            plt.show()
            # Release the figure so one-per-frame figures do not accumulate.
            plt.close(fig)

            cat_frames.append(_encode_jpeg_base64(frame))
            frame_cat_cnts[frame_name] = cat_cnt
            contour_frames.append(_encode_jpeg_base64(frame_for_contour))

        encoded_frames[cat] = cat_frames
        contoured_frames[cat] = contour_frames
        vid_cat_cnts[cat] = frame_cat_cnts

    return encoded_frames, vid_cat_cnts, contoured_frames
|
|
|
if __name__ == '__main__':
    # Command-line options are inherited from the parser defined in `opts`.
    parser = argparse.ArgumentParser(
        'ReferFormer training and evaluation script',
        parents=[opts.get_args_parser()],
    )
    args = parser.parse_args()

    # Build the 'train' split and expose its per-video metadata; both names
    # are read as module-level globals by number_objects_and_encode.
    train_dataset = build_ytvos_ref(image_set='train', args=args)
    metas = train_dataset.metas

    # Drawing palette; number_objects_and_encode picks an entry by the
    # object's position within a frame.
    colors = [
        (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
        (255, 0, 255), (0, 255, 255), (128, 0, 128), (255, 165, 0),
    ]
|
|
|
|