UHow
/

Image-to-Video
C-Drag / OPM /2-recognize_objects.py
UHow's picture
Upload 12 files
24e5d63 verified
import os
import cv2
import json
import numpy as np
from PIL import Image, ImageDraw
from gpt4v import get_image_description
image_folder = 'data/VOI'
seqs = os.listdir(image_folder)
seqs.sort()
print(seqs)
trajectory_path = 'data/VOI-GT'
img_with_initial_point = 'data/VOI-initial-point'
# time synchronization and time statistics
import time
start = time.time()
for seq in seqs:
print(f"Processing {seq}, {seqs.index(seq)+1}/{len(seqs)}")
title = "Analyze the image to identify objects. First, identify the object segmented by a prominent color at the part level, then determine other objects it may contact. List only the categories of these objects as single words, separated by commas, with no additional text, symbols, or line breaks."
seg_path = os.path.join(img_with_initial_point, seq + '_segmented_sam_b.jpg')
description = get_image_description(seg_path, title)
print(description)
words = [word.strip() for word in description[0].replace('\n', '').replace(',', '').split()]
# Change the list of words to JSON format
description_json = json.dumps({"classes": words})
# print(description_json)
save_path = os.path.join(image_folder,seq, 'objects_classes.json')
with open(save_path, 'w') as f:
f.write(description_json)
end = time.time()
print(f"Time: {end-start}")
print(f"Average time: {(end-start)/len(seqs)}")