import json
import os
import time

import cv2
import numpy as np
from PIL import Image, ImageDraw

from gpt4v import get_image_description

# Root folder with one sub-directory per sequence; each sequence's
# objects_classes.json is written into its own sub-directory.
image_folder = 'data/VOI'

# Keep only directories: the per-sequence output file is written inside
# <image_folder>/<seq>/, so stray files returned by os.listdir (for example
# .DS_Store) would otherwise crash the processing loop.
seqs = sorted(
    entry
    for entry in os.listdir(image_folder)
    if os.path.isdir(os.path.join(image_folder, entry))
)
print(seqs)

# NOTE(review): not referenced anywhere in the visible part of this script.
trajectory_path = 'data/VOI-GT'

# Folder holding the segmented images named "<seq>_segmented_sam_b.jpg".
img_with_initial_point = 'data/VOI-initial-point'

# Wall-clock start used for the total / average timing report.
start = time.time()

# Prompt sent with every image. It is identical for all sequences, so it is
# built once instead of on every iteration.
title = "Analyze the image to identify objects. First, identify the object segmented by a prominent color at the part level, then determine other objects it may contact. List only the categories of these objects as single words, separated by commas, with no additional text, symbols, or line breaks."

# enumerate() gives the 1-based progress counter directly, avoiding an O(n)
# seqs.index() lookup on each iteration.
for seq_number, seq in enumerate(seqs, start=1):
    print(f"Processing {seq}, {seq_number}/{len(seqs)}")
    seg_path = os.path.join(img_with_initial_point, seq + '_segmented_sam_b.jpg')
    description = get_image_description(seg_path, title)
    print(description)

    # NOTE(review): description[0] assumes get_image_description returns a
    # sequence whose first element is the reply text; if it returns a plain
    # str this would take only its first character -- confirm.
    #
    # Commas become spaces (rather than being deleted) so that "cup,table"
    # yields two classes instead of the fused word "cuptable"; split() with
    # no argument also splits on newlines and drops empty tokens.
    words = description[0].replace(',', ' ').split()

    # Serialize the class list as {"classes": [...]}.
    description_json = json.dumps({"classes": words})

    save_path = os.path.join(image_folder, seq, 'objects_classes.json')
    with open(save_path, 'w') as f:
        f.write(description_json)


end = time.time()
elapsed = end - start
print(f"Time: {elapsed}")
# With no sequences there is nothing to average over; skip the report instead
# of raising ZeroDivisionError.
if seqs:
    print(f"Average time: {elapsed/len(seqs)}")