# python3
# Please install the OpenAI SDK first: `pip3 install openai`
from openai import OpenAI
import json
import os
import numpy as np


# Interchangeable phrasings of the same instruction: "briefly describe the
# target object in the 3D scene". The strings are kept verbatim (including the
# doubled "the the" in the seventh entry) because they are runtime text.
QUESTION_HEAD = [
    "Describe the target object in the 3D scene concisely.",
    "Provide a brief description of the given target object in the 3D scene.",
    "Offer a succinct explanation of the target object in the 3D scene presented.",
    "Summarize the visual content of the target object in the 3D scene.",
    "Give a short and clear explanation of the previous target object in the 3D scene.",
    "Share a concise interpretation of the target object in the 3D scene provided.",
    "Present a compact description of the the target object's key features in the 3D scene.",
    "Relay a brief, clear account of the target object shown in the 3D scene.",
    "Render a clear and concise summary of the target object in the 3D scene.",
    "Write a terse but informative summary of the target object in the 3D scene.",
    "Create a compact narrative representing the target object in the 3D scene presented.",
]

# Endpoint configuration for the OpenAI-compatible chat API.
#
# The API key is read from the environment instead of being embedded in the
# source, so that no credential ends up in version control. Any key that was
# previously committed in this file should be treated as leaked and rotated.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this script."
    )

# The proxy endpoint stays the default but can be overridden per environment.
base_url = os.environ.get("OPENAI_BASE_URL", "https://vip.yi-zhan.top/v1")
model = "gpt-4o-2024-05-13"

# Reuse `base_url` rather than repeating the literal, so the two cannot drift.
client = OpenAI(api_key=api_key, base_url=base_url)


def get_result(prompt):
    """Send ``prompt`` as one user message and return the reply text.

    Uses the module-level ``client`` and ``model``. The request is made
    non-streaming with a sampling temperature of 1.25, and the content of the
    first returned choice is handed back to the caller.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        stream=False,
        temperature=1.25,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


INFO_PATH = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/pwiseg_info.json"
OUTPUT_PATH = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/caption_dataset_pwiseg_0710.json"
IMG_PATH = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'

# Prompt sent for every image; ``{position}`` is replaced by the annotations.
# The text (including its leading indentation) is kept exactly as before.
PROMPT_TEMPLATE = """
            ## Role
            - You are an AI visual assistant, and you are looking at a picture of many surgical tools.

            ## Information
            - You will receive a list of dictionaries of annotated tools that can be seen on the table.
            - Note that each dictionary contains "name":"bbox", "name is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y.
            - The list is as follows:
            ```json
            {position}
            ```

            ## Task
            - Your task is to generate a comprehensive description of the surgical table, including what is on the table, where they are, and their positional relationship.

            ## Constraints
            - Don't mention any specific numbers for its bounding box, using rough positions such as left, top right, etc.
            - Don't make up anything not mentioned in the list, just an objective and direct description.
            - The description should be more than 100 words and less than 150 words.

            Now take a deep breath and start your response step by step.
            """


def load_captions(output_path):
    """Return the caption records already stored at ``output_path``.

    Returns an empty list when the file does not exist yet, which is the
    state of a fresh (non-resumed) run.
    """
    if not os.path.exists(output_path):
        return []
    with open(output_path, "r", encoding="utf-8") as f:
        return json.load(f)


def save_captions(output_path, dataset):
    """Write ``dataset`` to ``output_path`` as indented JSON.

    The data is written to a sibling ``.tmp`` file first and then moved over
    the target with ``os.replace``, so an interruption during the write does
    not leave a truncated checkpoint that would break the next resume.
    """
    tmp_path = f"{output_path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(dataset, indent=2))
    os.replace(tmp_path, output_path)


def build_prompt(tools):
    """Fill the prompt template with ``tools`` rendered as JSON.

    The template wraps the annotations in a ```json fence, so they are
    serialized with ``json.dumps`` rather than Python's ``repr`` (which would
    emit single-quoted, non-JSON text).
    """
    return PROMPT_TEMPLATE.format(position=json.dumps(tools, ensure_ascii=False))


def main(info_path=INFO_PATH, output_path=OUTPUT_PATH, img_path=IMG_PATH):
    """Caption every entry of the info file, resuming from earlier output.

    Each record gets the entry's enumeration index as ``id``, the image path
    joined from ``img_path`` and the entry key, and the model answer as
    ``caption``. The output file is rewritten after every successful caption
    so that progress survives an interruption.
    """
    with open(info_path, "r", encoding="utf-8") as f:
        info = json.load(f)

    # Load what a previous run already produced, if anything.
    llava_dataset = load_captions(output_path)

    # IDs are enumeration indices into ``info``; a set keeps the skip check
    # O(1). NOTE: resuming assumes the info file keeps the same key order.
    processed_ids = {record["id"] for record in llava_dataset}

    all_number = len(info)
    for ix, (image_name, tools) in enumerate(info.items()):
        if ix in processed_ids:
            continue  # already captioned in an earlier run

        # Only the remote call is best-effort; a failed item is reported and
        # skipped, while local I/O errors are allowed to surface.
        try:
            answer = get_result(build_prompt(tools))
        except Exception as e:
            print(f"Error processing {image_name}: {e}")
            continue

        print(f"## {ix}/{all_number}, {answer}")
        llava_dataset.append({
            "id": ix,
            "image": os.path.join(img_path, image_name),
            "caption": answer,
        })
        save_captions(output_path, llava_dataset)


if __name__ == '__main__':
    main()