from openai import OpenAI
import json
import os
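
# Configuration for the OpenAI-compatible endpoint used to generate the counting Q&A data.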
api_key = "sk-WVJp2orFuuvPTf5P5dD936B1De78421b9eEa2c99D70b8a06"
base_url = "https://vip.yi-zhan.top/v1"
model = "gpt-4o-2024-05-13"

client = OpenAI(api_key=api_key, base_url=base_url)
def get_result(prompt):
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": prompt},
        ],
        stream=False,
        temperature=1.25,
    )
    return response.choices[0].message.content


if __name__ == '__main__':
    output_path = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/count_dataset_pwiseg_0710.json"
    with open("/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/pwiseg_info.json", "r") as f:
        infos = json.load(f)
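
    # Prompt template: {position} is filled per image with the annotated "tool name": bbox entries.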
    prompt = """
    ## Role
    - You are an AI visual assistant looking at a picture of many surgical tools.

    ## Information
    - You will receive a list of dictionaries of annotated tools that can be seen on the table.
    - Each dictionary has the form "name": "bbox", where "name" is the name of a surgical tool on the table and "bbox" is that tool's position (top left x, top left y, bottom right x, bottom right y).
    - The list is as follows:
    ```json
    {position}
    ```

    ## Task
    Based on the list, your task is to generate several questions and corresponding answers about counting the surgical tools on the table.

    ## Example
    - "Question: How many scalpels are on the table? Answer: Two scalpels are on the table",
    - "Question: How many forceps are on the table? Answer: There are no forceps on the table",
    - "Question: How many surgical tools in total are on the table? Answer: There are 5 surgical tools in total: 2 scalpels and 3 tweezers",

    ## Constraints
    - Remember, every question must be clearly answerable from the information in the given list.
    - Do not make up any questions or answers without solid evidence in the given list.
    - Importantly, do not give any reasoning process; just give straightforward answers.
    - Do not use coordinates in the questions or answers.

    Now take a deep breath and start your response step by step.
    """
    if os.path.exists(output_path):
        with open(output_path, "r") as f:
            llava_dataset = json.load(f)
    else:
        llava_dataset = []

    processed_ids = [data["id"] for data in llava_dataset]
    # `count` tracks samples generated in this run; `total` is the number of annotated images.
    count, total = 0, len(infos)
    img_path = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'
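
    # Iterate over all annotated images, skipping indices already present in the output file.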
    for ix, (i, v) in enumerate(infos.items()):
        if ix in processed_ids:
            continue

        llava_dict = {}
        info_dict = v
        final_prompt = prompt.format(position=info_dict)

        try:
            answer = get_result(final_prompt)

            llava_dict["id"] = ix
            llava_dict["image"] = os.path.join(img_path, i)
            llava_dict["caption"] = answer
            llava_dataset.append(llava_dict)
            count += 1

            print("## {idx}/{all_number}, {answer}".format(idx=ix,
                                                           all_number=total,
                                                           answer=answer))

            # Checkpoint after every successful sample so progress survives interruptions.
            with open(output_path, "w") as f:
                f.write(json.dumps(llava_dataset, indent=2))
        except Exception as e:
            print(f"Error processing {i}: {e}")
            continue

        # Cap this run at roughly 20 newly generated samples.
        if count > 20:
            break