# python3
# Please install OpenAI SDK first:`pip3 install openai`
"""Generate tool-counting question/answer captions for the pwiseg images.

For every entry of the annotation file, the annotations are inserted into a
prompt, the prompt is sent to a chat-completion model, and the reply is stored
as the ``caption`` of a record ``{"id", "image", "caption"}``.  The records are
checkpointed to the output JSON file after every image, and a re-run skips the
ids that are already present in that file.
"""
import json
import os

import numpy as np
from openai import OpenAI

# SECURITY: the key is read from the environment instead of being committed to
# the source file.  Any key that was ever stored in this file should be
# considered leaked and rotated.
api_key = os.environ.get("OPENAI_API_KEY")
base_url = os.environ.get("OPENAI_BASE_URL", "https://vip.yi-zhan.top/v1")
model = "gpt-4o-2024-05-13"

# NOTE(review): with api_key=None the SDK is expected to fall back to
# OPENAI_API_KEY itself and raise if it is unset - confirm against the
# installed SDK version.
client = OpenAI(api_key=api_key, base_url=base_url)

INFO_PATH = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/pwiseg_info.json"
OUTPUT_PATH = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/count_dataset_pwiseg_0710.json"
IMAGE_DIR = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'

# Prompt sent for every image; ``{position}`` is replaced by the annotations.
PROMPT_TEMPLATE = """
## Role
- You are an AI visual assistant, and you are looking at a picture of many surgical tools.

## Information
- You will receive a list of dictionaries of annotated tools that can be seen on the table.
- Note that each dictionary contains "name":"bbox", "name is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y.
- The list is as follows:
```json
{position}
```

## Task
Based on the list, Your task is to generate several questions and corresponding answers about counting surgical tools on the table.

## Example:
- "Question: How many scalpals are on the table? Answer: Two scalpels are on the table",
- "Question: How many forceps are on the table? Answer: There is no forcep on the table",
- "Question: How many surgical tools in total are on the table? Answer: There are 2 scalpals on the table and 3 tweezers on the table",

## Constraints
- Remeber, all the questions must can be clearly answered based on the information of given lists. 
- Do not make up any questions and answers without solid evidence in the given lists.
- Importantly, you do not need to give any reasoning process, just give a straightforward answers.
- Do not use coordinates to generate answers and questions.

Now take a deep breath and start your response step by step.
"""


def get_result(prompt):
    """Send *prompt* as the user message and return the model's reply text.

    Args:
        prompt: Full text of the user message.

    Returns:
        The ``content`` of the first choice of the chat completion.

    Raises:
        Whatever the OpenAI client raises on request failure; the caller is
        responsible for handling it.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant"},
            {"role": "user", "content": prompt},
        ],
        stream=False,
        temperature=1.25,
    )
    return response.choices[0].message.content


def _load_dataset(path):
    """Return the records already saved at *path*, or an empty list."""
    if not os.path.exists(path):
        return []
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _save_dataset(path, dataset):
    """Write *dataset* to *path* as indented JSON without risking the old file.

    The data is written to a sibling temporary file which then replaces the
    target, so an interruption in the middle of a write cannot truncate the
    checkpoint that holds all previous results.
    """
    tmp_path = path + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(dataset, indent=2))
    os.replace(tmp_path, path)


def main():
    """Caption every not-yet-processed image and checkpoint after each one."""
    with open(INFO_PATH, "r", encoding="utf-8") as f:
        infos = json.load(f)

    # Resume support: reload what a previous run produced and skip those ids.
    llava_dataset = _load_dataset(OUTPUT_PATH)
    processed_ids = {entry["id"] for entry in llava_dataset}

    total = len(infos)
    for ix, (image_name, annotations) in enumerate(infos.items()):
        # A record's id is the position of its entry in the annotation file.
        if ix in processed_ids:
            continue

        # The template announces a JSON block, so serialise as real JSON
        # rather than relying on the Python repr of the object.
        final_prompt = PROMPT_TEMPLATE.format(
            position=json.dumps(annotations, ensure_ascii=False)
        )

        # Best effort: one failed request must not abort the whole run.
        try:
            answer = get_result(final_prompt)
        except Exception as e:
            print(f"Error processing {image_name}: {e}")
            continue

        llava_dataset.append(
            {
                "id": ix,
                "image": os.path.join(IMAGE_DIR, image_name),
                "caption": answer,
            }
        )
        print("## {count}/{all_number}, {answer}".format(count=ix, all_number=total, answer=answer))

        # A failed checkpoint is reported but not fatal: the record stays in
        # memory and is persisted by the next successful save.
        try:
            _save_dataset(OUTPUT_PATH, llava_dataset)
        except OSError as e:
            print(f"Error processing {image_name}: {e}")


if __name__ == '__main__':
    main()