lyclyc52
/

llava_finetune

Model card Files Files and versions Community

lyclyc52 committed on Jul 24, 2024

Commit

f168581

1 Parent(s): 157f5b2

Update: add distributed inference code

Browse files

Files changed (8) hide show

distributed_inference/distribute_llama_caption_generation.py +94 -0
distributed_inference/distribute_llama_question_generation.py +91 -0
distributed_inference/distribute_llama_rephrase.py +86 -0
distributed_inference/gpt4.py +102 -0
distributed_inference/gpt_generate_caption_pwiseg.py +111 -0
distributed_inference/gpt_generate_count_pwiseg.py +111 -0
distributed_inference/prompt_config.py +157 -0
distributed_inference/utils.py +131 -0

distributed_inference/distribute_llama_caption_generation.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import torch
+from accelerate import PartialState
+import transformers
+import torch
+import json
+from tqdm import tqdm
+import os
+from distributed_inference.utils import caption_repharse
+import random
+from distributed_inference.prompt_config import prompt_dict, instruction_dict
+def main(num_gpu = 1):
+    distributed_state = PartialState()
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    pipeline = transformers.pipeline(
+        "text-generation",
+        model=model_id,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+        device=distributed_state.device
+    )
+    instruction = instruction_dict['general']
+    user_prompt = prompt_dict['caption_generation']
+    # file_list = [ '4dor_count_dataset_0702.json', '4dor_phase_dataset_0702.json']
+    # file_list = ['4dor_recognition_dataset_0702.json']
+    file_list = ['./pwiseg_info.json']
+    img_path = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'
+    for file in file_list:
+        with open(file, 'r') as f:
+            data = json.load(f)
+        data_keys = list(data.keys())
+        data_keys.sort()
+        output_name = '4dor_caption_dataset_pwiseg_llama3.json'
+        output_data = []
+        if os.path.exists(output_name):
+            with open(output_name, 'r') as f:
+                output_data = json.load(f)
+        start_index = len(output_data)
+        for i in tqdm(range(start_index, len(data_keys), num_gpu)):
+            tempature = random.uniform(0.8, 0.9)
+            top_p = random.uniform(0.7, 0.9)
+            indices = list(range(i, min(i+num_gpu, len(data_keys))))
+            input_list = [data[data_keys[j]] for j in indices]
+            temp_output = {}
+            with distributed_state.split_between_processes(input_list, apply_padding=True) as s_input:
+                messages = [
+                        {"role": "system", "content": instruction},
+                        {"role": "user", "content": user_prompt.format(position=s_input)},
+                    ]
+                prompt = pipeline.tokenizer.apply_chat_template(
+                        messages,
+                        tokenize=False,
+                            add_generation_prompt=True
+                    )
+                terminators = [
+                    pipeline.tokenizer.eos_token_id,
+                    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+                ]
+                outputs = pipeline(
+                    prompt,
+                    max_new_tokens=500,
+                    eos_token_id=terminators,
+                    do_sample=True,
+                    temperature=tempature,
+                    top_p=top_p,
+                )
+                results = outputs[0]["generated_text"][len(prompt):]
+                results = caption_repharse(results)
+                with open(f'{distributed_state.process_index}.json', 'w') as f:
+                    json.dump(results, f, indent=4)
+            distributed_state.wait_for_everyone()
+            if distributed_state.is_last_process:
+                for j in range(len(indices)):
+                    with open(f'{j}.json', 'r') as f:
+                        temp_output = json.load(f)
+                        llava_dict = {}
+                        llava_dict["id"] = data_keys[indices[j]]
+                        llava_dict["image"] = os.path.join(img_path, data_keys[indices[j]])
+                        llava_dict["caption"] = temp_output
+                    output_data.append(llava_dict)
+                with open(output_name, 'w') as f:
+                    json.dump(output_data, f, indent=4)
+            distributed_state.wait_for_everyone()
+    for j in range(num_gpu):
+        if os.path.exists(f'{j}.json'):
+            os.remove(f'{j}.json')
+if __name__ == '__main__':
+    # Script entry point; the batch size of 4 samples is hard-coded here.
+    main(num_gpu = 4)

distributed_inference/distribute_llama_question_generation.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import torch
+from accelerate import PartialState
+import transformers
+import torch
+import json
+from tqdm import tqdm
+import os
+from distributed_inference.utils import repharse
+from distributed_inference.prompt_config import prompt_dict
+def main(num_gpu = 1):
+    distributed_state = PartialState()
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    pipeline = transformers.pipeline(
+        "text-generation",
+        model=model_id,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+        device=distributed_state.device
+    )
+    instruction = ' You are an AI visual assistant, and you are looking at a picture of many surgical tools.'
+    user_prompt = prompt_dict['question_generation_with_ex']
+    # file_list = [ '4dor_count_dataset_0702.json', '4dor_phase_dataset_0702.json']
+    # file_list = ['4dor_recognition_dataset_0702.json']
+    file_list = ['./pwiseg_info.json']
+    img_path = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'
+    for file in file_list:
+        with open(file, 'r') as f:
+            data = json.load(f)
+        data_keys = list(data.keys())
+        data_keys.sort()
+        output_name = '4dor_count_dataset_pwiseg_llama3.json'
+        output_data = []
+        if os.path.exists(output_name):
+            with open(output_name, 'r') as f:
+                output_data = json.load(f)
+        start_index = len(output_data)
+        print(start_index)
+        for i in tqdm(range(start_index, len(data_keys), num_gpu)):
+            indices = list(range(i, min(i+num_gpu, len(data_keys))))
+            input_list = [data[data_keys[j]] for j in indices]
+            temp_output = {}
+            with distributed_state.split_between_processes(input_list, apply_padding=True) as s_input:
+                messages = [
+                        {"role": "system", "content": instruction},
+                        {"role": "user", "content": user_prompt.format(position=s_input)},
+                    ]
+                prompt = pipeline.tokenizer.apply_chat_template(
+                        messages,
+                        tokenize=False,
+                            add_generation_prompt=True
+                    )
+                terminators = [
+                    pipeline.tokenizer.eos_token_id,
+                    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+                ]
+                outputs = pipeline(
+                    prompt,
+                    max_new_tokens=500,
+                    eos_token_id=terminators,
+                    do_sample=True,
+                    temperature=0.6,
+                    top_p=0.9,
+                )
+                results = outputs[0]["generated_text"][len(prompt):]
+                results = repharse(results)
+                with open(f'{distributed_state.process_index}.json', 'w') as f:
+                    json.dump(results, f, indent=4)
+            distributed_state.wait_for_everyone()
+            if distributed_state.is_last_process:
+                for j in range(len(indices)):
+                    with open(f'{j}.json', 'r') as f:
+                        temp_output = json.load(f)
+                        llava_dict = {}
+                        llava_dict["id"] = data_keys[indices[j]]
+                        llava_dict["image"] = os.path.join(img_path, data_keys[indices[j]])
+                        llava_dict["caption"] = temp_output
+                    output_data.append(llava_dict)
+                with open(output_name, 'w') as f:
+                    json.dump(output_data, f, indent=4)
+            distributed_state.wait_for_everyone()
+        for j in range(len(indices)):
+            if os.path.exists(f'{j}.json'):
+                os.remove(f'{j}.json')
+if __name__ == '__main__':
+    # Script entry point; the batch size of 4 samples is hard-coded here.
+    main(num_gpu = 4)

distributed_inference/distribute_llama_rephrase.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import torch
+from accelerate import PartialState
+import transformers
+import torch
+import json
+from tqdm import tqdm
+import os
+from distributed_inference.utils import repharse
+from distributed_inference.prompt_config import prompt_dict
+def main(num_gpu = 1):
+    distributed_state = PartialState()
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+    pipeline = transformers.pipeline(
+        "text-generation",
+        model=model_id,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+        device=distributed_state.device
+    )
+    instruction = prompt_dict['rephrase_with_ex']
+    # file_list = [ '4dor_count_dataset_0702.json', '4dor_phase_dataset_0702.json']
+    # file_list = ['4dor_recognition_dataset_0702.json']
+    file_list = ['./data/count_dataset_pwiseg_0710.json']
+    for file in file_list:
+        with open(file, 'r') as f:
+            data = json.load(f)
+        output_name = file.replace('.json', '_rephrased.json')
+        existing_data = []
+        if os.path.exists(output_name):
+            with open(output_name, 'r') as f:
+                existing_data = json.load(f)
+        data[:len(existing_data)] = existing_data
+        start_index = len(existing_data)
+        for i in tqdm(range(start_index, len(data), num_gpu)):
+            indices = list(range(i, min(i+num_gpu, len(data))))
+            caption_list = [data[j]['caption'] for j in indices]
+            temp_output = {}
+            with distributed_state.split_between_processes(caption_list, apply_padding=True) as caption:
+                messages = [
+                        {"role": "system", "content": instruction},
+                        {"role": "user", "content": caption},
+                    ]
+                prompt = pipeline.tokenizer.apply_chat_template(
+                        messages,
+                        tokenize=False,
+                            add_generation_prompt=True
+                    )
+                terminators = [
+                    pipeline.tokenizer.eos_token_id,
+                    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+                ]
+                outputs = pipeline(
+                    prompt,
+                    max_new_tokens=500,
+                    eos_token_id=terminators,
+                    do_sample=True,
+                    temperature=0.6,
+                    top_p=0.9,
+                )
+                results = outputs[0]["generated_text"][len(prompt):]
+                output_data = repharse(results)
+                with open(f'{distributed_state.process_index}.json', 'w') as f:
+                    json.dump(output_data, f, indent=4)
+            distributed_state.wait_for_everyone()
+            if distributed_state.is_last_process:
+                output_data = data[:indices[-1] + 1].copy()
+                for j in range(len(indices)):
+                    with open(f'{j}.json', 'r') as f:
+                        temp_output = json.load(f)
+                    output_data[j + i]['caption'] = temp_output
+                with open(output_name, 'w') as f:
+                    json.dump(output_data, f, indent=4)
+            distributed_state.wait_for_everyone()
+            for j in range(len(indices)):
+                os.remove(f'{j}.json')
+if __name__ == '__main__':
+    # Script entry point; the batch size of 4 records is hard-coded here.
+    main(num_gpu = 4)

distributed_inference/gpt4.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from openai import OpenAI
+from utils import repharse
+from tqdm import tqdm
+import json
+import os
+def get_result(prompt, instruction, client, base64_image=None, ):
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {"role": "system", "content": instruction},
+            {"role": "user", "content":
+            [
+                {"type": "text", "text": prompt}
+            ]
+            },
+        ],
+        stream=False,
+        temperature=0.0
+    )
+    return response.choices[0].message.content
+def get_result_video(prompt, instruction, client, base64_image=None, ):
+    response = client.chat.completions.create(
+            model="gpt-4o",
+            messages=[
+            {"role": "system", "content": "You are generating a video summary. Please provide a summary of the video. Respond in Markdown."},
+            {"role": "user", "content": [
+                "These are the frames from the video.",
+                *map(lambda x: {"type": "image_url",
+                                "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames)
+                ],
+            }
+            ],
+            temperature=0,
+        )
+def main():
+    instruction = f'Given the some sentences, rewrite it into a regular format. It contain some pairs of questions and answers. For each pair, the output should be formulated as: \n' + \
+                    'Q: <question>\n' + \
+                    'A: <answer>\n' + \
+                'Please do not output anything else. The output should only contain the pairs of questions and answers. \n' + \
+                'Here are some examples.\n' +\
+                'Example 1: \n' + \
+                'Input: \n' + \
+                "### Question 1:\nWhat equipment can be found in the operating room?\n\n**Answer:** The operating room contains anesthesia equipment, an instrument table, and a secondary table.\n\n---\n\n### Question 2:\nWhat is positioned in the center of the operating room?\n\n**Answer:** There's an instrument table in the center of the operating room. It is typically used to keep surgical instruments in a readily accessible location doing to an operation.\n\n---\n\n### Question 3:\nIs there any anesthesia equipment present in the operating room?\n\n**Answer:** Yes, there is anesthesia equipment present in the operating room, which is used to administer anesthesia to patients in order to keep them unconscious or pain-free during surgical procedures.\n" + \
+                'Output: \n' + \
+                'Q: What equipment can be found in the operating room?\n' + \
+                'A: The operating room contains anesthesia equipment, an instrument table, and a secondary table.\n\n' + \
+                'Q: What is positioned in the center of the operating room?\n' + \
+                'A: There\'s an instrument table in the center of the operating room. It is typically used to keep surgical instruments in a readily accessible location doing to an operation.\n\n' + \
+                'Q: Is there any anesthesia equipment present in the operating room?\n' + \
+                'A: Yes, there is anesthesia equipment present in the operating room, which is used to administer anesthesia to patients in order to keep them unconscious or pain-free during surgical procedures.\n' + \
+                'Example 2: \n' + \
+                'Input: \n' + \
+                "Sure, I'll describe the current phase and generate related questions and answers that pertain to maintaining the sterile field during surgery.\n\n### Description\nIn the image, the medical staff are wearing blue sterile gowns, gloves, masks, and caps, indicating adherence to strict aseptic techniques. There is equipment carefully covered with sterile coverings, and positions are maintained near the required instruments such as the instrument table and anesthesia equipment. Staff members seem to be in diligent preparation and monitoring phases prior to more active surgical intervention.\n\n### Questions and Answers about the Sterile Phase\n\n#### Question 1:\nWhat is the current surgical phase, and how should it be maintained?\n##### Answer:\nThe current surgical phase is sterile. It is crucial to maintain strict aseptic techniques: all personnel must continue to wear sterile gowns, gloves, and masks while thoroughly disinfecting the surgical site using sterilized instruments and materials. The team should minimize unnecessary movement and contact to reduce contamination risks. Clear communication within the team is important to promptly address any breaches in sterility.\n\n#### Question 2:\nWhy is it essential to maintain the sterile phase during surgery?\n##### Answer:\nMaintaining the sterile phase during surgery is essential to prevent infections. It ensures the surgical site and instruments remain free from harmful bacteria and other microorganisms. Proper sterilization reduces the risk of postoperative complications, speeds up patient recovery, and enhances the overall success of the surgical procedure.\n\n#### Question 3:\nWhat suggestions could help surgeons improve the quality of surgery during the sterile phase?\n##### Answer:\nTo improve the quality of surgery during the sterile phase, it is important to:\n1. Ensure a thorough hand washing and use of sterilizing solutions before scrubbing in.\n2. 
Use sterile drapes to isolate the surgical site efficiently.\n3. Maintain constant surveillance of sterilization procedures.\n4. Label and organize sterile instruments properly on the instrument table.\n5. Implement a clear protocol for notifying the team if the sterile field is compromised, so corrective actions can be taken immediately.\n6. Avoid over-cluttering the sterile field with non-essential equipment.\n\nBy following these suggestions, the sterility of the procedure can be maintained, resulting in higher surgery quality and better patient outcomes." + \
+                'Output: \n' + \
+                'Q: What is the current surgical phase, and how should it be maintained?\n' + \
+                'A: The current surgical phase is sterile. It is crucial to maintain strict aseptic techniques: all personnel must continue to wear sterile gowns, gloves, and masks while thoroughly disinfecting the surgical site using sterilized instruments and materials. The team should minimize unnecessary movement and contact to reduce contamination risks. Clear communication within the team is important to promptly address any breaches in sterility.\n\n' + \
+                'Q: Why is it essential to maintain the sterile phase during surgery?\n' + \
+                'A: Maintaining the sterile phase during surgery is essential to prevent infections. It ensures the surgical site and instruments remain free from harmful bacteria and other microorganisms. Proper sterilization reduces the risk of postoperative complications, speeds up patient recovery, and enhances the overall success of the surgical procedure.\n\n' + \
+                'Q: What suggestions could help surgeons improve the quality of surgery during the sterile phase?\n' + \
+                'A: To improve the quality of surgery during the sterile phase, it is important to:\n1. Ensure a thorough hand washing and use of sterilizing solutions before scrubbing in.\n2. Use sterile drapes to isolate the surgical site efficiently.\n3. Maintain constant surveillance of sterilization procedures.\n4. Label and organize sterile instruments properly on the instrument table.\n5. Implement a clear protocol for notifying the team if the sterile field is compromised, so corrective actions can be taken immediately.\n6. Avoid over-cluttering the sterile field with non-essential equipment.\n' + \
+                'Example 3: \n' + \
+                'Input: \n' + \
+                "Question: What equipment can be found in the operating room?\nAnswer: Anesthesia equipment.\n\nQuestion: What is placed on the instrument table?\nAnswer: Various surgical instruments needed for the procedure are typically placed on the instrument table for easy access by the surgical team.\n\nQuestion: Where can the secondary table be found, and what might it be used for?\nAnswer: The secondary table is part of the auxiliary equipment in the operating room and may be used to hold additional surgical instruments or supplies needed during the procedure." + \
+                'Output: \n' + \
+                'Q: What equipment can be found in the operating room?\n' + \
+                'A: Anesthesia equipment.\n\n' + \
+                'Q: What is placed on the instrument table?\n' + \
+                'A: Various surgical instruments needed for the procedure are typically placed on the instrument table for easy access by the surgical team.\n\n' + \
+                'Q: Where can the secondary table be found, and what might it be used for?\n' + \
+                'A: The secondary table is part of the auxiliary equipment in the operating room and may be used to hold additional surgical instruments or supplies needed during the procedure.\n'
+    client = OpenAI(api_key="sk-sqilImjoPeMWLosGC7EbB5Dc215d4320BbDa49C59f73Eb85",
+                    base_url="https://vip.yi-zhan.top/v1")
+    file_list = ['./data/4dor_recognition_dataset_0702.json']
+    for file in file_list:
+        with open(file, 'r') as f:
+            data = json.load(f)
+        output_name = file.replace('.json', '_rephrased.json')
+        existing_data = []
+        if os.path.exists(output_name):
+            with open(output_name, 'r') as f:
+                existing_data = json.load(f)
+        # data[:len(existing_data)] = existing_data
+        # start_index = len(existing_data)
+        start_index = 0
+        for i in tqdm(range(start_index, len(data))):
+            prompt = data[i]['caption']
+            results = get_result(prompt, instruction, client)
+            data[i]['caption'] = repharse(results)
+            exit()
+            with open(output_name, 'w') as f:
+                json.dump(data[:i+1], f, indent=4)
+if __name__ == '__main__':
+    # Script entry point.
+    main()

distributed_inference/gpt_generate_caption_pwiseg.py ADDED Viewed

	@@ -0,0 +1,111 @@

+# python3
+# Please install OpenAI SDK first：`pip3 install openai`
+from openai import OpenAI
+import json
+import os
+import numpy as np
+QUESTION_HEAD = ["Describe the target object in the 3D scene concisely.",
+"Provide a brief description of the given target object in the 3D scene.",
+"Offer a succinct explanation of the target object in the 3D scene presented.",
+"Summarize the visual content of the target object in the 3D scene.",
+"Give a short and clear explanation of the previous target object in the 3D scene.",
+"Share a concise interpretation of the target object in the 3D scene provided.",
+"Present a compact description of the the target object's key features in the 3D scene.",
+"Relay a brief, clear account of the target object shown in the 3D scene.",
+"Render a clear and concise summary of the target object in the 3D scene.",
+"Write a terse but informative summary of the target object in the 3D scene.",
+"Create a compact narrative representing the target object in the 3D scene presented."]
+api_key  = "sk-3I6se0vPc8lYCIXH8eDd7e1fBe6341Ae92025dBd8cF9A426"
+base_url = "https://vip.yi-zhan.top/v1"
+model    = "gpt-4o-2024-05-13"
+client = OpenAI(api_key=api_key,
+                base_url="https://vip.yi-zhan.top/v1")
+def get_result(prompt):
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant"},
+            {"role": "user", "content": prompt},
+        ],
+        stream=False,
+        temperature=1.25
+    )
+    return response.choices[0].message.content
+if __name__ == '__main__':
+    output_path = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/caption_dataset_pwiseg_0710.json"
+    with open("/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/pwiseg_info.json","r") as f:
+        info = json.load(f)
+    prompt = """
+            ## Role
+            - You are an AI visual assistant, and you are looking at a picture of many surgical tools.
+            ## Information
+            - You will receive a list of dictionaries of annotated tools that can be seen on the table.
+            - Note that each dictionary contains "name":"bbox", "name is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y.
+            - The list is as follows:
+            ```json
+            {position}
+            ```
+            ## Task
+            - Your task is to generate a comprehensive description of the surgical table, including what is on the table, where they are, and their positional relationship.
+            ## Constraints
+            - Don't mention any specific numbers for its bounding box, using rough positions such as left, top right, etc.
+            - Don't make up anything not mentioned in the list, just an objective and direct description.
+            - The description should be more than 100 words and less than 150 words.
+            Now take a deep breath and start your response step by step.
+            """
+    img_path = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'
+    llava_dataset = []
+    import ipdb
+    # 读取已处理的数据，如果文件存在
+    if os.path.exists(output_path):
+        with open(output_path, "r") as f:
+                llava_dataset = json.load(f)
+    else:
+        llava_dataset = []
+    # 获取已处理的IDs
+    processed_ids = [data["id"] for data in llava_dataset]
+    count, all_number = 0, len(info)
+    for ix, (i,v) in enumerate(info.items()):
+        if ix in processed_ids:
+                  continue  # 跳过已处理的数据
+        llava_dict = {}
+        info_dict = v
+        final_prompt = prompt.format(position=info_dict)
+        try:
+            answer = get_result(final_prompt)
+            print("## {count}/{all_number}, {answer}".format(count=ix,
+                                                        all_number=all_number,
+                                                        answer=answer))
+            llava_dict["id"] = ix
+            llava_dict["image"] = os.path.join(img_path, i)
+            llava_dict["caption"] = answer
+            llava_dataset.append(llava_dict)
+            #print(llava_dataset)
+            with open(output_path,"w") as f:
+                f.write(json.dumps(llava_dataset,indent=2))
+        except Exception as e:
+                 print(f"Error processing {i}: {e}")
+                 continue

distributed_inference/gpt_generate_count_pwiseg.py ADDED Viewed

	@@ -0,0 +1,111 @@

+# python3
+# Please install OpenAI SDK first：`pip3 install openai`
+from openai import OpenAI
+import json
+import os
+import numpy as np
+api_key = "sk-WVJp2orFuuvPTf5P5dD936B1De78421b9eEa2c99D70b8a06"
+base_url = "https://vip.yi-zhan.top/v1"
+model    = "gpt-4o-2024-05-13"
+client = OpenAI(api_key=api_key,
+                base_url="https://vip.yi-zhan.top/v1")
+def get_result(prompt):
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant"},
+            {"role": "user", "content": prompt},
+        ],
+        stream=False,
+        temperature=1.25
+    )
+    return response.choices[0].message.content
+if __name__ == '__main__':
+    output_path = "/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/count_dataset_pwiseg_0710.json"
+    with open("/mnt1/wjl/InternLM-XComposer/instruct_gen_v3/pwiseg/pwiseg_info.json", "r") as f:
+        infos = json.load(f)
+    prompt = """
+        ## Role
+        - You are an AI visual assistant, and you are looking at a picture of many surgical tools.
+        ## Information
+        - You will receive a list of dictionaries of annotated tools that can be seen on the table.
+        - Note that each dictionary contains "name":"bbox", "name is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y.
+        - The list is as follows:
+        ```json
+        {position}
+        ```
+        ## Task
+        Based on the list, Your task is to generate several questions and corresponding answers about counting surgical tools on the table.
+        ## Example:
+        - "Question: How many scalpals are on the table? Answer: Two scalpels are on the table",
+        - "Question: How many forceps are on the table? Answer: There is no forcep on the table",
+        - "Question: How many surgical tools in total are on the table? Answer: There are 2 scalpals on the table and 3 tweezers on the table",
+        ## Constraints
+        - Remeber, all the questions must can be clearly answered based on the information of given lists.
+        - Do not make up any questions and answers without solid evidence in the given lists.
+        - Importantly, you do not need to give any reasoning process, just give a straightforward answers.
+        - Do not use coordinates to generate answers and questions.
+        Now take a deep breath and start your response step by step.
+        """
+    # 读取已处理的数据，如果文件存在
+    if os.path.exists(output_path):
+        with open(output_path, "r") as f:
+                llava_dataset = json.load(f)
+    else:
+        llava_dataset = []
+    # 获取已处理的IDs
+    processed_ids = [data["id"] for data in llava_dataset]
+    count = len(processed_ids)
+    count, total = 0, len(infos)
+    img_path = '/mnt1/wjl/InternLM-XComposer/data/pwiseg/train/'
+    for ix, (i, v) in enumerate(infos.items()):
+        # import ipdb; ipdb.set_trace()
+        # count = count + 1
+        if ix in processed_ids:
+                  continue  # 跳过已处理的数据
+        llava_dict = {}
+        info_dict = v
+        final_prompt = prompt.format(position=info_dict)
+        try:
+            answer = get_result(final_prompt)
+            #print(answer)
+            llava_dict["id"] = ix
+            llava_dict["image"] = os.path.join(img_path, i)
+            llava_dict["caption"] = answer
+            llava_dataset.append(llava_dict)
+            #print(llava_dataset)
+            print("## {count}/{all_number}, {answer}".format(count=ix,
+                                                      all_number=total,
+                                                      answer=answer))
+            with open(output_path,"w") as f:
+                f.write(json.dumps(llava_dataset,indent=2))
+        except Exception as e:
+                 print(f"Error processing {i}: {e}")
+                 continue
+        if count > 20:
+             break

distributed_inference/prompt_config.py ADDED Viewed

	@@ -0,0 +1,157 @@

# User-prompt templates keyed by task name. Every non-empty template contains a
# single ``{position}`` placeholder meant to be filled with
# ``str.format(position=...)``; any other literal brace added here would have to
# be doubled. The leading indentation inside each template is part of the
# runtime string.
prompt_dict = {
    # Free-form description of the instrument table from the annotated tools.
    'caption_generation':
        """
        ## Information
        - You will receive a list of dictionaries of annotated tools that can be seen on the table.
        - Note that each dictionary contains "name":"bbox", "name" is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y).
        - The list is as follows:
        ```json
        {position}
        ```
        ## Task
        - Your task is to generate a comprehensive description of the instrument table, including what is on the table, where they are, and their positional relationship.
        ## Constraints
        - Don't mention any specific numbers for its bounding box, using rough positions such as left, top right, etc.
        - Don't make up anything not mentioned in the list, just an objective and direct description.
        - The description should be more than 100 words and less than 150 words.
        - Don't include any special characters such as "#" and don't include any next line character in your response
        - Your response should start with "Here is a comprehensive description of the instrument table:\n\n"
        Now take a deep breath and start your response step by step.
        """,
    # Placeholder: no template has been written for this task yet.
    'relationship_generation':
        """
        """,
    # Counting questions/answers with short inline examples.
    'question_generation':
        """
        ## Information
        - You will receive a list of dictionaries of annotated tools that can be seen on the table.
        - Note that each dictionary contains "name":"bbox", "name" is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y).
        - The list is as follows:
        ```json
        {position}
        ```
        ## Task
        Based on the list, your task is to generate several questions and corresponding answers about counting surgical tools on the table.
        ## Example:
        - "Question: How many scalpels are on the table? Answer: Two scalpels are on the table",
        - "Question: How many forceps are on the table? Answer: There are no forceps on the table",
        - "Question: How many surgical tools in total are on the table? Answer: There are 2 scalpels on the table and 3 tweezers on the table",
        ## Constraints
        - Remember, all the questions must be clearly answerable based on the information of given lists.
        - Do not make up any questions and answers without solid evidence in the given lists.
        - Importantly, you do not need to give any reasoning process, just give straightforward answers.
        - Do not use coordinates to generate answers and questions.
        Now take a deep breath and start your response step by step.
        """,
    # Counting questions/answers with multi-turn Q/A examples and a strict
    # "Q: ... / A: ..." output format.
    'question_generation_with_ex':
        """
        ## Information
        - You will receive a list of dictionaries of annotated tools that can be seen on the table.
        - Note that each dictionary contains "name":"bbox", "name" is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y).
        - The list is as follows:
        ```json
        {position}
        ```
        ## Task
        Based on the list, your task is to generate several questions and corresponding answers about counting surgical tools on the table.
        ## Example:
        - Q: How many people are there in the operating room?
          A: There are five people in the operating room.
          Q: Which human roles are present in the operating room?
          A: The team includes a patient, an anesthetist, and circulating nurses.
          Q: How many devices are in the operating room?
          A: There are four individual devices in the operating room: an anesthesia equipment, an operating table, an instrument table, and a secondary table.
        - Q: "How many medical staff members are in the operating room?",
          A: "Two, Staff 2 and Staff 3 are in the operating room."
          Q: "Can you list the names of all the medical equipment in the operating room?",
          A: "Yes, the equipment in the operating room includes the anesthesia equipment, instrument table, and secondary table."
          Q: "Who is closer to the secondary table?",
          A: "According to the provided positions, neither Staff 2 nor Staff 3 is specifically close to the secondary table."
        - Q: How many people are present in the operating room?
          A: Four, the patient and three medical staff members are in the operating room.
          Q: How many items of equipment are there on the left side of the room?
          A: Two, the anesthesia equipment and the secondary table are on the left side of the room.
          Q: How many tables are in the operating room?
          A: Three, there are an operating table, an instrument table, and a secondary table in the operating room.
          Q: Which medical staff members are closest to the patient?
          A: The circulating nurse and the anaesthetist are closest to the patient.
          Q: How many pieces of equipment are being used close by the medical staff around the patient?
          A: Two, the operating table and the anesthesia equipment are being used close by the medical staff around the patient.
        ## Constraints
        - Remember, all the questions must be clearly answerable based on the information of given lists.
        - Do not make up any questions and answers without solid evidence in the given lists.
        - Importantly, you do not need to give any reasoning process, just give straightforward answers.
        - Do not use coordinates to generate answers and questions.
        - The questions and answers should strictly follow the format, without any starting and ending words:
        Q: ...
        A: ...
        Q: ...
        A: ...
        Q: ...
        A: ...
        Now take a deep breath and start your response step by step.
        """
}
# System-level instructions keyed by name.
instruction_dict = {
    # Persona shared by the caption / question generation prompts.
    'general':
        'You are an AI visual assistant, and you are looking at a picture of many surgical tools.',
    # Few-shot instruction asking for free-form Q/A text to be rewritten as
    # "Q: <question>" / "A: <answer>" pairs. Adjacent literals are concatenated
    # by the parser, so no "+" or line-continuation backslashes are needed.
    'rephrase_with_ex': (
        'Given the some sentences, rewrite it into a regular format. It contain some pairs of questions and answers. For each pair, the output should be formulated as: \n'
        'Q: <question>\n'
        'A: <answer>\n'
        'Please do not output anything else. The output should only contain the pairs of questions and answers. \n'
        'Here are some examples.\n'
        'Example 1: \n'
        'Input: \n'
        "### Question 1:\nWhat equipment can be found in the operating room?\n\n**Answer:** The operating room contains anesthesia equipment, an instrument table, and a secondary table.\n\n---\n\n### Question 2:\nWhat is positioned in the center of the operating room?\n\n**Answer:** There's an instrument table in the center of the operating room. It is typically used to keep surgical instruments in a readily accessible location doing to an operation.\n\n---\n\n### Question 3:\nIs there any anesthesia equipment present in the operating room?\n\n**Answer:** Yes, there is anesthesia equipment present in the operating room, which is used to administer anesthesia to patients in order to keep them unconscious or pain-free during surgical procedures.\n"
        'Output: \n'
        'Q: What equipment can be found in the operating room?\n'
        'A: The operating room contains anesthesia equipment, an instrument table, and a secondary table.\n\n'
        'Q: What is positioned in the center of the operating room?\n'
        'A: There\'s an instrument table in the center of the operating room. It is typically used to keep surgical instruments in a readily accessible location doing to an operation.\n\n'
        'Q: Is there any anesthesia equipment present in the operating room?\n'
        'A: Yes, there is anesthesia equipment present in the operating room, which is used to administer anesthesia to patients in order to keep them unconscious or pain-free during surgical procedures.\n'
        'Example 2: \n'
        'Input: \n'
        # This literal must stay on one physical line: a quoted string cannot
        # span a raw line break.
        "Sure, I'll describe the current phase and generate related questions and answers that pertain to maintaining the sterile field during surgery.\n\n### Description\nIn the image, the medical staff are wearing blue sterile gowns, gloves, masks, and caps, indicating adherence to strict aseptic techniques. There is equipment carefully covered with sterile coverings, and positions are maintained near the required instruments such as the instrument table and anesthesia equipment. Staff members seem to be in diligent preparation and monitoring phases prior to more active surgical intervention.\n\n### Questions and Answers about the Sterile Phase\n\n#### Question 1:\nWhat is the current surgical phase, and how should it be maintained?\n##### Answer:\nThe current surgical phase is sterile. It is crucial to maintain strict aseptic techniques: all personnel must continue to wear sterile gowns, gloves, and masks while thoroughly disinfecting the surgical site using sterilized instruments and materials. The team should minimize unnecessary movement and contact to reduce contamination risks. Clear communication within the team is important to promptly address any breaches in sterility.\n\n#### Question 2:\nWhy is it essential to maintain the sterile phase during surgery?\n##### Answer:\nMaintaining the sterile phase during surgery is essential to prevent infections. It ensures the surgical site and instruments remain free from harmful bacteria and other microorganisms. Proper sterilization reduces the risk of postoperative complications, speeds up patient recovery, and enhances the overall success of the surgical procedure.\n\n#### Question 3:\nWhat suggestions could help surgeons improve the quality of surgery during the sterile phase?\n##### Answer:\nTo improve the quality of surgery during the sterile phase, it is important to:\n1. Ensure a thorough hand washing and use of sterilizing solutions before scrubbing in.\n2. Use sterile drapes to isolate the surgical site efficiently.\n3. Maintain constant surveillance of sterilization procedures.\n4. Label and organize sterile instruments properly on the instrument table.\n5. Implement a clear protocol for notifying the team if the sterile field is compromised, so corrective actions can be taken immediately.\n6. Avoid over-cluttering the sterile field with non-essential equipment.\n\nBy following these suggestions, the sterility of the procedure can be maintained, resulting in higher surgery quality and better patient outcomes."
        'Output: \n'
        'Q: What is the current surgical phase, and how should it be maintained?\n'
        'A: The current surgical phase is sterile. It is crucial to maintain strict aseptic techniques: all personnel must continue to wear sterile gowns, gloves, and masks while thoroughly disinfecting the surgical site using sterilized instruments and materials. The team should minimize unnecessary movement and contact to reduce contamination risks. Clear communication within the team is important to promptly address any breaches in sterility.\n\n'
        'Q: Why is it essential to maintain the sterile phase during surgery?\n'
        'A: Maintaining the sterile phase during surgery is essential to prevent infections. It ensures the surgical site and instruments remain free from harmful bacteria and other microorganisms. Proper sterilization reduces the risk of postoperative complications, speeds up patient recovery, and enhances the overall success of the surgical procedure.\n\n'
        'Q: What suggestions could help surgeons improve the quality of surgery during the sterile phase?\n'
        'A: To improve the quality of surgery during the sterile phase, it is important to:\n1. Ensure a thorough hand washing and use of sterilizing solutions before scrubbing in.\n2. Use sterile drapes to isolate the surgical site efficiently.\n3. Maintain constant surveillance of sterilization procedures.\n4. Label and organize sterile instruments properly on the instrument table.\n5. Implement a clear protocol for notifying the team if the sterile field is compromised, so corrective actions can be taken immediately.\n6. Avoid over-cluttering the sterile field with non-essential equipment.\n'
        'Example 3: \n'
        'Input: \n'
        "Question: What equipment can be found in the operating room?\nAnswer: Anesthesia equipment.\n\nQuestion: What is placed on the instrument table?\nAnswer: Various surgical instruments needed for the procedure are typically placed on the instrument table for easy access by the surgical team.\n\nQuestion: Where can the secondary table be found, and what might it be used for?\nAnswer: The secondary table is part of the auxiliary equipment in the operating room and may be used to hold additional surgical instruments or supplies needed during the procedure."
        'Output: \n'
        'Q: What equipment can be found in the operating room?\n'
        'A: Anesthesia equipment.\n\n'
        'Q: What is placed on the instrument table?\n'
        'A: Various surgical instruments needed for the procedure are typically placed on the instrument table for easy access by the surgical team.\n\n'
        'Q: Where can the secondary table be found, and what might it be used for?\n'
        'A: The secondary table is part of the auxiliary equipment in the operating room and may be used to hold additional surgical instruments or supplies needed during the procedure.\n'
    ),
}

distributed_inference/utils.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import json
# System instructions keyed by instruction name ('question_rephrase',
# 'count_generate').
sys_instructions = {
    # Few-shot instruction asking for free-form Q/A text to be rewritten as
    # "Q: <question>" / "A: <answer>" pairs. Adjacent literals are concatenated
    # by the parser, so no "+" or line-continuation backslashes are needed.
    'question_rephrase': (
        'Given the some sentences, rewrite it into a regular format. It contain some pairs of questions and answers. For each pair, the output should be formulated as: \n'
        'Q: <question>\n'
        'A: <answer>\n'
        'Please do not output anything else. The output should only contain the pairs of questions and answers. \n'
        'Here are some examples.\n'
        'Example 1: \n'
        'Input: \n'
        "### Question 1:\nWhat equipment can be found in the operating room?\n\n**Answer:** The operating room contains anesthesia equipment, an instrument table, and a secondary table.\n\n---\n\n### Question 2:\nWhat is positioned in the center of the operating room?\n\n**Answer:** There's an instrument table in the center of the operating room. It is typically used to keep surgical instruments in a readily accessible location doing to an operation.\n\n---\n\n### Question 3:\nIs there any anesthesia equipment present in the operating room?\n\n**Answer:** Yes, there is anesthesia equipment present in the operating room, which is used to administer anesthesia to patients in order to keep them unconscious or pain-free during surgical procedures.\n"
        'Output: \n'
        'Q: What equipment can be found in the operating room?\n'
        'A: The operating room contains anesthesia equipment, an instrument table, and a secondary table.\n\n'
        'Q: What is positioned in the center of the operating room?\n'
        'A: There\'s an instrument table in the center of the operating room. It is typically used to keep surgical instruments in a readily accessible location doing to an operation.\n\n'
        'Q: Is there any anesthesia equipment present in the operating room?\n'
        'A: Yes, there is anesthesia equipment present in the operating room, which is used to administer anesthesia to patients in order to keep them unconscious or pain-free during surgical procedures.\n'
        'Example 2: \n'
        'Input: \n'
        # This literal must stay on one physical line: a quoted string cannot
        # span a raw line break.
        "Sure, I'll describe the current phase and generate related questions and answers that pertain to maintaining the sterile field during surgery.\n\n### Description\nIn the image, the medical staff are wearing blue sterile gowns, gloves, masks, and caps, indicating adherence to strict aseptic techniques. There is equipment carefully covered with sterile coverings, and positions are maintained near the required instruments such as the instrument table and anesthesia equipment. Staff members seem to be in diligent preparation and monitoring phases prior to more active surgical intervention.\n\n### Questions and Answers about the Sterile Phase\n\n#### Question 1:\nWhat is the current surgical phase, and how should it be maintained?\n##### Answer:\nThe current surgical phase is sterile. It is crucial to maintain strict aseptic techniques: all personnel must continue to wear sterile gowns, gloves, and masks while thoroughly disinfecting the surgical site using sterilized instruments and materials. The team should minimize unnecessary movement and contact to reduce contamination risks. Clear communication within the team is important to promptly address any breaches in sterility.\n\n#### Question 2:\nWhy is it essential to maintain the sterile phase during surgery?\n##### Answer:\nMaintaining the sterile phase during surgery is essential to prevent infections. It ensures the surgical site and instruments remain free from harmful bacteria and other microorganisms. Proper sterilization reduces the risk of postoperative complications, speeds up patient recovery, and enhances the overall success of the surgical procedure.\n\n#### Question 3:\nWhat suggestions could help surgeons improve the quality of surgery during the sterile phase?\n##### Answer:\nTo improve the quality of surgery during the sterile phase, it is important to:\n1. Ensure a thorough hand washing and use of sterilizing solutions before scrubbing in.\n2. Use sterile drapes to isolate the surgical site efficiently.\n3. Maintain constant surveillance of sterilization procedures.\n4. Label and organize sterile instruments properly on the instrument table.\n5. Implement a clear protocol for notifying the team if the sterile field is compromised, so corrective actions can be taken immediately.\n6. Avoid over-cluttering the sterile field with non-essential equipment.\n\nBy following these suggestions, the sterility of the procedure can be maintained, resulting in higher surgery quality and better patient outcomes."
        'Output: \n'
        'Q: What is the current surgical phase, and how should it be maintained?\n'
        'A: The current surgical phase is sterile. It is crucial to maintain strict aseptic techniques: all personnel must continue to wear sterile gowns, gloves, and masks while thoroughly disinfecting the surgical site using sterilized instruments and materials. The team should minimize unnecessary movement and contact to reduce contamination risks. Clear communication within the team is important to promptly address any breaches in sterility.\n\n'
        'Q: Why is it essential to maintain the sterile phase during surgery?\n'
        'A: Maintaining the sterile phase during surgery is essential to prevent infections. It ensures the surgical site and instruments remain free from harmful bacteria and other microorganisms. Proper sterilization reduces the risk of postoperative complications, speeds up patient recovery, and enhances the overall success of the surgical procedure.\n\n'
        'Q: What suggestions could help surgeons improve the quality of surgery during the sterile phase?\n'
        'A: To improve the quality of surgery during the sterile phase, it is important to:\n1. Ensure a thorough hand washing and use of sterilizing solutions before scrubbing in.\n2. Use sterile drapes to isolate the surgical site efficiently.\n3. Maintain constant surveillance of sterilization procedures.\n4. Label and organize sterile instruments properly on the instrument table.\n5. Implement a clear protocol for notifying the team if the sterile field is compromised, so corrective actions can be taken immediately.\n6. Avoid over-cluttering the sterile field with non-essential equipment.\n'
        'Example 3: \n'
        'Input: \n'
        "Question: What equipment can be found in the operating room?\nAnswer: Anesthesia equipment.\n\nQuestion: What is placed on the instrument table?\nAnswer: Various surgical instruments needed for the procedure are typically placed on the instrument table for easy access by the surgical team.\n\nQuestion: Where can the secondary table be found, and what might it be used for?\nAnswer: The secondary table is part of the auxiliary equipment in the operating room and may be used to hold additional surgical instruments or supplies needed during the procedure."
        'Output: \n'
        'Q: What equipment can be found in the operating room?\n'
        'A: Anesthesia equipment.\n\n'
        'Q: What is placed on the instrument table?\n'
        'A: Various surgical instruments needed for the procedure are typically placed on the instrument table for easy access by the surgical team.\n\n'
        'Q: Where can the secondary table be found, and what might it be used for?\n'
        'A: The secondary table is part of the auxiliary equipment in the operating room and may be used to hold additional surgical instruments or supplies needed during the procedure.\n'
    ),
    # Persona used when generating counting questions.
    'count_generate': 'You are an AI visual assistant, and you are looking at a picture of many surgical tools.',
}
# User-prompt templates keyed by instruction name. The template contains a
# single ``{position}`` placeholder meant to be filled with
# ``str.format(position=...)``; the leading indentation inside the template is
# part of the runtime string.
user_prompt = {
    # Counting questions/answers about the annotated surgical tools.
    'count_generate':  """
        ## Information
        - You will receive a list of dictionaries of annotated tools that can be seen on the table.
        - Note that each dictionary contains "name":"bbox", "name" is the name of the surgical tool that can be seen on the table, and "bbox" is the numerical value of the corresponding surgical tool position (top left x, top left y, bottom right x, bottom right y).
        - The list is as follows:
        ```json
        {position}
        ```
        ## Task
        Based on the list, your task is to generate several questions and corresponding answers about counting surgical tools on the table.
        ## Example:
        - "Question: How many scalpels are on the table? Answer: Two scalpels are on the table",
        - "Question: How many forceps are on the table? Answer: There are no forceps on the table",
        - "Question: How many surgical tools in total are on the table? Answer: There are 2 scalpels on the table and 3 tweezers on the table",
        ## Constraints
        - Remember, all the questions must be clearly answerable based on the information of given lists.
        - Do not make up any questions and answers without solid evidence in the given lists.
        - Importantly, you do not need to give any reasoning process, just give straightforward answers.
        - Do not use coordinates to generate answers and questions.
        Now take a deep breath and start your response step by step.
        """
}
def repharse(results):
    """Parse ``Q:`` / ``A:`` formatted text into question/answer dictionaries.

    The text is split into blank-line-separated chunks. A chunk is kept only
    when, after trimming surrounding whitespace, it consists of exactly two
    lines: the first starting with ``Q:`` and the second with ``A:``. Any other
    chunk is skipped on its own, so one malformed chunk never discards pairs
    that parsed correctly.

    Args:
        results: Raw text expected to contain ``Q: ...\\nA: ...`` pairs
            separated by blank lines.

    Returns:
        A list of ``{'question': str, 'answer': str}`` dictionaries in input
        order, with the markers and surrounding whitespace removed. An empty
        list is returned when ``results`` is not a string or holds no
        well-formed pair.
    """
    if not isinstance(results, str):
        return []

    qa_pairs = []
    for chunk in results.split('\n\n'):
        # Trim first so a trailing newline on the last pair does not turn it
        # into a three-line chunk that would be rejected.
        lines = chunk.strip().split('\n')
        if len(lines) != 2:
            continue
        question_line, answer_line = lines
        if not question_line.startswith('Q:') or not answer_line.startswith('A:'):
            continue
        # Slice off only the leading marker; splitting on 'A: ' would truncate
        # answers that happen to contain that text themselves.
        qa_pairs.append({
            'question': question_line[2:].strip(),
            'answer': answer_line[2:].strip(),
        })
    return qa_pairs
def caption_repharse(results):
    """Extract the caption that follows a one-paragraph preamble.

    The text is split on blank lines. Only when that yields exactly two parts
    (preamble, caption) is the second part returned.

    Args:
        results: Raw generated text.

    Returns:
        The second blank-line-separated part, or ``""`` when ``results`` is not
        a string or does not split into exactly two parts.
    """
    # Explicit type guard instead of a bare ``except`` that would also swallow
    # KeyboardInterrupt / SystemExit.
    if not isinstance(results, str):
        return ""
    parts = results.split('\n\n')
    return parts[1] if len(parts) == 2 else ""
def get_output_data(data, output_data, indices, instruction):
    """Return a copy of the records selected for the given instruction.

    Args:
        data: Sequence of input records; sliced for ``'question_rephrase'``.
        output_data: Accumulated records; copied for ``'count_generate'``.
        indices: Positions into ``data``; only the last entry is used.
        instruction: ``'question_rephrase'`` or ``'count_generate'``.

    Returns:
        For ``'question_rephrase'``, a copy of ``data`` up to and including
        position ``indices[-1]`` (an empty copy when ``indices`` is empty).
        For ``'count_generate'``, a copy of ``output_data``. ``None`` for any
        other instruction.
    """
    if instruction == 'question_rephrase':
        if len(indices) == 0:
            # No position available yet: return an empty prefix rather than
            # failing with IndexError on indices[-1].
            return data[:0].copy()
        return data[:indices[-1] + 1].copy()
    if instruction == 'count_generate':
        return output_data.copy()
    # Unknown instruction: unchanged behavior, now explicit.
    return None
def _clean_caption_text(caption):
    """Flatten one caption to a single line and strip Markdown residue."""
    # Order matters: collapse blank lines before single newlines, and remove
    # the longer heading markers before the shorter ones.
    caption = caption.replace('\n\n', ' ').replace('\n', ' ')
    caption = caption.replace('\u2019', '\'').replace('\u2013', '-')
    caption = caption.replace('### ', '').replace('## ', '').replace('###', '').replace('##', '')
    caption = caption.replace('**', '')
    return caption.replace('- ', '')


def clean_caption(file):
    """Clean every caption in a JSON file and write a ``_rephrased`` copy.

    The input must hold a list of objects, each with a ``'caption'`` string.
    Captions are flattened to one line, typographic apostrophes and en dashes
    are replaced with ASCII, and Markdown heading, bold and bullet markers are
    removed.

    Args:
        file: Path of the JSON file to read. The result is written next to it
            as ``<name>_rephrased.json``; if the path lacks a ``.json`` suffix
            the marker is appended, so the input is never overwritten.
    """
    suffix = '.json'
    # Rewrite only the trailing extension; a plain str.replace would also
    # touch '.json' occurring earlier in the path.
    if file.endswith(suffix):
        output_name = file[:-len(suffix)] + '_rephrased.json'
    else:
        output_name = file + '_rephrased.json'

    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for record in data:
        record['caption'] = _clean_caption_text(record['caption'])

    # Write once after all captions are cleaned instead of re-serialising a
    # growing prefix on every iteration.
    with open(output_name, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
if __name__ == '__main__':
    # Script entry point: clean each caption file listed below.
    for caption_file in ('./data/caption_dataset_pwiseg_0710.json',):
        clean_caption(caption_file)