|
import requests |
|
import json |
|
import base64 |
|
import os |
|
import time |
|
import pandas as pd |
|
from tqdm import tqdm |
|
from concurrent.futures import ThreadPoolExecutor |
|
|
|
# Debug switch; not read anywhere in the visible code.
DEBUG = True

# SECURITY NOTE(review): these API keys are committed in plain text. Anyone with
# read access to this file can use them; they should be rotated and supplied via
# the environment instead. They are kept here only as a fallback so existing
# runs keep working unchanged.
_DEFAULT_SK_LIST = [
    "sk-V8HaW6l4a6qkTRlR07423f3c8c67431c8a9d9c365c0b7d9b",
    "sk-MZQXlv5tEG5hDX3yoK6sKRB4P9JBuw8PWtbeix1JITHWzIxW",
    "sk-NgALyBkzs6LPt5kbvLS8WBILov33pL2rB6J5bLTI4FBk7O2p",
    "sk-MEuJz0u5CyFyVgEP9CvUPhybfkP9eQg8iak82OU9pN6GC0xH",
]

# API keys rotated across requests. A comma-separated GEMINI_API_KEYS
# environment variable, when set and non-empty, takes precedence over the
# committed defaults.
SK_LIST = [
    key.strip()
    for key in os.environ.get("GEMINI_API_KEYS", "").split(",")
    if key.strip()
] or _DEFAULT_SK_LIST

# Dataset root; not read anywhere in the visible code.
COCO_ROOT = '/root/autodl-tmp/data/location_bench1/coco_resize/'
|
|
|
|
|
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
|
|
|
def _build_caption_payload(question, base64_image):
    """Serialize the chat-completions request body for one image and its four captions."""
    prompt = (
        "You will see an image along with four corresponding descriptions (captions). "
        "Please carefully observe the image and select the description that best matches "
        "the content of the image. Choose one option from (A), (B), (C), or (D).\n"
        f"Options: (A){question[0]}\n(B){question[1]}\n(C){question[2]}\n(D){question[3]}\n"
        "Please provide your answer with only one of the options and nothing else."
    )
    return json.dumps({
        "model": "gemini-1.5-pro",
        "stream": False,
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        },
                    },
                ],
            }
        ],
        "max_tokens": 1000,
    })


def gemini_label(question, img, idx, sk, attempt=0, timeout=120):
    """Ask the model which of four captions best matches an image.

    Args:
        question: Indexable holding the four candidate captions (items 0-3).
        img: Path of the image file to send.
        idx: Identifier echoed back in the result under ``"id"``.
        sk: Value sent verbatim in the ``Authorization`` header.
        attempt: Number of attempts already consumed; values above 5 return
            ``None`` immediately.
        timeout: Seconds to wait for the HTTP request before treating the
            attempt as failed.

    Returns:
        ``{"id": idx, "answer": <parsed JSON response>}`` on success, or
        ``None`` when the captions are malformed or every attempt failed.

    Raises:
        OSError: If the image file cannot be read (not retried).
    """
    max_attempt = 5
    if attempt > max_attempt:
        return None

    url = "https://open.xiaojingai.com/v1/chat/completions"

    # Encode the image and build the request once; only the HTTP call is retried.
    base64_image = encode_image(img)
    try:
        payload = _build_caption_payload(question, base64_image)
    except (IndexError, KeyError, TypeError) as ex:
        # Malformed captions fail identically on every retry, so give up now.
        print(idx, ex)
        return None

    headers = {
        'Authorization': sk,
        'Content-Type': 'application/json'
    }

    # Iterative retry. The previous recursive call omitted ``img`` and thereby
    # shifted every argument (idx became the image path, sk became the id).
    while attempt <= max_attempt:
        try:
            response = requests.post(url, headers=headers, data=payload, timeout=timeout)
            print(response)
            print("Response Status Code:", response.status_code)
            print("Response Text:", response.text)
            # Treat HTTP error statuses as failed attempts instead of answers.
            response.raise_for_status()
            output = response.json()
            print("output", output)
            return {"id": idx, "answer": output}
        except Exception as ex:
            # Best-effort batch labelling: log, back off briefly, try again.
            print(idx, ex)
            time.sleep(2)
            attempt += 1

    return None
|
|
|
def process_sample(i, img, questions, sk):
    """Label the ``i``-th sample and return the result, or ``'None'`` on failure.

    Looks up the sample id in the module-level ``uids`` list, picks the
    ``i``-th entry from ``questions`` and ``img``, and forwards them to
    ``gemini_label`` together with the key ``sk``.
    """
    sample_id = uids[i]
    captions = questions[i]
    image_path = img[i]

    result = gemini_label(captions, image_path, sample_id, sk=sk)
    if result is None:
        return 'None'
    return result
|
|
|
if __name__ == '__main__':
    # Script entry point: load the sample metadata, label every sample through
    # a thread pool, and write the collected answers to ``answers.csv``.
    save_path = './'
    os.makedirs(save_path, exist_ok=True)

    meta_path = '/home/aiops/wangzh/data/scanner/indoor-new/all.json'
    with open(meta_path, 'r', encoding='utf-8') as json_file:
        meta = json.load(json_file)

    # ``uids`` must stay a module-level name: process_sample reads it as a global.
    uids = [sample['id'] for sample in meta]
    questions = [sample['captions'] for sample in meta]
    img = [
        f'/home/aiops/wangzh/data/scanner/scannet_2d_HR3/{sample["scene_id"]}/color/{sample["image"]}'
        for sample in meta
    ]

    if not SK_LIST:
        raise SystemExit('SK_LIST is empty: at least one API key is required')
    # Rotate over however many keys are configured rather than a fixed 4.
    num_keys = len(SK_LIST)

    with ThreadPoolExecutor(max_workers=4) as executor:
        answer_list = list(
            tqdm(
                executor.map(
                    lambda i: process_sample(i, img, questions, SK_LIST[i % num_keys]),
                    range(len(uids)),
                ),
                total=len(uids),
            )
        )

    print('gemini label sample:', len(answer_list))

    # process_sample yields the string 'None' for failed samples; mixing that
    # with dict rows is expected to make the DataFrame constructor fail, so
    # failures become rows with an empty answer instead. executor.map returns
    # results in input order, so zipping with ``uids`` keeps ids aligned.
    rows = [
        ans if isinstance(ans, dict) else {"id": uid, "answer": None}
        for uid, ans in zip(uids, answer_list)
    ]
    pd.DataFrame(rows).to_csv(os.path.join(save_path, 'answers.csv'), index=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|