# Truck2 / data.py
# Author: Wendy-Fly
# Upload data.py with huggingface_hub
# Commit 3fe60cc (verified)
import json
import pyarrow.parquet as pq
import os
import numpy as np
import argparse
from PIL import Image
import io
def read_json(file_path):
    """Load a UTF-8 encoded JSON file and return the decoded object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content
        (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
def write_json(file_path, data):
    """Serialize ``data`` as pretty-printed JSON into ``file_path``.

    The file is written as UTF-8 with a 4-space indent. Non-ASCII
    characters are stored verbatim (``ensure_ascii=False``) rather than
    as ``\\uXXXX`` escapes. An existing file is overwritten.

    Args:
        file_path: Destination path of the JSON file.
        data: Any JSON-serializable object.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains a value ``json`` cannot serialize.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
def read_parquet(file_path):
    """Read a parquet file and return its rows as a list of lists.

    The file is loaded through pyarrow, converted to a pandas DataFrame,
    and flattened with ``DataFrame.values.tolist()``: one inner list per
    row, with values in the file's column order. Column names are not
    preserved, so callers address fields by position.

    Args:
        file_path: Path of the parquet file to read.

    Returns:
        A list with one list per row.
    """
    table = pq.read_table(file_path)
    df = table.to_pandas()
    return df.values.tolist()
def slove_images(bytes, path):
    """Decode an in-memory encoded image and save it to ``path``.

    Args:
        bytes: Raw encoded image content (a bytes-like object). The name
            shadows the builtin; it is kept so existing keyword callers
            continue to work.
        path: Destination file path. PIL picks the output format from
            the file extension.
    """
    # Use the image as a context manager so PIL releases its resources
    # as soon as the file has been written.
    with Image.open(io.BytesIO(bytes)) as img:
        img.save(path)
# data = read_parquet('/Users/baixuehai/Downloads/2025_AAAI/dev-00000-of-00004-f147d414270a90e1.parquet')
# print(len(data[0]))
# print(data[0][0])
# print(data[0][1])
#print(data[0][2].keys())
# Directory containing the parquet shards. Decoded images are written into
# the same directory (both paths are identical).
file_path = '/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/magicbrush_dataset/data'
image_path = '/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/magicbrush_dataset/data'
os.makedirs(image_path, exist_ok=True)

# Instruction text attached to every image. It is the same for all records,
# so it is built once instead of on every loop iteration.
prompt_text = "Please help me write a prompt for image editing on this picture. The requirements are as follows: complex editing instructions should include two to four simple editing instructions involving spatial relationships (simple editing instructions such as ADD: add an object to the left of a certain object, DELETE: delete a certain object, MODIFY: change a certain object into another object). We hope that the editing instructions can have simple reasoning . Please give me clear editing instructions and also consider whether such editing instructions are reasonable.Examples are as follows:Removed railway signals closer to trains,Remove the person standing on the tracks and Change the red train into red train"

save_data = []
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the generated JSON reproducible between runs.
for sud_ in sorted(os.listdir(file_path)):
    if not sud_.endswith('.parquet'):
        continue
    print(sud_)
    data = read_parquet(os.path.join(file_path, sud_))
    for x in data:
        # Columns 2 and 5 are each indexed with 'path' and 'bytes'
        # (presumably the source and target image of an edit pair --
        # TODO confirm against the dataset schema).
        # Enable the image export when running on Linux.
        slove_images(x[2]['bytes'], os.path.join(image_path, x[2]['path']))
        img_path = os.path.join(image_path, x[5]['path'])
        # Enable the image export when running on Linux.
        slove_images(x[5]['bytes'], img_path)
        # The message references the column-5 image: it is the last path
        # assigned to img_path before the message is built.
        message = {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": img_path,
                },
                {"type": "text", "text": prompt_text},
            ],
        }
        save_data.append(message)

# NOTE(review): the original indentation was lost; the total is reported
# once after all shards are processed -- confirm this placement.
print(len(save_data))
save_json = '/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/magicbrush_dataset/tg_1.json'
write_json(save_json, save_data)