File size: 2,607 Bytes
7c3dfeb
 
 
 
 
4a1bf47
51ae8ad
7c3dfeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c68a2b
7c3dfeb
 
 
 
 
 
 
 
 
 
 
 
 
3af0a6e
7c3dfeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json 
import pyarrow.parquet as pq
import os
import numpy as np
import argparse
from PIL import Image
import io

def read_json(file_path):
    """Parse the UTF-8 encoded JSON file at ``file_path`` and return its content."""
    with open(file_path, encoding='utf-8') as handle:
        # json.load(fp) is defined as json.loads(fp.read()); spelled out here.
        return json.loads(handle.read())

def write_json(file_path, data):
    """Write ``data`` to ``file_path`` as UTF-8 JSON.

    The output is indented by four spaces and non-ASCII characters are
    written as-is instead of being escaped.
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, indent=4, ensure_ascii=False)

def read_parquet(file_path):
    """Read the parquet file at ``file_path`` and return its rows.

    The file is loaded as an Arrow table, converted to a pandas DataFrame,
    and returned as a list with one inner list per row.
    """
    frame = pq.read_table(file_path).to_pandas()
    return frame.values.tolist()

def slove_images(bytes, path):
    """Decode an in-memory encoded image and save it to ``path``.

    Args:
        bytes: Encoded image data. The name shadows the builtin ``bytes``
            but is kept so existing callers keep working.
        path: Destination file path passed to ``Image.save``; no explicit
            format is given, so Pillow picks it from the file name.
    """
    # The context manager releases the image's underlying buffer
    # deterministically instead of waiting for garbage collection, which
    # matters when this is called once per row over a large dataset.
    with Image.open(io.BytesIO(bytes)) as img:
        img.save(path)
# data = read_parquet('/Users/baixuehai/Downloads/2025_AAAI/dev-00000-of-00004-f147d414270a90e1.parquet')
# print(len(data[0]))

# print(data[0][0])
# print(data[0][1])
#print(data[0][2].keys())

# Directory that is scanned for *.parquet shards, and the directory that
# receives the images extracted from those shards.
file_path = '/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/magicbrush_dataset/data'
image_path = '/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/magicbrush_dataset/images'
os.makedirs(image_path, exist_ok=True)

# Instruction text attached to every image; identical for all samples, so it
# is defined once instead of being rebuilt on every iteration.
PROMPT_TEXT = "Please help me write a prompt for image editing on this picture. The requirements are as follows: complex editing instructions should include two to five simple editing instructions involving spatial relationships (simple editing instructions such as ADD: add an object to the left of a certain object, DELETE: delete a certain object, MODIFY: change a certain object into another object). We hope that the editing instructions can have simple reasoning and can also include some abstract concept-based editing (such as making the atmosphere more romantic, or making the diet healthier, or making the boy more handsome and the girl more beautiful, etc.). Please give me clear editing instructions and also consider whether such editing instructions are reasonable."

save_data = []
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the generated dataset reproducible across runs and machines.
for sud_ in sorted(os.listdir(file_path)):
    if not sud_.endswith('.parquet'):
        continue
    print(sud_)
    data = read_parquet(os.path.join(file_path, sud_))
    for x in data:
        # x[2] is used as a mapping with 'path' and 'bytes' keys --
        # presumably the source-image column; verify against the parquet schema.
        # Images go into image_path (created above), not next to the parquet
        # shards in file_path.
        img_path = os.path.join(image_path, x[2]['path'])
        # Enabled when running on Linux.
        slove_images(x[2]['bytes'], img_path)
        message = {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": img_path,
                },
                {"type": "text", "text": PROMPT_TEXT},
            ],
        }
        save_data.append(message)

print(len(save_data))

save_json = '/home/zbz5349/WorkSpace/aigeeks/Qwen2.5-VL/magicbrush_dataset/dataset.json'
write_json(save_json, save_data)