euijinrnd committed on
Commit eef26ad · verified · 1 Parent(s): 16628c8

Add files using upload-large-folder tool

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/head.png filter=lfs diff=lfs merge=lfs -text
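
The added pattern routes assets/head.png through Git LFS: the repository then stores only a small pointer stub for the image (of the same form as the pointer shown for data/empty_lang_embed.pt at the end of this commit), while the 743 kB file itself lives in LFS storage.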
assets/head.png ADDED

Git LFS Details

  • SHA256: c8f735a5ff1eccb080256f9756aecab43c933cb4f3ea35b499618c9bcb64a9ec
  • Pointer size: 131 bytes
  • Size of remote file: 743 kB
data/agilex/hdf5totfrecords.py ADDED
@@ -0,0 +1,114 @@
+ import tensorflow as tf
+ import h5py
+ import os
+ import fnmatch
+ import shutil
+ from tqdm import tqdm
+ from multiprocessing import Pool
+ import numpy as np
+
+
+ def _bytes_feature(value):
+     """Returns a bytes_list from a string / byte."""
+     if isinstance(value, type(tf.constant(0))):
+         value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
+     return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+
+ def _bool_feature(value):
+     """Returns a bool_list from a boolean."""
+     return tf.train.Feature(int64_list=tf.train.Int64List(value=[int(value)]))
+
+
+ def serialize_example(action, base_action, qpos, qvel, cam_high, cam_left_wrist, cam_right_wrist, instruction, terminate_episode):
+     feature = {
+         'action': _bytes_feature(tf.io.serialize_tensor(action)),
+         'base_action': _bytes_feature(tf.io.serialize_tensor(base_action)),
+         'qpos': _bytes_feature(tf.io.serialize_tensor(qpos)),
+         'qvel': _bytes_feature(tf.io.serialize_tensor(qvel)),
+         'cam_high': _bytes_feature(tf.io.serialize_tensor(tf.convert_to_tensor(cam_high.tobytes(), dtype=tf.string))),
+         'cam_left_wrist': _bytes_feature(tf.io.serialize_tensor(tf.convert_to_tensor(cam_left_wrist.tobytes(), dtype=tf.string))),
+         'cam_right_wrist': _bytes_feature(tf.io.serialize_tensor(tf.convert_to_tensor(cam_right_wrist.tobytes(), dtype=tf.string))),
+         'instruction': _bytes_feature(instruction),
+         'terminate_episode': _bool_feature(terminate_episode)
+     }
+     example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
+     return example_proto.SerializeToString()
+
+
+ def process_hdf5_file(args):
+     filepath, root_dir, out_dir = args
+     output_dir = os.path.join(out_dir, os.path.relpath(os.path.dirname(filepath), root_dir))
+     os.makedirs(output_dir, exist_ok=True)
+     filename = os.path.basename(filepath)
+     tfrecord_path = os.path.join(output_dir, filename.replace('.hdf5', '.tfrecord'))
+
+     if os.path.exists(tfrecord_path) and os.path.getsize(tfrecord_path) > 0:
+         return f"TFRecords already exist at {tfrecord_path}"
+     try:
+         with h5py.File(filepath, 'r') as f, tf.io.TFRecordWriter(tfrecord_path) as writer:
+             num_episodes = f['action'].shape[0]
+             # Remove the first few still steps
+             EPS = 1e-2
+             qpos = f['observations']['qpos'][:]
+             # Get the idx of the first qpos whose delta exceeds the threshold
+             qpos_delta = np.abs(qpos - qpos[0:1])
+             indices = np.where(np.any(qpos_delta > EPS, axis=1))[0]
+             if len(indices) > 0:
+                 first_idx = indices[0]
+             else:
+                 raise ValueError("Found no qpos that exceeds the threshold.")
+
+             for i in range(first_idx-1, num_episodes):
+                 action = f['action'][i]
+                 base_action = f['base_action'][i]
+                 qpos = f['observations']['qpos'][i]
+                 qvel = f['observations']['qvel'][i]
+                 cam_high = f['observations']['images']['cam_high'][i]
+                 cam_left_wrist = f['observations']['images']['cam_left_wrist'][i]
+                 cam_right_wrist = f['observations']['images']['cam_right_wrist'][i]
+                 instruction = f['instruction'][()]
+                 terminate_episode = i == num_episodes - 1
+                 serialized_example = serialize_example(action, base_action, qpos, qvel, cam_high, cam_left_wrist, cam_right_wrist, instruction, terminate_episode)
+                 writer.write(serialized_example)
+     except Exception as e:
+         with open("error_log.txt", "a") as f:
+             f.write(f"{filepath}\n")
+         print(f"error at {filepath}: {e}")
+     return f"TFRecords written to {tfrecord_path}"
+
+
+ def write_tfrecords(root_dir, out_dir):
+     if not os.path.exists(out_dir):
+         os.makedirs(out_dir)
+
+     hdf5_files = []
+     for root, dirs, files in os.walk(root_dir):
+         if os.path.exists(os.path.join(root, "expanded_instruction_gpt-4-turbo.json")):
+             # copy the instruction file
+             target_path = os.path.join(out_dir, os.path.relpath(root, root_dir))
+             os.makedirs(target_path, exist_ok=True)
+             shutil.copy(os.path.join(root, "expanded_instruction_gpt-4-turbo.json"), target_path)
+         elif os.path.exists(os.path.join(root, "expanded_instruction.json")):
+             print(root)
+             target_path = os.path.join(out_dir, os.path.relpath(root, root_dir))
+             os.makedirs(target_path, exist_ok=True)
+             shutil.copy(os.path.join(root, "expanded_instruction.json"), target_path)
+             # rename into expanded_instruction_gpt-4-turbo.json
+             os.rename(os.path.join(out_dir, os.path.relpath(root, root_dir), "expanded_instruction.json"), os.path.join(out_dir, os.path.relpath(root, root_dir), "expanded_instruction_gpt-4-turbo.json"))
+         for filename in fnmatch.filter(files, '*.hdf5'):
+             filepath = os.path.join(root, filename)
+             hdf5_files.append((filepath, root_dir, out_dir))
+
+     with Pool(16) as pool:
+         max_count = len(hdf5_files)
+         with tqdm(total=max_count) as pbar:
+             for _ in pool.imap_unordered(process_hdf5_file, hdf5_files):
+                 pbar.update(1)
+
+     print(f"TFRecords written to {out_dir}")
+
+
+ root_dir = "../datasets/agilex/rdt_data/"
+ out_dir = "../datasets/agilex/tfrecords/"
+ write_tfrecords(root_dir, out_dir)
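
For reference, a minimal sketch of how these TFRecords could be read back. The feature keys mirror serialize_example above; the tensor dtype (float32) and the file name episode.tfrecord are assumptions, since both depend on the HDF5 source data:

import tensorflow as tf

# Every feature was written as a serialized scalar, so the spec is all strings
# except the int64-encoded terminate_episode flag.
feature_description = {
    'action': tf.io.FixedLenFeature([], tf.string),
    'base_action': tf.io.FixedLenFeature([], tf.string),
    'qpos': tf.io.FixedLenFeature([], tf.string),
    'qvel': tf.io.FixedLenFeature([], tf.string),
    'cam_high': tf.io.FixedLenFeature([], tf.string),
    'cam_left_wrist': tf.io.FixedLenFeature([], tf.string),
    'cam_right_wrist': tf.io.FixedLenFeature([], tf.string),
    'instruction': tf.io.FixedLenFeature([], tf.string),
    'terminate_episode': tf.io.FixedLenFeature([], tf.int64),
}

def parse_step(record):
    parsed = tf.io.parse_single_example(record, feature_description)
    # Numeric arrays were written with tf.io.serialize_tensor, so they come back
    # via tf.io.parse_tensor; float32 is an assumed dtype here.
    action = tf.io.parse_tensor(parsed['action'], out_type=tf.float32)
    qpos = tf.io.parse_tensor(parsed['qpos'], out_type=tf.float32)
    # Camera features hold a serialized scalar string: the raw bytes of the frame
    # exactly as stored in the HDF5 (whatever encoding that source used).
    cam_high = tf.io.parse_tensor(parsed['cam_high'], out_type=tf.string)
    return action, qpos, cam_high, parsed['instruction'], parsed['terminate_episode']

# 'episode.tfrecord' is a placeholder path for one converted file.
dataset = tf.data.TFRecordDataset(['episode.tfrecord']).map(parse_step)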
data/empty_lang_embed.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b073685d3b8627ac068e7907f4d53e1b831729fd34e01e05ed96ebe53bf19633
+ size 9432
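
data/empty_lang_embed.pt is a 9.4 kB LFS-tracked PyTorch file; judging by its name, it holds a precomputed text embedding for an empty instruction. A one-line sketch of loading it, assuming it is a plain serialized tensor:

import torch

empty_embed = torch.load("data/empty_lang_embed.pt")  # assumption: plain serialized tensor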