chrisyrniu commited on
Commit
af85cc3
·
verified ·
1 Parent(s): cb324cb

Upload 8 files

Browse files
pour/config.json ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_steps": 160000,
3
+ "eval_every": 100000,
4
+ "validate_every": 1000,
5
+ "save_every": 8000,
6
+ "resume_ckpt_path": null,
7
+ "lr": 0.0001,
8
+ "policy_class": "cross",
9
+ "onscreen_render": false,
10
+ "seed": 6,
11
+ "temporal_agg": false,
12
+ "real_robot": true,
13
+ "load_pretrain": false,
14
+ "height": 480,
15
+ "width": 1280,
16
+ "normalize_resnet": false,
17
+ "wandb": true,
18
+ "pretrained_path": null,
19
+ "randomize_data_degree": 3,
20
+ "randomize_data": false,
21
+ "config": null,
22
+ "eval": false,
23
+ "batch_size": 32,
24
+ "lr_tokenizer": 0.0001,
25
+ "lr_action_head": 0.0001,
26
+ "lr_trunk": 0.0001,
27
+ "skip_mirrored_data": false,
28
+ "actuator_network_dir": null,
29
+ "history_len": null,
30
+ "future_len": null,
31
+ "prediction_len": null,
32
+ "kl_weight": null,
33
+ "chunk_size": 180,
34
+ "hidden_dim": null,
35
+ "dim_feedforward": null,
36
+ "no_encoder": true,
37
+ "dec_layers": 3,
38
+ "nheads": 8,
39
+ "use_pos_embd_image": 1,
40
+ "use_pos_embd_action": 1,
41
+ "feature_loss_weight": 0.0,
42
+ "self_attention": 1,
43
+ "same_backbones": true,
44
+ "use_mask": false,
45
+ "use_wrist": true,
46
+ "data_aug": false,
47
+ "grayscale": false,
48
+ "randomize_color": false,
49
+ "pretrain_image_width": 1276,
50
+ "pretrain_image_height": 480,
51
+ "gpu_id": 0,
52
+ "train_ratio": 0.99,
53
+ "share_cross_attn": false,
54
+ "policy_config": {
55
+ "lr": 0.0001,
56
+ "lr_backbone": 1e-05,
57
+ "embodiment_args_dict": {
58
+ "normalize_state": true,
59
+ "locoman": {
60
+ "tokenizer": {
61
+ "hidden_dim": 128,
62
+ "nhead": 4,
63
+ "dim_head": 32,
64
+ "dropout": 0.1,
65
+ "main_image": {
66
+ "output_dim": 128,
67
+ "weights": "DEFAULT",
68
+ "resnet_model": "resnet18",
69
+ "num_of_copy": 1,
70
+ "token_num": 16
71
+ },
72
+ "body_pose_state": {
73
+ "input_dim": 6,
74
+ "output_dim": 128,
75
+ "widths": [
76
+ 128,
77
+ 128
78
+ ],
79
+ "tanh_end": false,
80
+ "ln": true,
81
+ "token_num": 4
82
+ },
83
+ "eef_pose_state": {
84
+ "input_dim": 12,
85
+ "output_dim": 128,
86
+ "widths": [
87
+ 128,
88
+ 128
89
+ ],
90
+ "tanh_end": false,
91
+ "ln": true,
92
+ "token_num": 4
93
+ },
94
+ "eef_to_body_pose_state": {
95
+ "input_dim": 12,
96
+ "output_dim": 128,
97
+ "widths": [
98
+ 128,
99
+ 128
100
+ ],
101
+ "tanh_end": false,
102
+ "ln": true,
103
+ "token_num": 4
104
+ },
105
+ "gripper_state": {
106
+ "input_dim": 2,
107
+ "output_dim": 128,
108
+ "widths": [
109
+ 128,
110
+ 128
111
+ ],
112
+ "tanh_end": false,
113
+ "ln": true,
114
+ "token_num": 4
115
+ }
116
+ },
117
+ "action_head": {
118
+ "eef_pose": {
119
+ "type": "transformer",
120
+ "input_dim": 128,
121
+ "output_dim": 12,
122
+ "crossattn_modality_dropout": 0.1,
123
+ "crossattn_heads": 4,
124
+ "crossattn_dim_head": 16,
125
+ "action_horizon": 180
126
+ },
127
+ "body_pose": {
128
+ "type": "transformer",
129
+ "input_dim": 128,
130
+ "output_dim": 6,
131
+ "crossattn_modality_dropout": 0.1,
132
+ "crossattn_heads": 4,
133
+ "crossattn_dim_head": 16,
134
+ "action_horizon": 180
135
+ },
136
+ "gripper": {
137
+ "type": "transformer",
138
+ "input_dim": 128,
139
+ "output_dim": 2,
140
+ "crossattn_modality_dropout": 0.1,
141
+ "crossattn_heads": 4,
142
+ "crossattn_dim_head": 16,
143
+ "action_horizon": 180
144
+ }
145
+ }
146
+ },
147
+ "human": {
148
+ "tokenizer": {
149
+ "hidden_dim": 128,
150
+ "nhead": 4,
151
+ "dim_head": 32,
152
+ "dropout": 0.1,
153
+ "main_image": {
154
+ "output_dim": 128,
155
+ "weights": "DEFAULT",
156
+ "resnet_model": "resnet18",
157
+ "num_of_copy": 1,
158
+ "token_num": 16
159
+ },
160
+ "body_pose_state": {
161
+ "input_dim": 6,
162
+ "output_dim": 128,
163
+ "widths": [
164
+ 128,
165
+ 128
166
+ ],
167
+ "tanh_end": false,
168
+ "ln": true,
169
+ "token_num": 4
170
+ },
171
+ "eef_pose_state": {
172
+ "input_dim": 12,
173
+ "output_dim": 128,
174
+ "widths": [
175
+ 128,
176
+ 128
177
+ ],
178
+ "tanh_end": false,
179
+ "ln": true,
180
+ "token_num": 4
181
+ },
182
+ "eef_to_body_pose_state": {
183
+ "input_dim": 12,
184
+ "output_dim": 128,
185
+ "widths": [
186
+ 128,
187
+ 128
188
+ ],
189
+ "tanh_end": false,
190
+ "ln": true,
191
+ "token_num": 4
192
+ },
193
+ "gripper_state": {
194
+ "input_dim": 2,
195
+ "output_dim": 128,
196
+ "widths": [
197
+ 128,
198
+ 128
199
+ ],
200
+ "tanh_end": false,
201
+ "ln": true,
202
+ "token_num": 4
203
+ }
204
+ },
205
+ "action_head": {
206
+ "eef_pose": {
207
+ "type": "transformer",
208
+ "input_dim": 128,
209
+ "output_dim": 12,
210
+ "crossattn_modality_dropout": 0.1,
211
+ "crossattn_heads": 4,
212
+ "crossattn_dim_head": 16,
213
+ "action_horizon": 180
214
+ },
215
+ "body_pose": {
216
+ "type": "transformer",
217
+ "input_dim": 128,
218
+ "output_dim": 6,
219
+ "crossattn_modality_dropout": 0.1,
220
+ "crossattn_heads": 4,
221
+ "crossattn_dim_head": 16,
222
+ "action_horizon": 180
223
+ },
224
+ "gripper": {
225
+ "type": "transformer",
226
+ "input_dim": 128,
227
+ "output_dim": 2,
228
+ "crossattn_modality_dropout": 0.1,
229
+ "crossattn_heads": 4,
230
+ "crossattn_dim_head": 16,
231
+ "action_horizon": 180
232
+ }
233
+ }
234
+ }
235
+ },
236
+ "transformer_args": {
237
+ "output_len": {
238
+ "eef_pose": 6,
239
+ "body_pose": 6,
240
+ "gripper": 6
241
+ },
242
+ "trunk_mode": "encoder_decoder",
243
+ "hidden_dim": 128,
244
+ "dropout": 0.4,
245
+ "nheads": 16,
246
+ "dim_feedforward": 256,
247
+ "enc_layers": 4,
248
+ "dec_layers": 4,
249
+ "pre_norm": false,
250
+ "token_postprocessing": "none"
251
+ },
252
+ "share_cross_attn": false
253
+ }
254
+ }
pour/pour.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa15e5ffcc46d91f53df96d4445ee3b72dda6ab2a459d36257091d26f14434f
3
+ size 52447058
scoop/config.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_steps": 200000,
3
+ "eval_every": 100000,
4
+ "validate_every": 1000,
5
+ "save_every": 5000,
6
+ "resume_ckpt_path": null,
7
+ "lr": 0.0001,
8
+ "policy_class": "cross",
9
+ "onscreen_render": false,
10
+ "seed": 6,
11
+ "temporal_agg": false,
12
+ "real_robot": true,
13
+ "load_pretrain": false,
14
+ "height": 480,
15
+ "width": 1280,
16
+ "normalize_resnet": false,
17
+ "wandb": true,
18
+ "pretrained_path": null,
19
+ "randomize_data_degree": 3,
20
+ "randomize_data": false,
21
+ "config": null,
22
+ "eval": false,
23
+ "batch_size": 32,
24
+ "lr_tokenizer": 0.0001,
25
+ "lr_action_head": 0.0001,
26
+ "lr_trunk": 0.0001,
27
+ "skip_mirrored_data": false,
28
+ "actuator_network_dir": null,
29
+ "history_len": null,
30
+ "future_len": null,
31
+ "prediction_len": null,
32
+ "kl_weight": null,
33
+ "chunk_size": 120,
34
+ "hidden_dim": null,
35
+ "dim_feedforward": null,
36
+ "no_encoder": true,
37
+ "dec_layers": 3,
38
+ "nheads": 8,
39
+ "use_pos_embd_image": 1,
40
+ "use_pos_embd_action": 1,
41
+ "feature_loss_weight": 0.0,
42
+ "self_attention": 1,
43
+ "same_backbones": true,
44
+ "use_mask": false,
45
+ "use_wrist": true,
46
+ "data_aug": false,
47
+ "grayscale": false,
48
+ "randomize_color": false,
49
+ "pretrain_image_width": 1276,
50
+ "pretrain_image_height": 480,
51
+ "gpu_id": 0,
52
+ "train_ratio": 0.99,
53
+ "min_val_num": 20,
54
+ "share_cross_attn": false,
55
+ "agg_modalities": false,
56
+ "policy_config": {
57
+ "lr": 0.0001,
58
+ "lr_backbone": 1e-05,
59
+ "embodiment_args_dict": {
60
+ "normalize_state": true,
61
+ "locoman": {
62
+ "tokenizer": {
63
+ "hidden_dim": 128,
64
+ "nhead": 4,
65
+ "dim_head": 32,
66
+ "dropout": 0.1,
67
+ "main_image": {
68
+ "output_dim": 128,
69
+ "weights": "DEFAULT",
70
+ "resnet_model": "resnet18",
71
+ "num_of_copy": 1,
72
+ "token_num": 16
73
+ },
74
+ "wrist_image": {
75
+ "output_dim": 128,
76
+ "weights": "DEFAULT",
77
+ "resnet_model": "resnet18",
78
+ "num_of_copy": 1,
79
+ "token_num": 8
80
+ },
81
+ "body_pose_state": {
82
+ "input_dim": 6,
83
+ "output_dim": 128,
84
+ "widths": [
85
+ 128,
86
+ 128
87
+ ],
88
+ "tanh_end": false,
89
+ "ln": true,
90
+ "token_num": 4
91
+ },
92
+ "eef_pose_state": {
93
+ "input_dim": 12,
94
+ "output_dim": 128,
95
+ "widths": [
96
+ 128,
97
+ 128
98
+ ],
99
+ "tanh_end": false,
100
+ "ln": true,
101
+ "token_num": 4
102
+ },
103
+ "eef_to_body_pose_state": {
104
+ "input_dim": 12,
105
+ "output_dim": 128,
106
+ "widths": [
107
+ 128,
108
+ 128
109
+ ],
110
+ "tanh_end": false,
111
+ "ln": true,
112
+ "token_num": 4
113
+ },
114
+ "gripper_state": {
115
+ "input_dim": 2,
116
+ "output_dim": 128,
117
+ "widths": [
118
+ 128,
119
+ 128
120
+ ],
121
+ "tanh_end": false,
122
+ "ln": true,
123
+ "token_num": 4
124
+ }
125
+ },
126
+ "action_head": {
127
+ "eef_pose": {
128
+ "type": "transformer",
129
+ "input_dim": 128,
130
+ "output_dim": 12,
131
+ "crossattn_modality_dropout": 0.1,
132
+ "crossattn_heads": 4,
133
+ "crossattn_dim_head": 16,
134
+ "action_horizon": 120
135
+ },
136
+ "body_pose": {
137
+ "type": "transformer",
138
+ "input_dim": 128,
139
+ "output_dim": 6,
140
+ "crossattn_modality_dropout": 0.1,
141
+ "crossattn_heads": 4,
142
+ "crossattn_dim_head": 16,
143
+ "action_horizon": 120
144
+ },
145
+ "gripper": {
146
+ "type": "transformer",
147
+ "input_dim": 128,
148
+ "output_dim": 2,
149
+ "crossattn_modality_dropout": 0.1,
150
+ "crossattn_heads": 4,
151
+ "crossattn_dim_head": 16,
152
+ "action_horizon": 120
153
+ }
154
+ }
155
+ },
156
+ "human": {
157
+ "tokenizer": {
158
+ "hidden_dim": 128,
159
+ "nhead": 4,
160
+ "dim_head": 32,
161
+ "dropout": 0.1,
162
+ "main_image": {
163
+ "output_dim": 128,
164
+ "weights": "DEFAULT",
165
+ "resnet_model": "resnet18",
166
+ "num_of_copy": 1,
167
+ "token_num": 16
168
+ },
169
+ "body_pose_state": {
170
+ "input_dim": 6,
171
+ "output_dim": 128,
172
+ "widths": [
173
+ 128,
174
+ 128
175
+ ],
176
+ "tanh_end": false,
177
+ "ln": true,
178
+ "token_num": 4
179
+ },
180
+ "eef_pose_state": {
181
+ "input_dim": 12,
182
+ "output_dim": 128,
183
+ "widths": [
184
+ 128,
185
+ 128
186
+ ],
187
+ "tanh_end": false,
188
+ "ln": true,
189
+ "token_num": 4
190
+ },
191
+ "eef_to_body_pose_state": {
192
+ "input_dim": 12,
193
+ "output_dim": 128,
194
+ "widths": [
195
+ 128,
196
+ 128
197
+ ],
198
+ "tanh_end": false,
199
+ "ln": true,
200
+ "token_num": 4
201
+ },
202
+ "gripper_state": {
203
+ "input_dim": 2,
204
+ "output_dim": 128,
205
+ "widths": [
206
+ 128,
207
+ 128
208
+ ],
209
+ "tanh_end": false,
210
+ "ln": true,
211
+ "token_num": 4
212
+ }
213
+ },
214
+ "action_head": {
215
+ "eef_pose": {
216
+ "type": "transformer",
217
+ "input_dim": 128,
218
+ "output_dim": 12,
219
+ "crossattn_modality_dropout": 0.1,
220
+ "crossattn_heads": 4,
221
+ "crossattn_dim_head": 16,
222
+ "action_horizon": 120
223
+ },
224
+ "body_pose": {
225
+ "type": "transformer",
226
+ "input_dim": 128,
227
+ "output_dim": 6,
228
+ "crossattn_modality_dropout": 0.1,
229
+ "crossattn_heads": 4,
230
+ "crossattn_dim_head": 16,
231
+ "action_horizon": 120
232
+ },
233
+ "gripper": {
234
+ "type": "transformer",
235
+ "input_dim": 128,
236
+ "output_dim": 2,
237
+ "crossattn_modality_dropout": 0.1,
238
+ "crossattn_heads": 4,
239
+ "crossattn_dim_head": 16,
240
+ "action_horizon": 120
241
+ }
242
+ }
243
+ }
244
+ },
245
+ "transformer_args": {
246
+ "output_len": {
247
+ "eef_pose": 6,
248
+ "body_pose": 6,
249
+ "gripper": 6
250
+ },
251
+ "trunk_mode": "encoder_decoder",
252
+ "hidden_dim": 128,
253
+ "dropout": 0.5,
254
+ "nheads": 16,
255
+ "dim_feedforward": 256,
256
+ "enc_layers": 4,
257
+ "dec_layers": 4,
258
+ "pre_norm": false,
259
+ "token_postprocessing": "none"
260
+ },
261
+ "share_cross_attn": false,
262
+ "agg_modalities": false
263
+ }
264
+ }
scoop/scoop.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:494865bb48acddb4542a4e0a91a5f830a7e9064aa6b4e1fe81c6a9dbff2abe39
3
+ size 52442322
shoe_org/config.json ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_steps": 160000,
3
+ "eval_every": 100000,
4
+ "validate_every": 1000,
5
+ "save_every": 8000,
6
+ "resume_ckpt_path": null,
7
+ "lr": 0.0001,
8
+ "policy_class": "cross",
9
+ "onscreen_render": false,
10
+ "seed": 6,
11
+ "temporal_agg": false,
12
+ "real_robot": true,
13
+ "load_pretrain": false,
14
+ "height": 480,
15
+ "width": 1280,
16
+ "normalize_resnet": false,
17
+ "wandb": true,
18
+ "pretrained_path": null,
19
+ "randomize_data_degree": 3,
20
+ "randomize_data": false,
21
+ "config": null,
22
+ "eval": false,
23
+ "batch_size": 32,
24
+ "lr_tokenizer": 0.0001,
25
+ "lr_action_head": 0.0001,
26
+ "lr_trunk": 0.0001,
27
+ "skip_mirrored_data": false,
28
+ "actuator_network_dir": null,
29
+ "history_len": null,
30
+ "future_len": null,
31
+ "prediction_len": null,
32
+ "kl_weight": null,
33
+ "chunk_size": 180,
34
+ "hidden_dim": null,
35
+ "dim_feedforward": null,
36
+ "no_encoder": true,
37
+ "dec_layers": 3,
38
+ "nheads": 8,
39
+ "use_pos_embd_image": 1,
40
+ "use_pos_embd_action": 1,
41
+ "feature_loss_weight": 0.0,
42
+ "self_attention": 1,
43
+ "same_backbones": true,
44
+ "use_mask": false,
45
+ "use_wrist": true,
46
+ "data_aug": false,
47
+ "grayscale": false,
48
+ "randomize_color": false,
49
+ "pretrain_image_width": 1276,
50
+ "pretrain_image_height": 480,
51
+ "gpu_id": 0,
52
+ "train_ratio": 0.99,
53
+ "share_cross_attn": false,
54
+ "policy_config": {
55
+ "lr": 0.0001,
56
+ "lr_backbone": 1e-05,
57
+ "embodiment_args_dict": {
58
+ "normalize_state": true,
59
+ "locoman": {
60
+ "tokenizer": {
61
+ "hidden_dim": 128,
62
+ "nhead": 4,
63
+ "dim_head": 32,
64
+ "dropout": 0.1,
65
+ "main_image": {
66
+ "output_dim": 128,
67
+ "weights": "DEFAULT",
68
+ "resnet_model": "resnet18",
69
+ "num_of_copy": 1,
70
+ "token_num": 16
71
+ },
72
+ "wrist_image": {
73
+ "output_dim": 128,
74
+ "weights": "DEFAULT",
75
+ "resnet_model": "resnet18",
76
+ "num_of_copy": 1,
77
+ "token_num": 8
78
+ },
79
+ "body_pose_state": {
80
+ "input_dim": 6,
81
+ "output_dim": 128,
82
+ "widths": [
83
+ 128,
84
+ 128
85
+ ],
86
+ "tanh_end": false,
87
+ "ln": true,
88
+ "token_num": 4
89
+ },
90
+ "eef_pose_state": {
91
+ "input_dim": 12,
92
+ "output_dim": 128,
93
+ "widths": [
94
+ 128,
95
+ 128
96
+ ],
97
+ "tanh_end": false,
98
+ "ln": true,
99
+ "token_num": 4
100
+ },
101
+ "eef_to_body_pose_state": {
102
+ "input_dim": 12,
103
+ "output_dim": 128,
104
+ "widths": [
105
+ 128,
106
+ 128
107
+ ],
108
+ "tanh_end": false,
109
+ "ln": true,
110
+ "token_num": 4
111
+ },
112
+ "gripper_state": {
113
+ "input_dim": 2,
114
+ "output_dim": 128,
115
+ "widths": [
116
+ 128,
117
+ 128
118
+ ],
119
+ "tanh_end": false,
120
+ "ln": true,
121
+ "token_num": 4
122
+ }
123
+ },
124
+ "action_head": {
125
+ "eef_pose": {
126
+ "type": "transformer",
127
+ "input_dim": 128,
128
+ "output_dim": 12,
129
+ "crossattn_modality_dropout": 0.1,
130
+ "crossattn_heads": 4,
131
+ "crossattn_dim_head": 16,
132
+ "action_horizon": 180
133
+ },
134
+ "body_pose": {
135
+ "type": "transformer",
136
+ "input_dim": 128,
137
+ "output_dim": 6,
138
+ "crossattn_modality_dropout": 0.1,
139
+ "crossattn_heads": 4,
140
+ "crossattn_dim_head": 16,
141
+ "action_horizon": 180
142
+ },
143
+ "gripper": {
144
+ "type": "transformer",
145
+ "input_dim": 128,
146
+ "output_dim": 2,
147
+ "crossattn_modality_dropout": 0.1,
148
+ "crossattn_heads": 4,
149
+ "crossattn_dim_head": 16,
150
+ "action_horizon": 180
151
+ }
152
+ }
153
+ },
154
+ "human": {
155
+ "tokenizer": {
156
+ "hidden_dim": 128,
157
+ "nhead": 4,
158
+ "dim_head": 32,
159
+ "dropout": 0.1,
160
+ "main_image": {
161
+ "output_dim": 128,
162
+ "weights": "DEFAULT",
163
+ "resnet_model": "resnet18",
164
+ "num_of_copy": 1,
165
+ "token_num": 16
166
+ },
167
+ "body_pose_state": {
168
+ "input_dim": 6,
169
+ "output_dim": 128,
170
+ "widths": [
171
+ 128,
172
+ 128
173
+ ],
174
+ "tanh_end": false,
175
+ "ln": true,
176
+ "token_num": 4
177
+ },
178
+ "eef_pose_state": {
179
+ "input_dim": 12,
180
+ "output_dim": 128,
181
+ "widths": [
182
+ 128,
183
+ 128
184
+ ],
185
+ "tanh_end": false,
186
+ "ln": true,
187
+ "token_num": 4
188
+ },
189
+ "eef_to_body_pose_state": {
190
+ "input_dim": 12,
191
+ "output_dim": 128,
192
+ "widths": [
193
+ 128,
194
+ 128
195
+ ],
196
+ "tanh_end": false,
197
+ "ln": true,
198
+ "token_num": 4
199
+ },
200
+ "gripper_state": {
201
+ "input_dim": 2,
202
+ "output_dim": 128,
203
+ "widths": [
204
+ 128,
205
+ 128
206
+ ],
207
+ "tanh_end": false,
208
+ "ln": true,
209
+ "token_num": 4
210
+ }
211
+ },
212
+ "action_head": {
213
+ "eef_pose": {
214
+ "type": "transformer",
215
+ "input_dim": 128,
216
+ "output_dim": 12,
217
+ "crossattn_modality_dropout": 0.1,
218
+ "crossattn_heads": 4,
219
+ "crossattn_dim_head": 16,
220
+ "action_horizon": 180
221
+ },
222
+ "body_pose": {
223
+ "type": "transformer",
224
+ "input_dim": 128,
225
+ "output_dim": 6,
226
+ "crossattn_modality_dropout": 0.1,
227
+ "crossattn_heads": 4,
228
+ "crossattn_dim_head": 16,
229
+ "action_horizon": 180
230
+ },
231
+ "gripper": {
232
+ "type": "transformer",
233
+ "input_dim": 128,
234
+ "output_dim": 2,
235
+ "crossattn_modality_dropout": 0.1,
236
+ "crossattn_heads": 4,
237
+ "crossattn_dim_head": 16,
238
+ "action_horizon": 180
239
+ }
240
+ }
241
+ }
242
+ },
243
+ "transformer_args": {
244
+ "output_len": {
245
+ "eef_pose": 6,
246
+ "body_pose": 6,
247
+ "gripper": 6
248
+ },
249
+ "trunk_mode": "encoder_decoder",
250
+ "hidden_dim": 128,
251
+ "dropout": 0.4,
252
+ "nheads": 16,
253
+ "dim_feedforward": 256,
254
+ "enc_layers": 4,
255
+ "dec_layers": 4,
256
+ "pre_norm": false,
257
+ "token_postprocessing": "none"
258
+ },
259
+ "share_cross_attn": false
260
+ }
261
+ }
shoe_org/shoe_org.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd3d0787aaaeefded91d95f743abcf190abe22d485d39bda3e69c14c17259c9f
3
+ size 52451898
toy_collect/config.json ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_steps": 160000,
3
+ "eval_every": 100000,
4
+ "validate_every": 1000,
5
+ "save_every": 8000,
6
+ "resume_ckpt_path": null,
7
+ "lr": 0.0001,
8
+ "policy_class": "cross",
9
+ "onscreen_render": false,
10
+ "seed": 6,
11
+ "temporal_agg": false,
12
+ "real_robot": true,
13
+ "load_pretrain": false,
14
+ "height": 480,
15
+ "width": 1280,
16
+ "normalize_resnet": false,
17
+ "wandb": true,
18
+ "pretrained_path": null,
19
+ "randomize_data_degree": 3,
20
+ "randomize_data": false,
21
+ "config": null,
22
+ "eval": false,
23
+ "batch_size": 32,
24
+ "lr_tokenizer": 0.0001,
25
+ "lr_action_head": 0.0001,
26
+ "lr_trunk": 0.0001,
27
+ "skip_mirrored_data": false,
28
+ "actuator_network_dir": null,
29
+ "history_len": null,
30
+ "future_len": null,
31
+ "prediction_len": null,
32
+ "kl_weight": null,
33
+ "chunk_size": 60,
34
+ "hidden_dim": null,
35
+ "dim_feedforward": null,
36
+ "no_encoder": true,
37
+ "dec_layers": 3,
38
+ "nheads": 8,
39
+ "use_pos_embd_image": 1,
40
+ "use_pos_embd_action": 1,
41
+ "feature_loss_weight": 0.0,
42
+ "self_attention": 1,
43
+ "same_backbones": true,
44
+ "use_mask": false,
45
+ "use_wrist": true,
46
+ "data_aug": false,
47
+ "grayscale": false,
48
+ "randomize_color": false,
49
+ "pretrain_image_width": 1276,
50
+ "pretrain_image_height": 480,
51
+ "gpu_id": 0,
52
+ "train_ratio": 0.99,
53
+ "share_cross_attn": false,
54
+ "policy_config": {
55
+ "lr": 0.0001,
56
+ "lr_backbone": 1e-05,
57
+ "embodiment_args_dict": {
58
+ "normalize_state": true,
59
+ "locoman": {
60
+ "tokenizer": {
61
+ "hidden_dim": 128,
62
+ "nhead": 4,
63
+ "dim_head": 32,
64
+ "dropout": 0.1,
65
+ "main_image": {
66
+ "output_dim": 128,
67
+ "weights": "DEFAULT",
68
+ "resnet_model": "resnet18",
69
+ "num_of_copy": 1,
70
+ "token_num": 16
71
+ },
72
+ "wrist_image": {
73
+ "output_dim": 128,
74
+ "weights": "DEFAULT",
75
+ "resnet_model": "resnet18",
76
+ "num_of_copy": 1,
77
+ "token_num": 8
78
+ },
79
+ "body_pose_state": {
80
+ "input_dim": 6,
81
+ "output_dim": 128,
82
+ "widths": [
83
+ 128,
84
+ 128
85
+ ],
86
+ "tanh_end": false,
87
+ "ln": true,
88
+ "token_num": 4
89
+ },
90
+ "eef_pose_state": {
91
+ "input_dim": 12,
92
+ "output_dim": 128,
93
+ "widths": [
94
+ 128,
95
+ 128
96
+ ],
97
+ "tanh_end": false,
98
+ "ln": true,
99
+ "token_num": 4
100
+ },
101
+ "eef_to_body_pose_state": {
102
+ "input_dim": 12,
103
+ "output_dim": 128,
104
+ "widths": [
105
+ 128,
106
+ 128
107
+ ],
108
+ "tanh_end": false,
109
+ "ln": true,
110
+ "token_num": 4
111
+ },
112
+ "gripper_state": {
113
+ "input_dim": 2,
114
+ "output_dim": 128,
115
+ "widths": [
116
+ 128,
117
+ 128
118
+ ],
119
+ "tanh_end": false,
120
+ "ln": true,
121
+ "token_num": 4
122
+ }
123
+ },
124
+ "action_head": {
125
+ "eef_pose": {
126
+ "type": "transformer",
127
+ "input_dim": 128,
128
+ "output_dim": 12,
129
+ "crossattn_modality_dropout": 0.1,
130
+ "crossattn_heads": 4,
131
+ "crossattn_dim_head": 16,
132
+ "action_horizon": 60
133
+ },
134
+ "body_pose": {
135
+ "type": "transformer",
136
+ "input_dim": 128,
137
+ "output_dim": 6,
138
+ "crossattn_modality_dropout": 0.1,
139
+ "crossattn_heads": 4,
140
+ "crossattn_dim_head": 16,
141
+ "action_horizon": 60
142
+ },
143
+ "gripper": {
144
+ "type": "transformer",
145
+ "input_dim": 128,
146
+ "output_dim": 2,
147
+ "crossattn_modality_dropout": 0.1,
148
+ "crossattn_heads": 4,
149
+ "crossattn_dim_head": 16,
150
+ "action_horizon": 60
151
+ }
152
+ }
153
+ },
154
+ "human": {
155
+ "tokenizer": {
156
+ "hidden_dim": 128,
157
+ "nhead": 4,
158
+ "dim_head": 32,
159
+ "dropout": 0.1,
160
+ "main_image": {
161
+ "output_dim": 128,
162
+ "weights": "DEFAULT",
163
+ "resnet_model": "resnet18",
164
+ "num_of_copy": 1,
165
+ "token_num": 16
166
+ },
167
+ "body_pose_state": {
168
+ "input_dim": 6,
169
+ "output_dim": 128,
170
+ "widths": [
171
+ 128,
172
+ 128
173
+ ],
174
+ "tanh_end": false,
175
+ "ln": true,
176
+ "token_num": 4
177
+ },
178
+ "eef_pose_state": {
179
+ "input_dim": 12,
180
+ "output_dim": 128,
181
+ "widths": [
182
+ 128,
183
+ 128
184
+ ],
185
+ "tanh_end": false,
186
+ "ln": true,
187
+ "token_num": 4
188
+ },
189
+ "eef_to_body_pose_state": {
190
+ "input_dim": 12,
191
+ "output_dim": 128,
192
+ "widths": [
193
+ 128,
194
+ 128
195
+ ],
196
+ "tanh_end": false,
197
+ "ln": true,
198
+ "token_num": 4
199
+ },
200
+ "gripper_state": {
201
+ "input_dim": 2,
202
+ "output_dim": 128,
203
+ "widths": [
204
+ 128,
205
+ 128
206
+ ],
207
+ "tanh_end": false,
208
+ "ln": true,
209
+ "token_num": 4
210
+ }
211
+ },
212
+ "action_head": {
213
+ "eef_pose": {
214
+ "type": "transformer",
215
+ "input_dim": 128,
216
+ "output_dim": 12,
217
+ "crossattn_modality_dropout": 0.1,
218
+ "crossattn_heads": 4,
219
+ "crossattn_dim_head": 16,
220
+ "action_horizon": 60
221
+ },
222
+ "body_pose": {
223
+ "type": "transformer",
224
+ "input_dim": 128,
225
+ "output_dim": 6,
226
+ "crossattn_modality_dropout": 0.1,
227
+ "crossattn_heads": 4,
228
+ "crossattn_dim_head": 16,
229
+ "action_horizon": 60
230
+ },
231
+ "gripper": {
232
+ "type": "transformer",
233
+ "input_dim": 128,
234
+ "output_dim": 2,
235
+ "crossattn_modality_dropout": 0.1,
236
+ "crossattn_heads": 4,
237
+ "crossattn_dim_head": 16,
238
+ "action_horizon": 60
239
+ }
240
+ }
241
+ }
242
+ },
243
+ "transformer_args": {
244
+ "output_len": {
245
+ "eef_pose": 6,
246
+ "body_pose": 6,
247
+ "gripper": 6
248
+ },
249
+ "trunk_mode": "encoder_decoder",
250
+ "hidden_dim": 128,
251
+ "dropout": 0.4,
252
+ "nheads": 16,
253
+ "dim_feedforward": 256,
254
+ "enc_layers": 4,
255
+ "dec_layers": 4,
256
+ "pre_norm": false,
257
+ "token_postprocessing": "none"
258
+ },
259
+ "share_cross_attn": false
260
+ }
261
+ }
toy_collect/toy_collect.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b183e4bfa0db8efeff596bd3f122ae8f45657aa756f67d7e83a5b928a14bcd6
3
+ size 52441829