Upload 40 files
Browse files- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/.DS_Store +0 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
- LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/.DS_Store
ADDED
Binary file (8.2 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easydense-v0",
|
61 |
+
"group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac9543bf2faf5dbfb20c480bac3c3afc9be279ec156cfb5f03c1e70d9586305d
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easydense-v0",
|
61 |
+
"group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easymean-v0",
|
61 |
+
"group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24fa28b232f7ae9a7a1a2408e71f847c3b039ec1c3d8932d94df67cd132d46db
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easymean-v0",
|
61 |
+
"group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easysparse-v0",
|
61 |
+
"group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9289c63384bc52b98c9640d91a00bd718910c43dff71e934456618cacf054ba
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easysparse-v0",
|
61 |
+
"group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easydense-v0",
|
61 |
+
"group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0c0d3efb072c6b2ef0809ce64d7543ed5d939b7394b43d6a741db0e83d4d1c7
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easydense-v0",
|
61 |
+
"group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easymean-v0",
|
61 |
+
"group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaa477e837e36c77fa7cff36292ef4915d9f8656342ae83fb25f9f0209bb87d9
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easymean-v0",
|
61 |
+
"group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easysparse-v0",
|
61 |
+
"group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef8263489f90229a59fbd39f82712f65c4e81183c1a78678ac0a969ebbda56b5
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easysparse-v0",
|
61 |
+
"group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easydense-v0",
|
61 |
+
"group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b4f71a9b8cc7a4cd3c9b7f1ffabb3529ae17c8e1cfc7416a8ef2e7e9c0bbf42
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easydense-v0",
|
61 |
+
"group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easymean-v0",
|
61 |
+
"group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:655f5596257d25839439abe7c52bee11a7c5902a4f49dfe1dfca7ee3bbd3baef
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easymean-v0",
|
61 |
+
"group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easysparse-v0",
|
61 |
+
"group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c51a5ee4bf2317f64cd79e82f4bd2e7d718fa5eb4f36b1d1130607754bef610d
|
3 |
+
size 41309313
|
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"project": "PSEC",
|
3 |
+
"experiment_name": "ddpm_lora",
|
4 |
+
"timestamp": "LoRA-rank8-alpha16-reward1-cost1",
|
5 |
+
"max_steps": 1,
|
6 |
+
"pretrain_model": "",
|
7 |
+
"lora0": "",
|
8 |
+
"com_method": 0,
|
9 |
+
"batch_size": 2048,
|
10 |
+
"eval_episodes": 10,
|
11 |
+
"log_interval": 1000,
|
12 |
+
"save_steps": 50000,
|
13 |
+
"eval_interval": 50000,
|
14 |
+
"save_video": false,
|
15 |
+
"filter_threshold": null,
|
16 |
+
"take_top": null,
|
17 |
+
"online_max_steps": 0,
|
18 |
+
"unsquash_actions": false,
|
19 |
+
"normalize_returns": true,
|
20 |
+
"ratio": 1.0,
|
21 |
+
"training_time_inference_params": {
|
22 |
+
"N": 64,
|
23 |
+
"clip_sampler": true,
|
24 |
+
"M": 1
|
25 |
+
},
|
26 |
+
"rl_config": {
|
27 |
+
"model_cls": "LoRALearner",
|
28 |
+
"actor_lr": 0.0003,
|
29 |
+
"critic_lr": 0.0003,
|
30 |
+
"value_lr": 0.0003,
|
31 |
+
"T": 5,
|
32 |
+
"N": 64,
|
33 |
+
"M": 0,
|
34 |
+
"actor_dropout_rate": 0.1,
|
35 |
+
"actor_num_blocks": 3,
|
36 |
+
"decay_steps": 1000000,
|
37 |
+
"actor_layer_norm": true,
|
38 |
+
"value_layer_norm": true,
|
39 |
+
"actor_tau": 0.001,
|
40 |
+
"critic_objective": "expectile",
|
41 |
+
"critic_hyperparam": 0.7,
|
42 |
+
"cost_critic_hyperparam": 0.9,
|
43 |
+
"critic_type": "hj",
|
44 |
+
"cost_ub": 100,
|
45 |
+
"beta_schedule": "vp",
|
46 |
+
"cost_temperature": 1,
|
47 |
+
"reward_temperature": 1,
|
48 |
+
"cost_limit": 10,
|
49 |
+
"actor_objective": "bc",
|
50 |
+
"sampling_method": "ddpm",
|
51 |
+
"extract_method": "minqc",
|
52 |
+
"rank": 8,
|
53 |
+
"alpha_r": 16
|
54 |
+
},
|
55 |
+
"dataset_kwargs": {
|
56 |
+
"cost_scale": 25,
|
57 |
+
"pr_data": "data/point_robot-expert-random-100k.hdf5"
|
58 |
+
},
|
59 |
+
"seed": 42,
|
60 |
+
"env_name": "OfflineMetadrive-easysparse-v0",
|
61 |
+
"group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
|
62 |
+
"inference_variants": [
|
63 |
+
{
|
64 |
+
"N": 1,
|
65 |
+
"clip_sampler": true,
|
66 |
+
"M": 0
|
67 |
+
}
|
68 |
+
]
|
69 |
+
}
|