LTL07 committed on
Commit
fc1a7f4
·
verified ·
1 Parent(s): a835af0

Upload 40 files

Browse files
Files changed (40) hide show
  1. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/.DS_Store +0 -0
  2. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/.DS_Store +0 -0
  3. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  4. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  5. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  6. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/.DS_Store +0 -0
  7. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/.DS_Store +0 -0
  8. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  9. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  10. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  11. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/.DS_Store +0 -0
  12. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  13. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  14. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  15. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/.DS_Store +0 -0
  16. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  17. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  18. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  19. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/.DS_Store +0 -0
  20. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/.DS_Store +0 -0
  21. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  22. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  23. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  24. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/.DS_Store +0 -0
  25. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  26. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  27. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  28. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/.DS_Store +0 -0
  29. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  30. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  31. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  32. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/.DS_Store +0 -0
  33. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/.DS_Store +0 -0
  34. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  35. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  36. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
  37. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/.DS_Store +0 -0
  38. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/config.json +69 -0
  39. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle +3 -0
  40. LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json +69 -0
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/.DS_Store ADDED
Binary file (8.2 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easydense-v0",
61
+ "group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9543bf2faf5dbfb20c480bac3c3afc9be279ec156cfb5f03c1e70d9586305d
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easydense-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easydense-v0",
61
+ "group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easymean-v0",
61
+ "group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fa28b232f7ae9a7a1a2408e71f847c3b039ec1c3d8932d94df67cd132d46db
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easymean-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easymean-v0",
61
+ "group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easysparse-v0",
61
+ "group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9289c63384bc52b98c9640d91a00bd718910c43dff71e934456618cacf054ba
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-easysparse-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easysparse-v0",
61
+ "group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easydense-v0",
61
+ "group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c0d3efb072c6b2ef0809ce64d7543ed5d939b7394b43d6a741db0e83d4d1c7
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-harddense-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easydense-v0",
61
+ "group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easymean-v0",
61
+ "group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaa477e837e36c77fa7cff36292ef4915d9f8656342ae83fb25f9f0209bb87d9
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardmean-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easymean-v0",
61
+ "group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easysparse-v0",
61
+ "group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8263489f90229a59fbd39f82712f65c4e81183c1a78678ac0a969ebbda56b5
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-hardsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easysparse-v0",
61
+ "group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easydense-v0",
61
+ "group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b4f71a9b8cc7a4cd3c9b7f1ffabb3529ae17c8e1cfc7416a8ef2e7e9c0bbf42
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumdense-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easydense-v0",
61
+ "group": "OfflineMetadrive-easydense-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easymean-v0",
61
+ "group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:655f5596257d25839439abe7c52bee11a7c5902a4f49dfe1dfca7ee3bbd3baef
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediummean-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easymean-v0",
61
+ "group": "OfflineMetadrive-easymean-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/.DS_Store ADDED
Binary file (6.15 kB). View file
 
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easysparse-v0",
61
+ "group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora/model50000.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51a5ee4bf2317f64cd79e82f4bd2e7d718fa5eb4f36b1d1130607754bef610d
3
+ size 41309313
LoRA-finetune/LoRA-rank8-alpha16-reward1-cost1/LoRA-rank8-alpha16-reward1-cost1_OfflineMetadrive-mediumsparse-v0_ddpm_lora_/ddpm_lora_bc/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "project": "PSEC",
3
+ "experiment_name": "ddpm_lora",
4
+ "timestamp": "LoRA-rank8-alpha16-reward1-cost1",
5
+ "max_steps": 1,
6
+ "pretrain_model": "",
7
+ "lora0": "",
8
+ "com_method": 0,
9
+ "batch_size": 2048,
10
+ "eval_episodes": 10,
11
+ "log_interval": 1000,
12
+ "save_steps": 50000,
13
+ "eval_interval": 50000,
14
+ "save_video": false,
15
+ "filter_threshold": null,
16
+ "take_top": null,
17
+ "online_max_steps": 0,
18
+ "unsquash_actions": false,
19
+ "normalize_returns": true,
20
+ "ratio": 1.0,
21
+ "training_time_inference_params": {
22
+ "N": 64,
23
+ "clip_sampler": true,
24
+ "M": 1
25
+ },
26
+ "rl_config": {
27
+ "model_cls": "LoRALearner",
28
+ "actor_lr": 0.0003,
29
+ "critic_lr": 0.0003,
30
+ "value_lr": 0.0003,
31
+ "T": 5,
32
+ "N": 64,
33
+ "M": 0,
34
+ "actor_dropout_rate": 0.1,
35
+ "actor_num_blocks": 3,
36
+ "decay_steps": 1000000,
37
+ "actor_layer_norm": true,
38
+ "value_layer_norm": true,
39
+ "actor_tau": 0.001,
40
+ "critic_objective": "expectile",
41
+ "critic_hyperparam": 0.7,
42
+ "cost_critic_hyperparam": 0.9,
43
+ "critic_type": "hj",
44
+ "cost_ub": 100,
45
+ "beta_schedule": "vp",
46
+ "cost_temperature": 1,
47
+ "reward_temperature": 1,
48
+ "cost_limit": 10,
49
+ "actor_objective": "bc",
50
+ "sampling_method": "ddpm",
51
+ "extract_method": "minqc",
52
+ "rank": 8,
53
+ "alpha_r": 16
54
+ },
55
+ "dataset_kwargs": {
56
+ "cost_scale": 25,
57
+ "pr_data": "data/point_robot-expert-random-100k.hdf5"
58
+ },
59
+ "seed": 42,
60
+ "env_name": "OfflineMetadrive-easysparse-v0",
61
+ "group": "OfflineMetadrive-easysparse-v0_ddpm_lora_",
62
+ "inference_variants": [
63
+ {
64
+ "N": 1,
65
+ "clip_sampler": true,
66
+ "M": 0
67
+ }
68
+ ]
69
+ }