PPO playing AntBulletEnv-v0 from https://github.com/sgoodfriend/rl-algo-impls/tree/983cb75e43e51cf4ef57f177194ab9a4a1a8808b
23190a6
| CartPole-v1: &cartpole-defaults | |
| n_timesteps: !!float 5e5 | |
| env_hyperparams: | |
| n_envs: 8 | |
| CartPole-v0: | |
| <<: *cartpole-defaults | |
| MountainCar-v0: | |
| n_timesteps: !!float 1e6 | |
| env_hyperparams: | |
| n_envs: 16 | |
| normalize: true | |
| MountainCarContinuous-v0: | |
| n_timesteps: !!float 1e5 | |
| env_hyperparams: | |
| n_envs: 4 | |
| normalize: true | |
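| # Disabled policy overrides, kept for reference. | |
| # NOTE(review): sde_sample_freq under algo_hyperparams presumably has no effect while use_sde stays commented out; verify. | |
| # policy_hyperparams: | |
| # use_sde: true | |
| # log_std_init: 0.0 | |
| # init_layers_orthogonal: false | |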
| algo_hyperparams: | |
| n_steps: 100 | |
| sde_sample_freq: 16 | |
| Acrobot-v1: | |
| n_timesteps: !!float 5e5 | |
| env_hyperparams: | |
| normalize: true | |
| n_envs: 16 | |
| # Tuned hyperparameters for LunarLander-v2 | |
| LunarLander-v2: | |
| device: cpu | |
| n_timesteps: !!float 1e6 | |
| env_hyperparams: | |
| n_envs: 4 | |
| normalize: true | |
| algo_hyperparams: | |
| n_steps: 2 | |
| gamma: 0.9955517404308908 | |
| gae_lambda: 0.9875340918797773 | |
| learning_rate: 0.0013814130817068916 | |
| learning_rate_decay: linear | |
| ent_coef: !!float 3.388369146384422e-7 | |
| ent_coef_decay: none | |
| max_grad_norm: 3.33982095073364 | |
| normalize_advantage: true | |
| vf_coef: 0.1667838310548184 | |
| BipedalWalker-v3: | |
| n_timesteps: !!float 5e6 | |
| env_hyperparams: | |
| n_envs: 16 | |
| normalize: true | |
| policy_hyperparams: | |
| use_sde: true | |
| log_std_init: -2 | |
| init_layers_orthogonal: false | |
| algo_hyperparams: | |
| ent_coef: 0 | |
| max_grad_norm: 0.5 | |
| n_steps: 8 | |
| gae_lambda: 0.9 | |
| vf_coef: 0.4 | |
| gamma: 0.99 | |
| learning_rate: !!float 9.6e-4 | |
| learning_rate_decay: linear | |
| HalfCheetahBulletEnv-v0: &pybullet-defaults | |
| n_timesteps: !!float 2e6 | |
| env_hyperparams: | |
| n_envs: 4 | |
| normalize: true | |
| policy_hyperparams: | |
| use_sde: true | |
| log_std_init: -2 | |
| init_layers_orthogonal: false | |
| algo_hyperparams: &pybullet-algo-defaults | |
| n_steps: 8 | |
| ent_coef: 0 | |
| max_grad_norm: 0.5 | |
| gae_lambda: 0.9 | |
| gamma: 0.99 | |
| vf_coef: 0.4 | |
| learning_rate: !!float 9.6e-4 | |
| learning_rate_decay: linear | |
| AntBulletEnv-v0: | |
| <<: *pybullet-defaults | |
| Walker2DBulletEnv-v0: | |
| <<: *pybullet-defaults | |
| HopperBulletEnv-v0: | |
| <<: *pybullet-defaults | |
| # Tuned hyperparameters for CarRacing-v0 | |
| CarRacing-v0: | |
| n_timesteps: !!float 4e6 | |
| env_hyperparams: | |
| n_envs: 4 | |
| frame_stack: 4 | |
| normalize: true | |
| normalize_kwargs: | |
| norm_obs: false | |
| norm_reward: true | |
| policy_hyperparams: | |
| use_sde: true | |
| log_std_init: -4.839609092563 | |
| init_layers_orthogonal: true | |
| activation_fn: tanh | |
| share_features_extractor: false | |
| cnn_flatten_dim: 256 | |
| hidden_sizes: [256] | |
| algo_hyperparams: | |
| n_steps: 64 | |
| learning_rate: 0.000018971962220405576 | |
| gamma: 0.9942776405534832 | |
| gae_lambda: 0.9549244758833236 | |
| ent_coef: 0.0000015666550584860516 | |
| ent_coef_decay: linear | |
| vf_coef: 0.12164696385898476 | |
| max_grad_norm: 2.2574480552177127 | |
| normalize_advantage: false | |
| use_rms_prop: false | |
| sde_sample_freq: 16 | |
| _atari: &atari-defaults | |
| n_timesteps: !!float 1e7 | |
| env_hyperparams: &atari-env-defaults | |
| n_envs: 16 | |
| frame_stack: 4 | |
| no_reward_timeout_steps: 1000 | |
| no_reward_fire_steps: 500 | |
| vec_env_class: async | |
| policy_hyperparams: &atari-policy-defaults | |
| activation_fn: relu | |
| algo_hyperparams: | |
| ent_coef: 0.01 | |
| vf_coef: 0.25 | |