---
tags:
- deep-reinforcement-learning
- reinforcement-learning
- stable-baselines3
---
# PPO Agent playing PongNoFrameskip-v4
This is a trained model of a PPO agent playing PongNoFrameskip-v4 using the stable-baselines3 library.
<video src="https://huggingface.co/ThomasSimonini/ppo-PongNoFrameskip-v4/resolve/main/output.mp4" controls autoplay loop></video>
# Usage (with Stable-Baselines3)
## Evaluation Results
Mean reward: 21.00 +/- 0.00
## Watch your agent interact
- You need to use `gym==0.19` since it **includes the Atari ROMs**.
- The action space is 6 since we use only the **legal actions**, as the quick check below shows.
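A minimal sanity check (assuming `gym==0.19` and `stable-baselines3` are installed) confirming that the wrapped environment exposes exactly those 6 discrete actions:
```python
from stable_baselines3.common.env_util import make_atari_env

env = make_atari_env("PongNoFrameskip-v4", n_envs=1)
print(env.action_space)  # Discrete(6)
# The 6 legal Pong actions: NOOP, FIRE, RIGHT, LEFT, RIGHTFIRE, LEFTFIRE
```
The full script to load, evaluate, and record the agent: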
```python
# Install these libraries (don't forget to restart the runtime after installing them)
!pip install stable-baselines3[extra]
!pip install huggingface_sb3
!pip install huggingface_hub
!pip install pickle5
# Import the libraries
import os
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecNormalize, VecFrameStack, VecVideoRecorder, DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.evaluation import evaluate_policy
from huggingface_sb3 import load_from_hub, push_to_hub
# Load the model checkpoint from the Hugging Face Hub
checkpoint = load_from_hub("ThomasSimonini/ppo-PongNoFrameskip-v4", "ppo-PongNoFrameskip-v4.zip")

# The agent was trained with Python 3.8, but Colab runs Python 3.7.
# Overriding these objects avoids pickle deserialization errors
# (the schedules are not needed for inference, so dummy values are fine):
custom_objects = {
    "learning_rate": 0.0,
    "lr_schedule": lambda _: 0.0,
    "clip_range": lambda _: 0.0,
}

model = PPO.load(checkpoint, custom_objects=custom_objects)
# Evaluate the agent
env = make_atari_env('PongNoFrameskip-v4', n_envs=1)
env = VecFrameStack(env, n_stack=4)
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
# Generate a video of your agent performing with Colab
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install colabgymrender==1.0.2

# Wrap the environment with colabgymrender's Recorder so that
# env.play() can record and display the episode in the notebook
from colabgymrender.recorder import Recorder
env = Recorder(env, './video')

observation = env.reset()
terminal = False
while not terminal:
    action, _state = model.predict(observation)
    observation, reward, terminal, info = env.step(action)
env.play()
```
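If `colabgymrender` is flaky in your runtime, a headless alternative is SB3's own `VecVideoRecorder`, which writes an `.mp4` to disk instead of playing it inline. A minimal sketch, reusing the `model` loaded above (the folder name, clip length, and name prefix are arbitrary choices):
```python
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder

# Record the first 2000 steps of an evaluation run to ./videos as .mp4
env = make_atari_env("PongNoFrameskip-v4", n_envs=1)
env = VecFrameStack(env, n_stack=4)
env = VecVideoRecorder(env, "videos",
                       record_video_trigger=lambda step: step == 0,
                       video_length=2000,
                       name_prefix="ppo-PongNoFrameskip-v4")

observation = env.reset()
for _ in range(2000):
    action, _state = model.predict(observation)
    observation, reward, terminal, info = env.step(action)
env.close()  # flushes and finalizes the video file
```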
## Training Code
- You need to use `gym==0.19` since it **includes the Atari ROMs**.
- The action space is 6 since we use only the **legal actions**.
```python
import wandb
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack, VecVideoRecorder
from stable_baselines3.common.callbacks import CheckpointCallback
from wandb.integration.sb3 import WandbCallback
from huggingface_sb3 import load_from_hub, push_to_hub
config = {
    "env_name": "PongNoFrameskip-v4",
    "num_envs": 8,
    "total_timesteps": int(10e6),
    "seed": 4089164106,
}

run = wandb.init(
    project="HFxSB3",
    config=config,
    sync_tensorboard=True,  # Auto-upload sb3's tensorboard metrics
    monitor_gym=True,  # Auto-upload the videos of agents playing the game
    save_code=True,  # Save the code to W&B
)
# There already exists an environment generator (make_atari_env)
# that will make and wrap Atari environments correctly.
# Here we also use multi-worker training (n_envs=8 => 8 environments)
env = make_atari_env(config["env_name"], n_envs=config["num_envs"], seed=config["seed"])
print("ENV ACTION SPACE: ", env.action_space.n)
# Frame-stacking with 4 frames
env = VecFrameStack(env, n_stack=4)

# Video recorder: save a 2000-step clip every 100k steps
env = VecVideoRecorder(env, "videos", record_video_trigger=lambda x: x % 100000 == 0, video_length=2000)

# Hyperparameters taken from the rl-baselines3-zoo trained agent:
# https://github.com/DLR-RM/rl-trained-agents/blob/10a9c31e806820d59b20d8b85ca67090338ea912/ppo/PongNoFrameskip-v4_1/PongNoFrameskip-v4/config.yml
model = PPO(
    policy="CnnPolicy",
    env=env,
    batch_size=256,
    clip_range=0.1,
    ent_coef=0.01,
    gae_lambda=0.9,
    gamma=0.99,
    learning_rate=2.5e-4,
    max_grad_norm=0.5,
    n_epochs=4,
    n_steps=128,
    vf_coef=0.5,
    tensorboard_log="runs",
    verbose=1,
)
model.learn(
    total_timesteps=config["total_timesteps"],
    callback=[
        WandbCallback(
            gradient_save_freq=1000,
            model_save_path=f"models/{run.id}",
        ),
        CheckpointCallback(
            save_freq=10000,
            save_path="./pong",
            name_prefix=config["env_name"],
        ),
    ],
)
model.save("ppo-PongNoFrameskip-v4.zip")
push_to_hub(
    repo_id="ThomasSimonini/ppo-PongNoFrameskip-v4",
    filename="ppo-PongNoFrameskip-v4.zip",
    commit_message="Added Pong trained agent",
)
```
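The `CheckpointCallback` above writes intermediate snapshots to `./pong`. If a Colab session dies mid-run, you can resume from the latest one. A minimal sketch, reusing the `env` and `config` from the training script: SB3 names checkpoints `{name_prefix}_{num_timesteps}_steps.zip`, and the exact step count shown here is illustrative.
```python
from stable_baselines3 import PPO

# Load the snapshot into the same (frame-stacked) training environment,
# then continue training without resetting the step counter.
model = PPO.load("./pong/PongNoFrameskip-v4_1000000_steps.zip", env=env)
model.learn(
    total_timesteps=config["total_timesteps"],
    reset_num_timesteps=False,  # keep the original timestep count in the logs
)
```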