Spaces:
Running
Running
yuanmingqi
committed on
Commit
·
e313dc5
1
Parent(s):
eaa89f4
update
Browse files- app.py +77 -0
- benchmarks.py +33 -0
- model.py +61 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from benchmarks import benchmarks, update_environments
|
3 |
+
from model import submit_model
|
4 |
+
|
5 |
+
def clear_form():
    """Return the reset values for the submission form.

    One ``None`` is produced per component that the Clear button lists as an
    output (six in total), which makes Gradio blank each of those fields.
    """
    field_count = 6
    return [None] * field_count
|
7 |
+
|
8 |
+
# Build the RLArena submission page: a header, the submission form (left),
# the agent uploader and result box (right), and a row of sponsor logos.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<center><h1 style='font-size: 40px;'>⚔️Reinforcement Learning Agent Arena⚔️</h1></center>")
    gr.Markdown("""
    # 📜📜📜 The workflow of RLArena
    - Select a benchmark, an environment, and a specific version.
    - Enter your github username and the link to your code.
    - Click "Submit" to evaluate your agent.
    # 🏆🏆🏆 Visit the leaderboard
    - Accelerate your RL research with the well-organized benchmark scores.
    # 💪💪💪 Submit your agent now!
    """)
    with gr.Row():
        # Left column: submission metadata and action buttons.
        with gr.Column(scale=2):
            with gr.Row():
                github_username = gr.Textbox(label="Github Username", info="Please enter your github username, e.g., username.")
                benchmark = gr.Dropdown(label="Benchmark", choices=list(benchmarks.keys()), info="Please select a benchmark, e.g., Procgen.")
            with gr.Row():
                # Both dropdowns start empty; they are filled once a benchmark is chosen.
                environment = gr.Dropdown(label="Environment", choices=[], value=None, info="Please select an environment, e.g., Miner.")
                version = gr.Dropdown(label="Version", choices=[], value=None, info="Please select a version, e.g., v0.")
            with gr.Row():
                training_steps = gr.Number(label="Training Steps", precision=0, info="Please enter the training steps, e.g., 1000000.")
                code_link = gr.Textbox(label="Code Link", info="Example: https://github.com/username/repo, the link should be accessible.")
            with gr.Row():
                submit_button = gr.Button("Submit", variant="primary")
                clear_button = gr.Button("Clear", variant="secondary")
        # Right column: agent upload and the evaluation report.
        with gr.Column(scale=1):
            model_uploader = gr.File(label="Upload the agent here!")
            output = gr.Textbox(label="Evaluation Result")

    # Refresh the environment/version choices whenever the benchmark changes.
    benchmark.change(
        fn=update_environments,
        inputs=benchmark,
        outputs=[environment, version]
    )

    submit_button.click(submit_model, inputs=[github_username, benchmark, environment, version, training_steps, code_link, model_uploader], outputs=output)
    clear_button.click(clear_form, inputs=[], outputs=[github_username, benchmark, environment, version, training_steps, code_link])
    # clear_form only resets the six fields above; also drop any stale upload
    # and the previous result so "Clear" really empties the whole form.
    clear_button.click(lambda: (None, None), inputs=[], outputs=[model_uploader, output])

    with gr.Row():
        # Sponsor logos, laid out four per row with plain HTML/flexbox.
        html_images = """
        <div><br><br></div>

        # 🔥🔥🔥 Powered by
        <div style="display: flex; flex-wrap: wrap; gap: 10px; justify-content: left;">
        <div style="flex: 0 0 calc(25% - 10px); display: flex; justify-content: center; align-items: center;">
        <img src="/file=static/logo_polyu.png" alt="Image 1" style="max-width: 100%; height: auto;">
        </div>
        <div style="flex: 0 0 calc(25% - 10px); display: flex; justify-content: center; align-items: center;">
        <img src="/file=static/logo_sjtu.png" alt="Image 2" style="max-width: 100%; height: auto;">
        </div>
        <div style="flex: 0 0 calc(25% - 10px); display: flex; justify-content: center; align-items: center;">
        <img src="/file=static/logo_eias.png" alt="Image 3" style="max-width: 100%; height: auto;">
        </div>
        <div style="flex: 0 0 calc(25% - 10px); display: flex; justify-content: center; align-items: center;">
        <img src="/file=static/logo_idt.png" alt="Image 4" style="max-width: 100%; height: auto;">
        </div>
        <div style="flex: 0 0 calc(25% - 10px); display: flex; justify-content: center; align-items: center;">
        <img src="/file=static/logo_ustc.png" alt="Image 5" style="max-width: 100%; height: auto;">
        </div>
        <div style="flex: 0 0 calc(25% - 10px); display: flex; justify-content: center; align-items: center;">
        <img src="/file=static/logo_purdue.png" alt="Image 6" style="max-width: 100%; height: auto;">
        </div>
        </div>
        """
        gr.Markdown(html_images)


# Only the logo directory needs to be served; allowing "./" would expose the
# whole working directory (source files, anything written there) over HTTP.
demo.launch(allowed_paths=["./static"])
|
benchmarks.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import gymnasium as gym
|
3 |
+
|
4 |
+
# Catalogue of supported benchmarks. Each entry maps a benchmark's display
# name to the environments ("env") and variants ("version") offered for it;
# these lists are what the UI dropdowns are populated from.
benchmarks = {
    "Arcade Learning Environment": {
        "env": [
            "Breakout",
            "Pong",
            "Qbert",
            "Seaquest",
            "SpaceInvaders",
        ],
        "version": ["NoFrameskip-v4"],
    },
    "Procgen": {
        "env": [
            "BigFish",
            "BossFight",
            "Chaser",
            "Climber",
            "CaveFlyer",
            "CoinRun",
            "Dodgeball",
            "FruitBot",
            "Heist",
            "Jumper",
            "Leaper",
            "Maze",
            "Miner",
            "Ninja",
            "Plunder",
            "Starpilot",
        ],
        "version": ["Easy", "Hard"],
    },
    "DeepMind Control Suite": {
        "env": [
            "Humanoid_Stand",
            "Humanoid_Walk",
            "Humanoid_Run",
        ],
        "version": ["State-based", "Image-based"],
    },
}
|
19 |
+
|
20 |
+
def update_environments(benchmark):
    """Build the dropdown updates that follow a benchmark selection.

    Args:
        benchmark: Key into the module-level ``benchmarks`` mapping (the
            value of the benchmark dropdown).

    Returns:
        A pair of ``gr.update`` objects, first for the environment dropdown
        and then for the version dropdown. Both carry the choices for the
        selected benchmark, or empty choices when the benchmark is unknown;
        in either case the current selection is reset to ``None``.
    """
    spec = benchmarks.get(benchmark)

    # Unknown (or cleared) benchmark: empty both dropdowns.
    if spec is None:
        print("No environments found for the selected benchmark")
        env_update = gr.update(choices=[], value=None)
        version_update = gr.update(choices=[], value=None)
        return env_update, version_update

    print(f"Updating environments for {benchmark}: {spec}")
    env_update = gr.update(choices=spec['env'], value=None)
    version_update = gr.update(choices=spec['version'], value=None)
    return env_update, version_update
|
27 |
+
|
28 |
+
def build_env(benchmark, environment, version):
    """Create the Gymnasium environment an agent is evaluated in.

    NOTE: this is still a placeholder. ``benchmark``, ``environment`` and
    ``version`` are accepted but not used yet; every call builds
    ``CartPole-v1`` regardless of what was selected.

    Returns:
        The environment returned by ``gym.make``.
    """
    # TODO: map (benchmark, environment, version) to the real environment id
    # once the benchmark packages are available.
    placeholder_env_id = "CartPole-v1"
    return gym.make(placeholder_env_id)
|
model.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
# import torch as th
|
3 |
+
import numpy as np
|
4 |
+
from benchmarks import build_env
|
5 |
+
|
6 |
+
def submit_model(github_username,
                 benchmark,
                 environment,
                 version,
                 training_steps,
                 code_link,
                 model_uploader
                 ):
    """Validate a submission, evaluate the uploaded agent and report the result.

    Args:
        github_username: GitHub account name of the submitter.
        benchmark: Selected benchmark name.
        environment: Selected environment name.
        version: Selected environment version.
        training_steps: Number of training steps reported by the submitter.
        code_link: URL of the submitter's code; must contain the username.
        model_uploader: The uploaded agent file, or ``None`` if nothing was
            uploaded.

    Returns:
        A text report: either an ``ERROR:`` message when validation fails, or
        an ``INFO:`` summary that includes the mean of the rewards returned
        by ``evaluate_model``.
    """
    username_error_msg = "\nERROR: The GitHub username should be consistent with the code link!\n"
    model_none_error_msg = "\nERROR: No model uploaded!\n"

    # Normalise defensively: cleared form fields may arrive as None, and an
    # empty username must be rejected explicitly because "" is a substring of
    # every string and would otherwise pass the consistency check.
    username = (github_username or "").strip().lower()
    link = (code_link or "").strip().lower()
    if not username or username not in link:
        return username_error_msg
    if model_uploader is None:
        return model_none_error_msg

    episode_rewards = evaluate_model(model_uploader, benchmark, environment, version)
    avg_episode_rewards = np.mean(episode_rewards)

    # Format the report only now, so that it carries the evaluated score
    # rather than the initial placeholder value.
    report_lines = [
        "",
        f"INFO: Submitted by {github_username}:",
        f"INFO: Benchmark: {benchmark}",
        f"INFO: Environment: {environment}",
        f"INFO: Version: {version}",
        f"INFO: Training Steps: {training_steps}",
        f"INFO: Code Link: {code_link}",
        f"INFO: Final Score: {avg_episode_rewards}",
        "",
    ]
    return "\n".join(report_lines)
|
42 |
+
|
43 |
+
def evaluate_model(model_uploader, benchmark, environment, version, num_episodes=100):
    """Roll out an agent and collect the total reward of each episode.

    Args:
        model_uploader: The uploaded agent. NOTE: not used yet; actions are
            currently sampled at random from the action space.
        benchmark: Benchmark name, forwarded to ``build_env``.
        environment: Environment name, forwarded to ``build_env``.
        version: Environment version, forwarded to ``build_env``.
        num_episodes: Number of complete episodes to run (default 100).

    Returns:
        A list with one entry per finished episode, each being the sum of the
        rewards received during that episode.
    """
    env = build_env(benchmark, environment, version)
    print(env)

    episode_rewards = []

    try:
        obs, info = env.reset()
        episode_return = 0.0
        while len(episode_rewards) < num_episodes:
            # TODO: replace with the agent's policy, e.g.
            # action = model_uploader.predict(obs)
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            episode_return += reward

            # Record a score only when an episode actually ends; appending
            # every step's reward would count steps instead of episodes.
            if terminated or truncated:
                episode_rewards.append(episode_return)
                episode_return = 0.0
                obs, info = env.reset()
    finally:
        # Release the environment's resources even if a step fails.
        env.close()

    return episode_rewards
|
61 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
gymnasium
|
3 |
+
numpy
|
4 |
+
gradio
|