Update app with improved UI and markdown formatting
- .gitignore +53 -1
- .python-version +1 -0
- README.md +1 -1
- __pycache__/ddpg.cpython-310.pyc +0 -0
- app.py +183 -53
- pyproject.toml +23 -0
- requirements_hf.txt +13 -0
- uv.lock +0 -0
.gitignore
CHANGED
@@ -1 +1,53 @@
-.DS_Store
+.DS_Store
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Model files (if large)
+*.pth
+*.pt
+*.h5
+*.pkl
+*.joblib
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+tmp/
+temp/
.python-version
ADDED
@@ -0,0 +1 @@
+3.10
README.md
CHANGED
@@ -54,4 +54,4 @@ For each step, the reward:
 
 ## `train()` and `load_trained()`
 
-`load_trained()` function loads a pre-trained model that ran through 1000 episodes of training, while `train()` does training from scratch. You can edit which one of the functions is running from the bottom of the main.py file. If you set render_mode=False, the program will train a lot faster.
+`load_trained()` function loads a pre-trained model that ran through 1000 episodes of training, while `train()` does training from scratch. You can edit which one of the functions is running from the bottom of the main.py file. If you set render_mode=False, the program will train a lot faster.
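For reference, a minimal sketch of the main.py toggle the README describes (main.py is not part of this diff, so the exact signatures here are assumptions):

```python
# Hypothetical bottom of main.py, per the README: run one of the two entry points.
if __name__ == "__main__":
    # train(render_mode=False)  # train from scratch; disabling rendering is much faster
    load_trained()              # load the model pre-trained for 1000 episodes
```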
__pycache__/ddpg.cpython-310.pyc
CHANGED
Binary files a/__pycache__/ddpg.cpython-310.pyc and b/__pycache__/ddpg.cpython-310.pyc differ
app.py
CHANGED
@@ -3,14 +3,14 @@ from train import TrainingLoop
 from scipy.special import softmax
 import numpy as np
 
-
+# Global variables for training and data storage
 train = None
-
 frames, attributions = None, None
 
-lunar_lander_spec_conversion = {
+# Lunar Lander environment state feature mapping
+LUNAR_LANDER_FEATURES = {
     0: "X-coordinate",
-    1: "Y-coordinate",
+    1: "Y-coordinate",
     2: "Linear velocity in the X-axis",
     3: "Linear velocity in the Y-axis",
     4: "Angle",
@@ -20,76 +20,206 @@ lunar_lander_spec_conversion = {
 }
 
 def create_training_loop(env_spec):
+    """Initialize the training loop with the specified environment."""
     global train
     train = TrainingLoop(env_spec=env_spec)
     train.create_agent()
-
     return train.env.spec
 
 def display_softmax(inputs):
+    """Convert raw attribution values to softmax probabilities for visualization."""
     inputs = np.array(inputs)
     probabilities = softmax(inputs)
-
-    softmax_dict = {
- [old line 34 not recoverable from this render]
+
+    softmax_dict = {
+        name: float(prob)
+        for name, prob in zip(LUNAR_LANDER_FEATURES.values(), probabilities)
+    }
     return softmax_dict
 
 def generate_output(num_iterations, option):
+    """Generate attribution explanations for the trained agent."""
     global frames, attributions
-    frames, attributions = train.explain_trained(…
+    frames, attributions = train.explain_trained(
+        num_iterations=num_iterations,
+        option=option
+    )
     slider.maximum = len(frames)
 
 def get_frame_and_attribution(slider_value):
+    """Get frame and attribution data for the selected timestep."""
     global frames, attributions
     slider_value = min(slider_value, len(frames) - 1)
     frame = frames[slider_value]
-
-    print(f"{frame.shape…
-
+
+    print(f"Frame shape: {frame.shape}")
+
     attribution = display_softmax(attributions[slider_value])
-
     return frame, attribution
 
-with gr.Blocks(
- [old lines 54-66 (original interface setup) not recoverable from this render]
+with gr.Blocks(
+    title="Deep RL Explainability",
+    theme=gr.themes.Soft(),
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+    }
+    .tab-nav {
+        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
+    }
+    """
+) as demo:
+
+    # Header section
+    gr.Markdown("""
+    # Deep Reinforcement Learning Explainability
+
+    **Exploring AI decision-making through Integrated Gradients in RL environments**
+
+    ---
     """)
- [old lines 68-71 not recoverable from this render]
+
+    # Introduction section
+    gr.Markdown("""
+    ## How This Works
+
+    This application demonstrates the application of **[Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients)**
+    to Deep Reinforcement Learning scenarios. We use PyTorch's Captum library for interpretability
+    and Gymnasium for the continuous Lunar Lander environment.
+
+    ### Training Algorithm: [DDPG](https://arxiv.org/abs/1509.02971)
+
+    The agent is trained using **Deep Deterministic Policy Gradients** and achieves an average reward
+    of **260.8** per episode (successful landings).
+
+    ### How to Use This Space
+
+    1. **Select Environment**: Choose the Lunar Lander environment
+    2. **Choose Baseline**: Select between zero tensor or running average baseline
+    3. **Generate Attributions**: Click "ATTRIBUTE" and wait ~20-25 seconds
+    4. **Explore Results**: Use the slider to examine attributions at different timesteps
+
+    The attributions are normalized using Softmax to provide interpretable probability distributions.
+    """)
+
+    # Main interface tab
+    with gr.Tab("Attribution Analysis", elem_id="attribution-tab"):
+
+        # Environment setup
+        gr.Markdown("### Environment Setup")
+        env_spec = gr.Dropdown(
+            choices=["LunarLander-v2"],
+            type="value",
+            multiselect=False,
+            label="Environment Specification",
+            value="LunarLander-v2",
+            info="Select the RL environment to analyze"
+        )
+
+        env_interface = gr.Interface(
+            title="Initialize Environment",
+            allow_flagging="never",
+            inputs=env_spec,
+            fn=create_training_loop,
+            outputs=gr.JSON(label="Environment Spec"),
+            description="Click to initialize the training environment"
+        )
+
+        # Attribution controls
+        gr.Markdown("### Attribution Configuration")
+
         with gr.Row():
- [old lines 73-93 (original component layout) not recoverable from this render]
+            with gr.Column(scale=1):
+                option = gr.Dropdown(
+                    choices=["Torch Tensor of 0's", "Running Average"],
+                    type="index",
+                    label="Baseline Method",
+                    info="Choose the baseline for Integrated Gradients"
+                )
+
+            with gr.Column(scale=1):
+                baselines = gr.Slider(
+                    label="Number of Baseline Iterations",
+                    interactive=True,
+                    minimum=0,
+                    maximum=100,
+                    value=10,
+                    step=5,
+                    info="Number of baseline inputs to collect for averaging"
+                )
+
+        # Generate button
+        generate_btn = gr.Button(
+            "GENERATE ATTRIBUTIONS",
+            variant="primary",
+            size="lg"
+        )
+        generate_btn.click(
+            fn=generate_output,
+            inputs=[baselines, option],
+            outputs=[]
+        )
+
+        # Results section
+        gr.Markdown("### Results Visualization")
+
+        slider = gr.Slider(
+            label="Key Frame Selector",
+            minimum=0,
+            maximum=1000,
+            step=1,
+            value=0,
+            info="Navigate through different timesteps to see attributions"
+        )
+
+        results_interface = gr.Interface(
+            fn=get_frame_and_attribution,
+            inputs=slider,
+            live=True,
+            outputs=[
+                gr.Image(label="Environment State", type="numpy"),
+                gr.Label(label="Feature Attributions", num_top_classes=8)
+            ],
+            title="Real-time Attribution Analysis"
+        )
+
+    gr.Markdown("""
+    ---
+
+    ## Local Usage & Installation
+
+    ### Required Packages
+    ```bash
+    pip install torch gymnasium 'gymnasium[box2d]'
+    ```
+
+    ### Box2D Installation (macOS)
+    ```bash
+    brew install swig
+    pip install box2d
+    ```
+
+    ## Lunar Lander Environment Details
+
+    ### Reward Structure
+    - **Position**: Increased/decreased based on distance to landing pad
+    - **Velocity**: Increased/decreased based on speed (slower is better)
+    - **Angle**: Decreased when lander is tilted (horizontal is ideal)
+    - **Landing**: +10 points for each leg touching ground
+    - **Fuel**: -0.03 points per frame for side engine, -0.3 for main engine
+    - **Episode End**: -100 for crash, +100 for safe landing
+
+    **Success Threshold**: 200+ points per episode
+
+    ### Training Functions
+    - `load_trained()`: Loads pre-trained model (1000 episodes)
+    - `train()`: Trains from scratch
+    - Set `render_mode=False` for faster training
+
+    ---
+
+    *Built with ❤️ using Gradio, PyTorch, and Captum*
+    """)
 
- [old line 95 not recoverable from this render]
+if __name__ == "__main__":
+    demo.launch()
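The embedded markdown above describes attributing the agent's actions to the eight state features against a zero-tensor or running-average baseline. As a rough sketch of what `train.explain_trained()` might do internally with Captum (the actor architecture below is a hypothetical stand-in, not the repository's ddpg.py):

```python
import torch
import torch.nn as nn
from captum.attr import IntegratedGradients

# Hypothetical stand-in for the DDPG actor: 8 state features in
# (as listed in LUNAR_LANDER_FEATURES), 2 continuous actions out.
actor = nn.Sequential(
    nn.Linear(8, 64),
    nn.ReLU(),
    nn.Linear(64, 2),
)

ig = IntegratedGradients(actor)

state = torch.rand(1, 8)       # one Lunar Lander observation (batch of 1)
baseline = torch.zeros(1, 8)   # the "Torch Tensor of 0's" baseline option
# Attribute the first action dimension back to the 8 input features
attributions = ig.attribute(state, baselines=baseline, target=0)
print(attributions.shape)      # torch.Size([1, 8])
```

Passing a mean of collected observations as `baselines` would correspond to the "Running Average" option; the per-feature attributions can then be softmaxed for display, as `display_softmax()` does above.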
pyproject.toml
ADDED
@@ -0,0 +1,23 @@
+[project]
+name = "deep-rl-explainability"
+version = "0.1.0"
+description = "Add your description here"
+requires-python = ">=3.10"
+dependencies = [
+    "ale-py==0.8.1",
+    "autorom==0.4.2",
+    "autorom-accept-rom-license==0.6.1",
+    "captum==0.6.0",
+    "gradio>=5.44.1",
+    "gymnasium[box2d]==0.29.1",
+    "huggingface-hub>=0.34.4",
+    "imageio==2.31.5",
+    "imageio-ffmpeg==0.4.9",
+    "matplotlib==3.8.0",
+    "matplotlib-inline==0.1.6",
+    "moviepy==1.0.3",
+    "mujoco==2.3.7",
+    "numpy==1.26.0",
+    "scipy>=1.15.3",
+    "torch==2.1.0",
+]
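Since the commit also adds a uv.lock (below), the project is presumably managed with uv; a plausible local workflow, assuming uv is installed:

```bash
uv sync        # create .venv/ and install the pinned dependencies
uv run app.py  # launch the Gradio demo
```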
requirements_hf.txt
ADDED
@@ -0,0 +1,13 @@
+torch==2.1.0
+gymnasium==0.29.1
+gymnasium[box2d]
+gradio==5.44.1
+captum==0.6.0
+numpy==1.26.0
+scipy
+matplotlib==3.8.0
+moviepy==1.0.3
+imageio==2.31.5
+imageio-ffmpeg==0.4.9
+box2d-py==2.3.5
+swig==4.*
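These pins mirror the pyproject.toml dependencies for the Hugging Face runtime; outside uv, the same set of packages can be installed with `pip install -r requirements_hf.txt`.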
uv.lock
ADDED
(diff too large to render)