arad1367 committed on
Commit
b18c436
·
verified ·
1 Parent(s): 5779997

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +577 -18
index.html CHANGED
@@ -1,19 +1,578 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Simple Q-Learning Grid World Simulation</title>
7
+ <style>
8
+ body {
9
+ font-family: Arial, sans-serif;
10
+ max-width: 800px;
11
+ margin: 0 auto;
12
+ padding: 20px;
13
+ }
14
+ .grid {
15
+ display: grid;
16
+ grid-template-columns: repeat(4, 80px);
17
+ grid-template-rows: repeat(4, 80px);
18
+ gap: 2px;
19
+ margin: 20px 0;
20
+ }
21
+ .cell {
22
+ width: 80px;
23
+ height: 80px;
24
+ border: 1px solid #ccc;
25
+ display: flex;
26
+ align-items: center;
27
+ justify-content: center;
28
+ position: relative;
29
+ }
30
+ .agent {
31
+ width: 30px;
32
+ height: 30px;
33
+ background-color: blue;
34
+ border-radius: 50%;
35
+ position: absolute;
36
+ }
37
+ .goal {
38
+ background-color: green;
39
+ color: white;
40
+ }
41
+ .obstacle {
42
+ background-color: gray;
43
+ }
44
+ .controls {
45
+ margin: 20px 0;
46
+ }
47
+ button {
48
+ padding: 8px 16px;
49
+ margin-right: 10px;
50
+ cursor: pointer;
51
+ }
52
+ .info {
53
+ margin: 20px 0;
54
+ padding: 10px;
55
+ background-color: #f0f0f0;
56
+ border-radius: 5px;
57
+ }
58
+ .parameters {
59
+ display: grid;
60
+ grid-template-columns: auto 1fr auto;
61
+ gap: 10px;
62
+ align-items: center;
63
+ margin-bottom: 10px;
64
+ }
65
+ table {
66
+ border-collapse: collapse;
67
+ margin-top: 20px;
68
+ width: 100%;
69
+ }
70
+ th,
71
+ td {
72
+ border: 1px solid #ddd;
73
+ padding: 8px;
74
+ text-align: center;
75
+ }
76
+ .chart {
77
+ width: 100%;
78
+ height: 200px;
79
+ margin-top: 20px;
80
+ }
81
+ .signature {
82
+ text-align: center; /* Changed from 'right' to 'center' */
83
+ font-style: italic;
84
+ margin-top: 30px;
85
+ }
86
+ </style>
87
+ </head>
88
+ <body>
89
+ <h1>Simple Q-Learning Grid World Simulation - Designed by Pejman</h1>
90
+
91
+ <div class="info">
92
+ <p>
93
+ This simulation demonstrates Q-learning - a reinforcement learning
94
+ algorithm where an agent learns to navigate a grid world to reach a goal
95
+ while avoiding obstacles.
96
+ </p>
97
+ </div>
98
+
99
+ <div class="parameters">
100
+ <label for="alpha">Learning Rate (α):</label>
101
+ <input type="range" id="alpha" min="0.1" max="1" step="0.1" value="0.5" />
102
+ <span id="alpha-value">0.5</span>
103
+
104
+ <label for="gamma">Discount Factor (γ):</label>
105
+ <input type="range" id="gamma" min="0.1" max="1" step="0.1" value="0.9" />
106
+ <span id="gamma-value">0.9</span>
107
+
108
+ <label for="epsilon">Exploration Rate (ε):</label>
109
+ <input type="range" id="epsilon" min="0" max="1" step="0.1" value="0.3" />
110
+ <span id="epsilon-value">0.3</span>
111
+ </div>
112
+
113
+ <div class="controls">
114
+ <button id="step-btn">Step</button>
115
+ <button id="train-btn">Train Episode</button>
116
+ <button id="auto-btn">Auto Train</button>
117
+ <button id="stop-btn" disabled>Stop</button>
118
+ <button id="reset-btn">Reset</button>
119
+ </div>
120
+
121
+ <div class="info" id="status">Episode: 1 | Step: 0 | Total Reward: 0</div>
122
+
123
+ <div class="grid" id="grid"></div>
124
+
125
+ <h2>Q-Table</h2>
126
+ <div id="q-table"></div>
127
+
128
+ <h2>Learning Progress</h2>
129
+ <canvas id="chart" class="chart"></canvas>
130
+
131
+ <div class="signature">
132
+ © 2025 Pejman Ebrahimi - Basic Q-Learning Simulation
133
+ </div>
134
+
135
+ <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
136
+ <script>
// Grid setup
// Container element that createGrid() fills with one cell per grid square.
const grid = document.getElementById("grid");
// Side length of the square grid world, in cells.
const gridSize = 4;
// Current agent location; moved by takeAction() and reset at episode end.
let agentPos = { x: 0, y: 0 };
// World layout. Nothing in the script writes to these, so they are frozen to
// keep an accidental assignment from changing the environment mid-training.
const goalPos = Object.freeze({ x: 3, y: 3 });
const obstacles = Object.freeze([
  Object.freeze({ x: 1, y: 1 }),
  Object.freeze({ x: 2, y: 1 }),
  Object.freeze({ x: 1, y: 2 }),
]);

// Learning parameters (overwritten by the slider "input" handlers)
let alpha = 0.5; // learning rate used in updateQValue()
let gamma = 0.9; // discount factor used in updateQValue()
let epsilon = 0.3; // exploration probability used in chooseAction()
// Maps a "x,y" state key to an object holding one Q-value per action.
let qTable = {};

// Training variables
let episode = 1; // 1-based number of the episode in progress
let step = 0; // steps taken in the current episode
let totalReward = 0; // reward accumulated in the current episode
let rewards = []; // total reward of every finished episode
let running = false; // true while auto-training is active

// Actions (this order is also the tie-break order when Q-values are equal)
const actions = Object.freeze(["up", "right", "down", "left"]);
163
+
/**
 * Rebuilds the grid world inside the #grid element.
 *
 * Creates one `div.cell` per square with the id `cell-<x>-<y>`, marks the
 * goal and obstacle squares, and then places the agent marker.
 */
function createGrid() {
  grid.innerHTML = "";

  // The stylesheet hard-codes four tracks; derive them from gridSize so the
  // layout cannot drift from the number of cells generated below.
  grid.style.gridTemplateColumns = `repeat(${gridSize}, 80px)`;
  grid.style.gridTemplateRows = `repeat(${gridSize}, 80px)`;

  for (let y = 0; y < gridSize; y++) {
    for (let x = 0; x < gridSize; x++) {
      const cell = document.createElement("div");
      cell.className = "cell";
      cell.id = `cell-${x}-${y}`;

      if (x === goalPos.x && y === goalPos.y) {
        cell.classList.add("goal");
        cell.textContent = "GOAL";
      } else if (isObstacle(x, y)) {
        // Same predicate the movement code uses, instead of a private copy.
        cell.classList.add("obstacle");
      }

      grid.appendChild(cell);
    }
  }

  updateAgentPosition();
}
185
+
/**
 * Moves the agent marker to the cell matching `agentPos`.
 *
 * Any existing `.agent` element is removed and a fresh one is appended to
 * the cell whose id is `cell-<x>-<y>`.
 */
function updateAgentPosition() {
  const previousMarker = document.querySelector(".agent");
  if (previousMarker !== null) {
    previousMarker.remove();
  }

  const targetId = `cell-${agentPos.x}-${agentPos.y}`;
  const targetCell = document.getElementById(targetId);

  const marker = document.createElement("div");
  marker.className = "agent";
  targetCell.appendChild(marker);
}
198
+
/**
 * Resets `qTable` so every non-obstacle square has a Q-value of 0 for each
 * entry of `actions`, then redraws the Q-table view.
 *
 * State keys have the form "x,y". Obstacle squares get no entry because the
 * agent can never stand on them.
 */
function initQTable() {
  qTable = {};

  for (let y = 0; y < gridSize; y++) {
    for (let x = 0; x < gridSize; x++) {
      if (isObstacle(x, y)) continue;

      // Build the row from `actions` so the table and the action list
      // cannot get out of step.
      const stateQ = {};
      for (const action of actions) {
        stateQ[action] = 0;
      }
      qTable[`${x},${y}`] = stateQ;
    }
  }

  updateQTableDisplay();
}
215
+
/**
 * Rebuilds the Q-table view inside #q-table.
 *
 * The table has one column per x and one row per y. Obstacle squares show
 * "X", the goal shows "GOAL", and every other square shows the arrow of its
 * best action together with that action's Q-value (one decimal), on a green
 * background whose opacity grows with the value.
 *
 * The best action is chosen only among moves that stay on the board and do
 * not enter an obstacle. Illegal moves are never taken, so their Q-value
 * stays at its initial 0; including them would display an impossible arrow
 * whenever the legal values are negative.
 */
function updateQTableDisplay() {
  const arrows = { up: "↑", right: "→", down: "↓", left: "←" };

  // Actions that are legal from square (x, y), in `actions` order.
  function legalActionsAt(x, y) {
    const targets = {
      up: [x, y - 1],
      right: [x + 1, y],
      down: [x, y + 1],
      left: [x - 1, y],
    };
    const legal = actions.filter((action) => {
      const target = targets[action];
      if (!target) return false;
      const [tx, ty] = target;
      return (
        tx >= 0 &&
        tx < gridSize &&
        ty >= 0 &&
        ty < gridSize &&
        !isObstacle(tx, ty)
      );
    });
    // A fully enclosed square has no legal move; fall back to all actions so
    // the cell can still be rendered.
    return legal.length > 0 ? legal : actions;
  }

  const tableContainer = document.getElementById("q-table");
  tableContainer.innerHTML = "";

  const table = document.createElement("table");

  // Header row: an empty corner cell followed by the x coordinates.
  const thead = document.createElement("thead");
  const headerRow = document.createElement("tr");
  headerRow.appendChild(document.createElement("th"));
  for (let x = 0; x < gridSize; x++) {
    const th = document.createElement("th");
    th.textContent = x;
    headerRow.appendChild(th);
  }
  thead.appendChild(headerRow);
  table.appendChild(thead);

  // Body: one row per y, starting with the y coordinate as a row header.
  const tbody = document.createElement("tbody");
  for (let y = 0; y < gridSize; y++) {
    const row = document.createElement("tr");

    const th = document.createElement("th");
    th.textContent = y;
    row.appendChild(th);

    for (let x = 0; x < gridSize; x++) {
      const cell = document.createElement("td");

      if (isObstacle(x, y)) {
        cell.textContent = "X";
        cell.style.backgroundColor = "lightgray";
      } else if (x === goalPos.x && y === goalPos.y) {
        cell.textContent = "GOAL";
        cell.style.backgroundColor = "lightgreen";
      } else {
        const stateQ = qTable[`${x},${y}`];
        const legal = legalActionsAt(x, y);

        // Highest-valued legal action; ties keep the earliest one.
        let bestAction = legal[0];
        let bestValue = stateQ[bestAction];
        for (const action of legal) {
          if (stateQ[action] > bestValue) {
            bestValue = stateQ[action];
            bestAction = action;
          }
        }

        const actionSymbol = arrows[bestAction] || "";
        cell.textContent = `${actionSymbol} (${bestValue.toFixed(1)})`;

        // Map the value range [-5, 5] onto an opacity of 0 to 0.5.
        const normalizedValue = Math.max(
          0,
          Math.min(1, (bestValue + 5) / 10)
        );
        cell.style.backgroundColor = `rgba(0, 128, 0, ${normalizedValue * 0.5})`;
      }

      row.appendChild(cell);
    }

    tbody.appendChild(row);
  }
  table.appendChild(tbody);
  tableContainer.appendChild(table);
}
303
+
/**
 * Picks the agent's next action with an epsilon-greedy policy.
 *
 * With probability `epsilon` a uniformly random valid action is returned;
 * otherwise the valid action with the highest Q-value for the current state
 * is returned, the earliest one winning ties.
 *
 * @returns {string} One of the actions reported by getValidActions().
 */
function chooseAction() {
  const stateKey = `${agentPos.x},${agentPos.y}`;
  const candidates = getValidActions();

  // Explore: ignore the Q-table and pick any valid move.
  const explore = Math.random() < epsilon;
  if (explore) {
    const pick = Math.floor(Math.random() * candidates.length);
    return candidates[pick];
  }

  // Exploit: scan the candidates for the strictly largest Q-value.
  const values = qTable[stateKey];
  let winner = candidates[0];
  let winnerValue = values[winner];
  candidates.forEach((candidate) => {
    const candidateValue = values[candidate];
    if (candidateValue > winnerValue) {
      winnerValue = candidateValue;
      winner = candidate;
    }
  });

  return winner;
}
328
+
/**
 * Lists the actions that can be taken from a square without leaving the
 * board or entering an obstacle.
 *
 * @param {{x: number, y: number}} [pos=agentPos] Square to inspect; defaults
 *   to the agent's current position, which is what existing callers rely on.
 * @returns {string[]} Subset of "up", "right", "down", "left", in that order.
 */
function getValidActions(pos = agentPos) {
  const { x, y } = pos;
  const lastIndex = gridSize - 1;
  const validActions = [];

  if (y > 0 && !isObstacle(x, y - 1)) {
    validActions.push("up");
  }
  if (x < lastIndex && !isObstacle(x + 1, y)) {
    validActions.push("right");
  }
  if (y < lastIndex && !isObstacle(x, y + 1)) {
    validActions.push("down");
  }
  if (x > 0 && !isObstacle(x - 1, y)) {
    validActions.push("left");
  }

  return validActions;
}
361
+
/**
 * Reports whether the square at (x, y) is listed in `obstacles`.
 *
 * @param {number} x Column of the square.
 * @param {number} y Row of the square.
 * @returns {boolean} True when an obstacle has exactly these coordinates.
 */
function isObstacle(x, y) {
  for (const blocked of obstacles) {
    if (blocked.x === x && blocked.y === y) {
      return true;
    }
  }
  return false;
}
366
+
/**
 * Applies an action to `agentPos` and returns the reward it earns.
 *
 * The move is clamped to the board. If it lands on an obstacle the agent is
 * put back where it started.
 *
 * @param {string} action "up", "right", "down" or "left"; anything else
 *   leaves the agent in place.
 * @returns {number} -10 for hitting an obstacle, 10 for standing on the goal
 *   after the move, -1 otherwise.
 */
function takeAction(action) {
  const startPos = { ...agentPos };
  const maxIndex = gridSize - 1;

  // Shift one square in the requested direction, staying on the board.
  if (action === "up") {
    agentPos.y = Math.max(0, agentPos.y - 1);
  } else if (action === "right") {
    agentPos.x = Math.min(maxIndex, agentPos.x + 1);
  } else if (action === "down") {
    agentPos.y = Math.min(maxIndex, agentPos.y + 1);
  } else if (action === "left") {
    agentPos.x = Math.max(0, agentPos.x - 1);
  }

  // Obstacles are impassable: undo the move and charge the penalty.
  if (isObstacle(agentPos.x, agentPos.y)) {
    agentPos = startPos;
    return -10;
  }

  const reachedGoal = agentPos.x === goalPos.x && agentPos.y === goalPos.y;
  return reachedGoal ? 10 : -1;
}
400
+
/**
 * Applies the Q-learning update to one state-action pair:
 *
 *   Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max Q(s', a') - Q(s, a))
 *
 * The maximum is taken only over actions that are legal in the next state
 * (on the board and not into an obstacle). Illegal actions are never taken,
 * so their entries keep the initial value 0; including them would clamp the
 * bootstrap term at zero or above and hide the differences between states
 * whose true values are negative.
 *
 * @param {string} state Key "x,y" of the state the action was taken from.
 * @param {string} action Action that was taken.
 * @param {number} reward Reward received for the action.
 * @param {string} nextState Key "x,y" of the resulting state.
 */
function updateQValue(state, action, reward, nextState) {
  const currQ = qTable[state][action];
  const nextStateQ = qTable[nextState];

  // Squares each action would lead to from the next state.
  const [nx, ny] = nextState.split(",").map(Number);
  const targets = {
    up: [nx, ny - 1],
    right: [nx + 1, ny],
    down: [nx, ny + 1],
    left: [nx - 1, ny],
  };

  const legalValues = Object.keys(nextStateQ)
    .filter((candidate) => {
      const target = targets[candidate];
      if (!target) return false;
      const [tx, ty] = target;
      return (
        tx >= 0 &&
        tx < gridSize &&
        ty >= 0 &&
        ty < gridSize &&
        !isObstacle(tx, ty)
      );
    })
    .map((candidate) => nextStateQ[candidate]);

  // With no legal move there is nothing to bootstrap from.
  const maxNextQ = legalValues.length > 0 ? Math.max(...legalValues) : 0;

  const newQ = currQ + alpha * (reward + gamma * maxNextQ - currQ);
  qTable[state][action] = newQ;
}
413
+
/**
 * Runs a single learning step: choose an action, move, update the Q-value,
 * and refresh the status line and Q-table view.
 *
 * When the move lands on the goal, the episode's total reward is recorded
 * and charted, the episode counter advances, and the agent is sent back to
 * the start.
 *
 * @returns {boolean} True if this step finished the episode, else false.
 */
function performStep() {
  const previousKey = `${agentPos.x},${agentPos.y}`;
  const chosen = chooseAction();
  const gained = takeAction(chosen);
  updateAgentPosition();

  // Learn from the transition using the position reached by the move.
  const reachedKey = `${agentPos.x},${agentPos.y}`;
  updateQValue(previousKey, chosen, gained, reachedKey);

  step += 1;
  totalReward += gained;
  const statusLine = document.getElementById("status");
  statusLine.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;

  updateQTableDisplay();

  const atGoal = agentPos.x === goalPos.x && agentPos.y === goalPos.y;
  if (!atGoal) {
    return false; // Episode still in progress
  }

  // Episode finished: log the result and plot it.
  rewards.push(totalReward);
  chart.data.labels.push(episode);
  chart.data.datasets[0].data.push(totalReward);
  chart.update();

  // Prepare the next episode.
  episode += 1;
  resetAgentPosition();
  return true;
}
449
+
/**
 * Runs learning steps back to back until performStep() reports that the
 * current episode has finished. At least one step is always taken.
 */
function trainEpisode() {
  let finished;
  do {
    finished = performStep();
  } while (!finished);
}
457
+
// Iteration currently queued by autoTrain(), or null when none is queued.
// `kind` records which scheduler issued `id` so it can be cancelled correctly.
let pendingAutoTrain = null;

/**
 * Runs one learning step and queues the next while `running` is true.
 *
 * After a step that ends an episode the next iteration is delayed by 200 ms;
 * otherwise it is queued for the next animation frame.
 *
 * Any iteration that is still queued is cancelled first. Without that,
 * pressing Stop and then Auto Train before the queued callback fired left
 * the old callback alive alongside the new call, so two training loops ran
 * at the same time.
 */
function autoTrain() {
  if (pendingAutoTrain !== null) {
    if (pendingAutoTrain.kind === "timeout") {
      clearTimeout(pendingAutoTrain.id);
    } else {
      cancelAnimationFrame(pendingAutoTrain.id);
    }
    pendingAutoTrain = null;
  }

  if (!running) return;

  const episodeDone = performStep();
  if (episodeDone) {
    pendingAutoTrain = { kind: "timeout", id: setTimeout(autoTrain, 200) };
  } else {
    pendingAutoTrain = { kind: "frame", id: requestAnimationFrame(autoTrain) };
  }
}
469
+
/**
 * Starts a fresh episode: puts the agent back on square (0, 0), clears the
 * per-episode step and reward counters, and rewrites the status line.
 * The episode number and the Q-table are left untouched.
 */
function resetAgentPosition() {
  agentPos = { x: 0, y: 0 };
  updateAgentPosition();

  step = 0;
  totalReward = 0;

  const statusLine = document.getElementById("status");
  statusLine.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;
}
480
+
/**
 * Returns the whole simulation to its initial state: agent on (0, 0),
 * Q-table zeroed, episode 1, counters and reward history cleared, status
 * line rewritten and the chart emptied.
 */
function resetEnvironment() {
  // Set the episode number first so the status line written below shows it.
  episode = 1;
  rewards = [];

  // Agent position, step/reward counters and status line.
  resetAgentPosition();

  // Forget everything learned so far and redraw the Q-table.
  initQTable();

  // Reset chart
  chart.data.labels = [];
  chart.data.datasets[0].data = [];
  chart.update();
}
500
+
// Initialize chart
// Line chart of total reward per finished episode. performStep() appends to
// it and resetEnvironment() clears it.
const ctx = document.getElementById("chart").getContext("2d");

// Chart.js is loaded from a CDN. If that request fails, `Chart` is undefined
// and `new Chart(...)` would throw, aborting this script before the grid is
// ever drawn. In that case use an inert object with the same shape so the
// simulation keeps working without the plot.
const chart =
  typeof Chart === "function"
    ? new Chart(ctx, {
        type: "line",
        data: {
          labels: [],
          datasets: [
            {
              label: "Total Reward",
              data: [],
              borderColor: "blue",
              backgroundColor: "rgba(0, 0, 255, 0.1)",
              tension: 0.1,
              fill: true,
            },
          ],
        },
        options: {
          responsive: true,
          scales: {
            y: {
              beginAtZero: false,
            },
          },
        },
      })
    : {
        data: { labels: [], datasets: [{ data: [] }] },
        update() {},
      };
527
+
// Event listeners
document.getElementById("step-btn").addEventListener("click", performStep);
document.getElementById("train-btn").addEventListener("click", trainEpisode);

// Auto Train: start the loop and swap which of the two buttons is enabled.
document.getElementById("auto-btn").addEventListener("click", function () {
  running = true;
  this.disabled = true;
  document.getElementById("stop-btn").disabled = false;
  autoTrain();
});

// Stop: autoTrain() sees the flag on its next iteration and does not requeue.
document.getElementById("stop-btn").addEventListener("click", function () {
  running = false;
  this.disabled = true;
  document.getElementById("auto-btn").disabled = false;
});

document
  .getElementById("reset-btn")
  .addEventListener("click", resetEnvironment);

/**
 * Connects a range input to a learning parameter.
 *
 * On every "input" event the slider value is parsed, passed to `assign`, and
 * shown with one decimal in the element whose id is `<id>-value`. The same
 * sync runs once immediately so the parameter and label start from whatever
 * the slider currently shows, rather than from a duplicated default
 * (a browser may restore an earlier slider position on reload).
 *
 * @param {string} id Id of the range input.
 * @param {(value: number) => void} assign Stores the parsed value.
 */
function bindSlider(id, assign) {
  const slider = document.getElementById(id);
  const label = document.getElementById(`${id}-value`);
  const sync = () => {
    const value = parseFloat(slider.value);
    assign(value);
    label.textContent = value.toFixed(1);
  };
  slider.addEventListener("input", sync);
  sync();
}

bindSlider("alpha", (value) => {
  alpha = value;
});
bindSlider("gamma", (value) => {
  gamma = value;
});
bindSlider("epsilon", (value) => {
  epsilon = value;
});

// Initialize environment
createGrid();
initQTable();
576
+ </script>
577
+ </body>
578
  </html>