<!-- Source: Q-Table / index.html (arad1367, commit b18c436 "Update index.html", verified) -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Simple Q-Learning Grid World Simulation</title>
<style>
/* Page shell: a single centered column, at most 800px wide. */
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
}
/* Grid world board: 4 x 4 tracks of 80px. The track count is hard-coded here
   and must be kept in step with `gridSize` in the script below. */
.grid {
display: grid;
grid-template-columns: repeat(4, 80px);
grid-template-rows: repeat(4, 80px);
gap: 2px;
margin: 20px 0;
}
/* One board square. `position: relative` makes the cell the containing block
   for the absolutely positioned .agent marker appended into it. */
.cell {
width: 80px;
height: 80px;
border: 1px solid #ccc;
display: flex;
align-items: center;
justify-content: center;
position: relative;
}
/* The agent marker: a blue disc placed inside the cell the agent occupies. */
.agent {
width: 30px;
height: 30px;
background-color: blue;
border-radius: 50%;
position: absolute;
}
/* Goal square (the script also writes the text "GOAL" into it). */
.goal {
background-color: green;
color: white;
}
/* Impassable square. */
.obstacle {
background-color: gray;
}
/* Row of Step / Train / Auto / Stop / Reset buttons. */
.controls {
margin: 20px 0;
}
button {
padding: 8px 16px;
margin-right: 10px;
cursor: pointer;
}
/* Grey rounded panel, used for the intro text and the status line. */
.info {
margin: 20px 0;
padding: 10px;
background-color: #f0f0f0;
border-radius: 5px;
}
/* Hyper-parameter sliders laid out as rows of three: label | slider | value. */
.parameters {
display: grid;
grid-template-columns: auto 1fr auto;
gap: 10px;
align-items: center;
margin-bottom: 10px;
}
/* Q-table rendered by updateQTableDisplay(). */
table {
border-collapse: collapse;
margin-top: 20px;
width: 100%;
}
th,
td {
border: 1px solid #ddd;
padding: 8px;
text-align: center;
}
/* Requested size of the learning-progress canvas. */
.chart {
width: 100%;
height: 200px;
margin-top: 20px;
}
/* Footer credit line. */
.signature {
text-align: center; /* Changed from 'right' to 'center' */
font-style: italic;
margin-top: 30px;
}
</style>
</head>
<body>
<h1>Simple Q-Learning Grid World Simulation - Designed by Pejman</h1>
<div class="info">
<p>
This simulation demonstrates Q-learning - a reinforcement learning
algorithm where an agent learns to navigate a grid world to reach a goal
while avoiding obstacles.
</p>
</div>
<!-- Hyper-parameter sliders. Each row is label | range input | live value readout;
     the script's "input" listeners copy the slider value into alpha / gamma / epsilon
     and into the matching "*-value" span. The initial values here mirror the
     script's initial values (0.5, 0.9, 0.3). -->
<div class="parameters">
<label for="alpha">Learning Rate (α):</label>
<input type="range" id="alpha" min="0.1" max="1" step="0.1" value="0.5" />
<span id="alpha-value">0.5</span>
<label for="gamma">Discount Factor (γ):</label>
<input type="range" id="gamma" min="0.1" max="1" step="0.1" value="0.9" />
<span id="gamma-value">0.9</span>
<label for="epsilon">Exploration Rate (ε):</label>
<input type="range" id="epsilon" min="0" max="1" step="0.1" value="0.3" />
<span id="epsilon-value">0.3</span>
</div>
<!-- Training controls. Every button is an in-page action, so each declares
     type="button" explicitly (the default type is "submit", which would submit
     an enclosing form if this markup were ever placed inside one).
     Stop starts disabled; the script enables it while auto-training runs. -->
<div class="controls">
  <button type="button" id="step-btn">Step</button>
  <button type="button" id="train-btn">Train Episode</button>
  <button type="button" id="auto-btn">Auto Train</button>
  <button type="button" id="stop-btn" disabled>Stop</button>
  <button type="button" id="reset-btn">Reset</button>
</div>
<div class="info" id="status">Episode: 1 | Step: 0 | Total Reward: 0</div>
<div class="grid" id="grid"></div>
<h2>Q-Table</h2>
<div id="q-table"></div>
<h2>Learning Progress</h2>
<!-- Canvas pixels are invisible to assistive technology, so the chart gets an
     accessible name plus fallback content for browsers without canvas support. -->
<canvas
  id="chart"
  class="chart"
  role="img"
  aria-label="Line chart of the total reward earned in each completed training episode"
>
  Line chart of the total reward earned in each completed training episode.
</canvas>
<div class="signature">
© 2025 Pejman Ebrahimi - Basic Q-Learning Simulation
</div>
<!-- Chart.js, pinned to major version 4 so a future breaking release cannot
     silently change or break the chart (the config below uses the v3+ `scales.y` form). -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
<script>
// Grid setup
// Container element that createGrid() fills with one .cell per coordinate.
const grid = document.getElementById("grid");
// Side length of the square world; used for both the x and the y range.
const gridSize = 4;
// Current agent coordinate. x is the column and y the row (cells get the id
// `cell-${x}-${y}`); every episode starts at the top-left corner.
let agentPos = { x: 0, y: 0 };
// Reaching this cell ends the episode.
const goalPos = { x: 3, y: 3 };
// Cells the agent can never enter.
const obstacles = [
{ x: 1, y: 1 },
{ x: 2, y: 1 },
{ x: 1, y: 2 },
];
// Learning parameters
// These three are overwritten by the matching sliders' "input" listeners.
let alpha = 0.5;
let gamma = 0.9;
let epsilon = 0.3;
// Maps a state key "x,y" to an object { up, right, down, left } of Q-values;
// (re)built by initQTable().
let qTable = {};
// Training variables
let episode = 1; // 1-based number of the episode in progress
let step = 0; // steps taken in the current episode
let totalReward = 0; // reward accumulated in the current episode
let rewards = []; // total reward of every completed episode, in order
let running = false; // true while the auto-train loop should keep going
// Actions
// The order matters: it is the tie-break order when several actions share the best Q-value.
const actions = ["up", "right", "down", "left"];
// Rebuild the board DOM from scratch: one .cell per coordinate, appended row by
// row, with the goal and obstacle cells tagged; then draw the agent marker.
function createGrid() {
  grid.innerHTML = "";
  const cellCount = gridSize * gridSize;
  for (let index = 0; index < cellCount; index += 1) {
    // Row-major walk: index -> (column, row).
    const x = index % gridSize;
    const y = Math.floor(index / gridSize);

    const cell = document.createElement("div");
    cell.className = "cell";
    cell.id = `cell-${x}-${y}`;

    const isGoalCell = x === goalPos.x && y === goalPos.y;
    if (isGoalCell) {
      cell.classList.add("goal");
      cell.textContent = "GOAL";
    } else if (isObstacle(x, y)) {
      cell.classList.add("obstacle");
    }
    grid.appendChild(cell);
  }
  updateAgentPosition();
}
// Redraw the agent marker: remove the existing .agent element (if any) and
// append a fresh one to the cell matching the current agentPos.
function updateAgentPosition() {
  const staleMarker = document.querySelector(".agent");
  if (staleMarker !== null) {
    staleMarker.remove();
  }

  const marker = document.createElement("div");
  marker.className = "agent";

  const hostCellId = `cell-${agentPos.x}-${agentPos.y}`;
  document.getElementById(hostCellId).appendChild(marker);
}
// Reset the Q-table: every non-obstacle cell gets an entry keyed "x,y" with all
// four action values at zero, then the on-page table is re-rendered.
function initQTable() {
  qTable = {};
  for (let row = 0; row < gridSize; row += 1) {
    for (let col = 0; col < gridSize; col += 1) {
      if (!isObstacle(col, row)) {
        qTable[`${col},${row}`] = { up: 0, right: 0, down: 0, left: 0 };
      }
    }
  }
  updateQTableDisplay();
}
// Re-render the Q-table view inside #q-table.
// Each free cell shows the greedy action (as an arrow) and its Q-value, tinted
// green in proportion to that value; obstacle cells show "X", the goal "GOAL".
//
// The greedy action is picked only among moves that are actually possible from
// the cell (inside the grid and not into an obstacle), in `actions` order on
// ties, which is the same rule chooseAction() applies when exploiting. Moves
// that can never be taken keep their initial Q-value of 0 and would otherwise
// be shown as "best" whenever every real move has a negative value.
function updateQTableDisplay() {
  const symbols = { up: "↑", right: "→", down: "↓", left: "←" };
  const offsets = { up: [0, -1], right: [1, 0], down: [0, 1], left: [-1, 0] };

  // Actions that lead from (x, y) to an in-bounds, non-obstacle cell.
  const movesFrom = (x, y) =>
    actions.filter((action) => {
      const nx = x + offsets[action][0];
      const ny = y + offsets[action][1];
      return (
        nx >= 0 &&
        nx < gridSize &&
        ny >= 0 &&
        ny < gridSize &&
        !isObstacle(nx, ny)
      );
    });

  const tableContainer = document.getElementById("q-table");
  tableContainer.innerHTML = "";
  const table = document.createElement("table");

  // Header row: empty corner cell followed by the column (x) indices.
  const thead = document.createElement("thead");
  const headerRow = document.createElement("tr");
  headerRow.appendChild(document.createElement("th"));
  for (let x = 0; x < gridSize; x++) {
    const th = document.createElement("th");
    th.scope = "col";
    th.textContent = x;
    headerRow.appendChild(th);
  }
  thead.appendChild(headerRow);
  table.appendChild(thead);

  // Body: one row per y, led by a row header holding the y index.
  const tbody = document.createElement("tbody");
  for (let y = 0; y < gridSize; y++) {
    const row = document.createElement("tr");
    const rowHeader = document.createElement("th");
    rowHeader.scope = "row";
    rowHeader.textContent = y;
    row.appendChild(rowHeader);

    for (let x = 0; x < gridSize; x++) {
      const cell = document.createElement("td");
      if (isObstacle(x, y)) {
        cell.textContent = "X";
        cell.style.backgroundColor = "lightgray";
      } else if (x === goalPos.x && y === goalPos.y) {
        cell.textContent = "GOAL";
        cell.style.backgroundColor = "lightgreen";
      } else {
        const stateQ = qTable[`${x},${y}`];

        // Fall back to every action only if a cell were completely boxed in.
        const possible = movesFrom(x, y);
        const candidates = possible.length > 0 ? possible : actions;

        // First strictly-greater wins, so ties resolve in `actions` order.
        let bestAction = candidates[0];
        let bestValue = stateQ[bestAction];
        for (const action of candidates) {
          if (stateQ[action] > bestValue) {
            bestValue = stateQ[action];
            bestAction = action;
          }
        }

        cell.textContent = `${symbols[bestAction]} (${bestValue.toFixed(1)})`;

        // Map Q-values in [-5, 5] onto [0, 1] (clamped), then onto 0-50% opacity.
        const normalizedValue = Math.max(0, Math.min(1, (bestValue + 5) / 10));
        cell.style.backgroundColor = `rgba(0, 128, 0, ${normalizedValue * 0.5})`;
      }
      row.appendChild(cell);
    }
    tbody.appendChild(row);
  }
  table.appendChild(tbody);
  tableContainer.appendChild(table);
}
// Epsilon-greedy action selection for the agent's current cell.
// With probability `epsilon` a uniformly random valid action is returned;
// otherwise the valid action with the highest Q-value (earliest one on ties).
function chooseAction() {
  const stateKey = `${agentPos.x},${agentPos.y}`;
  const candidates = getValidActions();

  // Explore.
  const explore = Math.random() < epsilon;
  if (explore) {
    const pickIndex = Math.floor(Math.random() * candidates.length);
    return candidates[pickIndex];
  }

  // Exploit: linear scan keeping the first maximum.
  const qRow = qTable[stateKey];
  let greedy = candidates[0];
  let greedyValue = qRow[greedy];
  for (let i = 1; i < candidates.length; i += 1) {
    const contender = candidates[i];
    if (qRow[contender] > greedyValue) {
      greedyValue = qRow[contender];
      greedy = contender;
    }
  }
  return greedy;
}
// List the actions available from the agent's current cell: a move is valid
// when its target lies inside the grid and is not an obstacle.
// The result is always ordered up, right, down, left.
function getValidActions() {
  const { x, y } = agentPos;
  const lastIndex = gridSize - 1;

  // [action, target is inside the grid, target x, target y]
  const moves = [
    ["up", y > 0, x, y - 1],
    ["right", x < lastIndex, x + 1, y],
    ["down", y < lastIndex, x, y + 1],
    ["left", x > 0, x - 1, y],
  ];

  return moves
    .filter(([, inBounds, targetX, targetY]) => inBounds && !isObstacle(targetX, targetY))
    .map(([name]) => name);
}
// True when the cell at (x, y) appears in the `obstacles` list.
function isObstacle(x, y) {
  for (const blocked of obstacles) {
    if (blocked.x === x && blocked.y === y) {
      return true;
    }
  }
  return false;
}
// Apply `action` to the agent and return the reward earned:
//   -10  the move would land on an obstacle (the agent is put back),
//   +10  the agent is on the goal after the move,
//    -1  otherwise (per-step cost).
// Moves off the edge are clamped, so the agent stays put and pays the step cost.
function takeAction(action) {
  const previous = { x: agentPos.x, y: agentPos.y };
  const lastIndex = gridSize - 1;

  if (action === "up") {
    agentPos.y = Math.max(0, agentPos.y - 1);
  } else if (action === "right") {
    agentPos.x = Math.min(lastIndex, agentPos.x + 1);
  } else if (action === "down") {
    agentPos.y = Math.min(lastIndex, agentPos.y + 1);
  } else if (action === "left") {
    agentPos.x = Math.max(0, agentPos.x - 1);
  }

  // Bumped into an obstacle: undo the move and penalise.
  if (isObstacle(agentPos.x, agentPos.y)) {
    agentPos = previous;
    return -10;
  }

  const onGoal = agentPos.x === goalPos.x && agentPos.y === goalPos.y;
  return onGoal ? 10 : -1;
}
// Apply one Q-learning update to qTable[state][action]:
//   Q(s,a) <- Q(s,a) + alpha * (reward + gamma * max_a' Q(s',a') - Q(s,a))
//
// state / nextState are "x,y" keys into qTable; action is one of `actions`.
//
// The max is taken only over actions that can actually be taken from the next
// state (target inside the grid and not an obstacle). Actions that are never
// selectable keep their initial value of 0 forever; including them would floor
// the bootstrap target at 0 and hide the difference between states whose real
// values are negative (which happens for small gamma).
function updateQValue(state, action, reward, nextState) {
  const currQ = qTable[state][action];
  const nextStateQ = qTable[nextState];

  // Recover the next cell's coordinates from its "x,y" key.
  const [nextX, nextY] = nextState.split(",").map(Number);
  const offsets = { up: [0, -1], right: [1, 0], down: [0, 1], left: [-1, 0] };
  const reachable = actions.filter((candidate) => {
    const x = nextX + offsets[candidate][0];
    const y = nextY + offsets[candidate][1];
    return x >= 0 && x < gridSize && y >= 0 && y < gridSize && !isObstacle(x, y);
  });

  // Fall back to every action only if the next cell were completely boxed in.
  const considered = reachable.length > 0 ? reachable : actions;
  const maxNextQ = Math.max(...considered.map((candidate) => nextStateQ[candidate]));

  // Q-learning formula
  const newQ = currQ + alpha * (reward + gamma * maxNextQ - currQ);
  qTable[state][action] = newQ;
}
// Run a single agent step: pick an action, move, learn from the reward, and
// refresh the status line and Q-table view. When the step lands on the goal the
// episode's total reward is recorded and charted, the episode counter advances
// and the agent is sent back to the start.
// Returns true if this step finished the episode, false otherwise.
function performStep() {
  const stateBefore = `${agentPos.x},${agentPos.y}`;
  const action = chooseAction();
  const reward = takeAction(action);
  updateAgentPosition();
  updateQValue(stateBefore, action, reward, `${agentPos.x},${agentPos.y}`);

  step += 1;
  totalReward += reward;
  const statusElement = document.getElementById("status");
  statusElement.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;
  updateQTableDisplay();

  const reachedGoal = agentPos.x === goalPos.x && agentPos.y === goalPos.y;
  if (!reachedGoal) {
    return false; // Episode not completed
  }

  // Episode finished: log it, plot it, and start the next one.
  rewards.push(totalReward);
  chart.data.labels.push(episode);
  chart.data.datasets[0].data.push(totalReward);
  chart.update();
  episode += 1;
  resetAgentPosition();
  return true; // Episode completed
}
// Run steps back-to-back until the current episode reaches the goal.
//
// The loop is synchronous, so it is capped: some slider settings (for example
// exploration rate 0 together with learning rate 1) can leave the greedy policy
// bouncing between two cells forever, which would otherwise freeze the page.
// If the cap is hit the episode is simply left unfinished where it stands and
// can be continued with another click.
//
// Used directly as a click listener, so it deliberately takes no parameters
// (the first argument it receives is the click Event).
function trainEpisode() {
  const maxStepsPerCall = 5000;
  for (let taken = 0; taken < maxStepsPerCall; taken += 1) {
    if (performStep()) {
      return;
    }
  }
  console.warn(
    `trainEpisode: goal not reached within ${maxStepsPerCall} steps; stopping to keep the page responsive.`
  );
}
// Cancels the tick the auto-train loop currently has scheduled (a timeout or an
// animation frame), or null when nothing is scheduled.
let cancelPendingAutoTrain = null;

// Auto-train loop: performs one step per animation frame while `running` is
// true, pausing 200 ms after each completed episode.
//
// Any tick that is still scheduled is cancelled on entry. Without this,
// pressing Stop and then Auto Train before the pending tick fired left that old
// tick alive next to the new loop, so two loops ran at once.
function autoTrain() {
  if (cancelPendingAutoTrain !== null) {
    cancelPendingAutoTrain();
    cancelPendingAutoTrain = null;
  }
  if (!running) return;

  const episodeDone = performStep();
  if (episodeDone) {
    const timeoutId = setTimeout(autoTrain, 200);
    cancelPendingAutoTrain = () => clearTimeout(timeoutId);
  } else {
    const frameId = requestAnimationFrame(autoTrain);
    cancelPendingAutoTrain = () => cancelAnimationFrame(frameId);
  }
}
// Start a fresh episode: put the agent back at (0, 0), zero the per-episode
// counters and rewrite the status line. The episode number is left untouched.
function resetAgentPosition() {
  agentPos = { x: 0, y: 0 };
  updateAgentPosition();

  step = 0;
  totalReward = 0;

  const statusElement = document.getElementById("status");
  statusElement.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;
}
// Full reset: agent back to the start, Q-table wiped, all counters and the
// reward history cleared, status line and chart emptied.
function resetEnvironment() {
  // Agent and learned values.
  agentPos = { x: 0, y: 0 };
  updateAgentPosition();
  initQTable();

  // Bookkeeping.
  episode = 1;
  step = 0;
  totalReward = 0;
  rewards = [];

  const statusElement = document.getElementById("status");
  statusElement.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;

  // Reset chart
  const chartData = chart.data;
  chartData.labels = [];
  chartData.datasets[0].data = [];
  chart.update();
}
// Initialize chart
// Line chart of total reward per completed episode; performStep() appends to
// chart.data and resetEnvironment() clears it.
//
// Chart.js comes from a CDN. If that script did not load, `Chart` is undefined
// and constructing it would throw, aborting the rest of this script (no event
// listeners, no grid). In that case an inert stand-in with the same
// `data` / `update()` shape is used so the simulation still works without a chart.
const ctx = document.getElementById("chart").getContext("2d");
if (typeof Chart !== "function") {
  console.warn("Chart.js is not available; the learning-progress chart is disabled.");
}
const chart =
  typeof Chart === "function"
    ? new Chart(ctx, {
        type: "line",
        data: {
          labels: [],
          datasets: [
            {
              label: "Total Reward",
              data: [],
              borderColor: "blue",
              backgroundColor: "rgba(0, 0, 255, 0.1)",
              tension: 0.1,
              fill: true,
            },
          ],
        },
        options: {
          responsive: true,
          scales: {
            y: {
              // Rewards are usually negative early on, so do not pin the axis to zero.
              beginAtZero: false,
            },
          },
        },
      })
    : {
        data: { labels: [], datasets: [{ data: [] }] },
        update() {},
      };
// Event listeners
// Step / Train Episode call the training functions directly.
document.getElementById("step-btn").addEventListener("click", performStep);
document.getElementById("train-btn").addEventListener("click", trainEpisode);

// Auto Train: start the loop and swap which of Auto / Stop is enabled.
document.getElementById("auto-btn").addEventListener("click", (event) => {
  running = true;
  event.currentTarget.disabled = true;
  document.getElementById("stop-btn").disabled = false;
  autoTrain();
});

// Stop: clear the flag the loop checks and swap the buttons back.
document.getElementById("stop-btn").addEventListener("click", (event) => {
  running = false;
  event.currentTarget.disabled = true;
  document.getElementById("auto-btn").disabled = false;
});

document.getElementById("reset-btn").addEventListener("click", resetEnvironment);

// Sliders: each one stores its value in the matching learning parameter and
// echoes it, to one decimal place, in the "<id>-value" span beside it.
for (const [sliderId, store] of [
  ["alpha", (value) => { alpha = value; }],
  ["gamma", (value) => { gamma = value; }],
  ["epsilon", (value) => { epsilon = value; }],
]) {
  document.getElementById(sliderId).addEventListener("input", (event) => {
    const value = parseFloat(event.currentTarget.value);
    store(value);
    document.getElementById(`${sliderId}-value`).textContent = value.toFixed(1);
  });
}
// Initialize environment
// Build the board first (this also draws the agent), then create the zeroed
// Q-table, which renders the Q-table view.
createGrid();
initQTable();
</script>
</body>
</html>