|
<!DOCTYPE html> |
|
<html lang="en"> |
|
<head> |
|
<meta charset="UTF-8" /> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> |
|
<title>Simple Q-Learning Grid World Simulation</title> |
|
<style> |
|
/* ---------- Page shell ---------- */
body {
  margin: 0 auto;
  padding: 20px;
  max-width: 800px;
  font-family: Arial, sans-serif;
}

/* ---------- Grid world: 4 x 4 board of 80px cells ---------- */
.grid {
  margin: 20px 0;
  display: grid;
  gap: 2px;
  grid-template-rows: repeat(4, 80px);
  grid-template-columns: repeat(4, 80px);
}

/* A single board cell; centres its content and anchors the agent marker. */
.cell {
  position: relative;
  display: flex;
  justify-content: center;
  align-items: center;
  height: 80px;
  width: 80px;
  border: 1px solid #ccc;
}

/* Blue disc marking the agent's current cell. */
.agent {
  position: absolute;
  height: 30px;
  width: 30px;
  border-radius: 50%;
  background-color: blue;
}

/* Cell-type modifiers applied next to .cell. */
.goal {
  color: white;
  background-color: green;
}

.obstacle {
  background-color: gray;
}

/* ---------- Controls ---------- */
.controls {
  margin: 20px 0;
}

button {
  margin-right: 10px;
  padding: 8px 16px;
  cursor: pointer;
}

/* Grey panel used for the intro text and the status line. */
.info {
  margin: 20px 0;
  padding: 10px;
  border-radius: 5px;
  background-color: #f0f0f0;
}

/* Three columns per hyper-parameter: label | slider | current value. */
.parameters {
  margin-bottom: 10px;
  display: grid;
  gap: 10px;
  align-items: center;
  grid-template-columns: auto 1fr auto;
}

/* ---------- Q-table ---------- */
table {
  margin-top: 20px;
  width: 100%;
  border-collapse: collapse;
}

th, td {
  padding: 8px;
  border: 1px solid #ddd;
  text-align: center;
}

/* ---------- Reward chart ---------- */
.chart {
  margin-top: 20px;
  height: 200px;
  width: 100%;
}

/* ---------- Footer credit ---------- */
.signature {
  margin-top: 30px;
  font-style: italic;
  text-align: center;
}
|
</style> |
|
</head> |
|
<body> |
|
<main>
  <h1>Simple Q-Learning Grid World Simulation - Designed by Pejman</h1>

  <div class="info">
    <p>
      This simulation demonstrates Q-learning - a reinforcement learning
      algorithm where an agent learns to navigate a grid world to reach a goal
      while avoiding obstacles.
    </p>
  </div>

  <!-- Hyper-parameter sliders. Each row is label | slider | live value;
       the script reads the sliders by id and writes the "*-value" spans. -->
  <div class="parameters">
    <label for="alpha">Learning Rate (α):</label>
    <input type="range" id="alpha" min="0.1" max="1" step="0.1" value="0.5" />
    <span id="alpha-value">0.5</span>

    <label for="gamma">Discount Factor (γ):</label>
    <input type="range" id="gamma" min="0.1" max="1" step="0.1" value="0.9" />
    <span id="gamma-value">0.9</span>

    <label for="epsilon">Exploration Rate (ε):</label>
    <input type="range" id="epsilon" min="0" max="1" step="0.1" value="0.3" />
    <span id="epsilon-value">0.3</span>
  </div>

  <!-- Explicit type="button": these only run script actions and must never
       act as submit buttons if the page is later wrapped in a form. -->
  <div class="controls">
    <button type="button" id="step-btn">Step</button>
    <button type="button" id="train-btn">Train Episode</button>
    <button type="button" id="auto-btn">Auto Train</button>
    <button type="button" id="stop-btn" disabled>Stop</button>
    <button type="button" id="reset-btn">Reset</button>
  </div>

  <!-- Rewritten by the script after every step. -->
  <div class="info" id="status">Episode: 1 | Step: 0 | Total Reward: 0</div>

  <!-- Filled with one .cell <div> per grid square by createGrid(). -->
  <div class="grid" id="grid"></div>

  <h2>Q-Table</h2>
  <div id="q-table"></div>

  <h2>Learning Progress</h2>
  <!-- A canvas is opaque to assistive technology, so give it a name. -->
  <canvas
    id="chart"
    class="chart"
    role="img"
    aria-label="Line chart of the total reward earned in each completed episode"
  ></canvas>
</main>

<!-- The .signature rule already italicises this line; the literal asterisks
     that used to surround it were leftover markdown emphasis markers. -->
<footer class="signature">
  © 2025 Pejman Ebrahimi - Basic Q-Learning Simulation
</footer>

<!-- Pinned to the 4.x line so a future major release cannot silently break
     the chart configuration used by the inline script below. -->
<script src="https://cdn.jsdelivr.net/npm/chart.js@4"></script>
|
<script> |
|
|
|
// ---- Environment layout -------------------------------------------------
// Container element that createGrid() fills with one <div> per cell.
const grid = document.getElementById("grid");

// The world is gridSize x gridSize; x grows to the right, y grows downwards.
const gridSize = 4;

// Current agent cell, the goal cell, and the cells that cannot be entered.
let agentPos = { x: 0, y: 0 };
const goalPos = { x: 3, y: 3 };
const obstacles = [
  { x: 1, y: 1 },
  { x: 2, y: 1 },
  { x: 1, y: 2 },
];

// ---- Learning hyper-parameters (kept in sync with the sliders) ----------
let alpha = 0.5, // learning rate
  gamma = 0.9, // discount factor
  epsilon = 0.3; // exploration rate

// Q-values keyed by "x,y"; each entry maps an action name to its value.
let qTable = {};

// ---- Episode bookkeeping ------------------------------------------------
let episode = 1,
  step = 0,
  totalReward = 0;
let rewards = []; // total reward of every finished episode
let running = false; // true while auto-training is active

// Action names in the fixed order up, right, down, left.
const actions = ["up", "right", "down", "left"];
|
|
|
|
|
/**
 * Rebuilds the board inside the #grid container.
 *
 * One <div id="cell-x-y" class="cell"> is appended per square, row by row.
 * The goal square also gets the "goal" class and the text "GOAL"; obstacle
 * squares get the "obstacle" class. The agent marker is placed afterwards.
 */
function createGrid() {
  grid.innerHTML = "";

  for (let row = 0; row < gridSize; row++) {
    for (let col = 0; col < gridSize; col++) {
      const tile = document.createElement("div");
      tile.id = `cell-${col}-${row}`;

      const isGoalTile = col === goalPos.x && row === goalPos.y;
      const isBlockedTile = obstacles.some(
        (blocked) => blocked.x === col && blocked.y === row
      );

      if (isGoalTile) {
        tile.className = "cell goal";
        tile.textContent = "GOAL";
      } else if (isBlockedTile) {
        tile.className = "cell obstacle";
      } else {
        tile.className = "cell";
      }

      grid.appendChild(tile);
    }
  }

  updateAgentPosition();
}
|
|
|
|
|
/**
 * Moves the agent marker to the cell matching agentPos.
 *
 * Any existing ".agent" element is removed first, then a fresh one is
 * appended to the cell whose id is "cell-<x>-<y>".
 */
function updateAgentPosition() {
  const previousMarker = document.querySelector(".agent");
  if (previousMarker !== null) {
    previousMarker.remove();
  }

  const marker = document.createElement("div");
  marker.className = "agent";

  const targetId = `cell-${agentPos.x}-${agentPos.y}`;
  document.getElementById(targetId).appendChild(marker);
}
|
|
|
|
|
/**
 * Resets qTable so every non-obstacle cell has a zero value for each action,
 * then redraws the Q-table.
 *
 * Keys are "x,y" strings; obstacle cells get no entry at all.
 */
function initQTable() {
  qTable = {};

  for (let row = 0; row < gridSize; row++) {
    for (let col = 0; col < gridSize; col++) {
      const blocked = obstacles.some(
        (cell) => cell.x === col && cell.y === row
      );
      if (!blocked) {
        // Same four keys, in the same order, as the `actions` list.
        const zeroed = {};
        for (const name of actions) {
          zeroed[name] = 0;
        }
        qTable[`${col},${row}`] = zeroed;
      }
    }
  }

  updateQTableDisplay();
}
|
|
|
|
|
/**
 * Redraws the Q-table as an HTML <table> inside #q-table.
 *
 * Columns are x coordinates and rows are y coordinates. Obstacle cells show
 * "X", the goal shows "GOAL", and every other cell shows the arrow of its
 * best action together with that action's Q-value, tinted green by value.
 *
 * Only moves that are actually possible from a cell are considered when
 * picking the best action. Entries for moves into a wall or an obstacle are
 * never updated and stay at 0, so including them would make the table point
 * into a wall with "(0.0)" whenever all legal moves have a negative value.
 */
function updateQTableDisplay() {
  const arrowFor = { up: "↑", right: "→", down: "↓", left: "←" };
  const offsetFor = {
    up: [0, -1],
    right: [1, 0],
    down: [0, 1],
    left: [-1, 0],
  };

  const isBlocked = (x, y) => obstacles.some((o) => o.x === x && o.y === y);
  const canEnter = (x, y) =>
    x >= 0 && x < gridSize && y >= 0 && y < gridSize && !isBlocked(x, y);

  const tableContainer = document.getElementById("q-table");
  tableContainer.innerHTML = "";

  const table = document.createElement("table");

  // Header row: an empty corner cell followed by the x coordinates.
  const thead = document.createElement("thead");
  const headerRow = document.createElement("tr");
  headerRow.appendChild(document.createElement("th"));
  for (let x = 0; x < gridSize; x++) {
    const th = document.createElement("th");
    th.scope = "col";
    th.textContent = x;
    headerRow.appendChild(th);
  }
  thead.appendChild(headerRow);
  table.appendChild(thead);

  // Body: one row per y coordinate, led by a row header.
  const tbody = document.createElement("tbody");
  for (let y = 0; y < gridSize; y++) {
    const row = document.createElement("tr");

    const th = document.createElement("th");
    th.scope = "row";
    th.textContent = y;
    row.appendChild(th);

    for (let x = 0; x < gridSize; x++) {
      const cell = document.createElement("td");

      if (isBlocked(x, y)) {
        cell.textContent = "X";
        cell.style.backgroundColor = "lightgray";
      } else if (x === goalPos.x && y === goalPos.y) {
        cell.textContent = "GOAL";
        cell.style.backgroundColor = "lightgreen";
      } else {
        const stateQ = qTable[`${x},${y}`];

        // Restrict the choice to moves that can really be taken from here;
        // fall back to every action only if the cell is fully enclosed.
        const legal = actions.filter((name) => {
          const [dx, dy] = offsetFor[name];
          return canEnter(x + dx, y + dy);
        });
        const candidates = legal.length > 0 ? legal : actions;

        // Strict ">" keeps the earliest candidate on ties.
        let bestAction = candidates[0];
        let bestValue = stateQ[bestAction];
        for (const name of candidates) {
          if (stateQ[name] > bestValue) {
            bestValue = stateQ[name];
            bestAction = name;
          }
        }

        const actionSymbol = arrowFor[bestAction] || "";
        cell.textContent = `${actionSymbol} (${bestValue.toFixed(1)})`;

        // Map values from [-5, 5] onto [0, 1], clamp, and use as tint.
        const normalizedValue = Math.max(
          0,
          Math.min(1, (bestValue + 5) / 10)
        );
        cell.style.backgroundColor = `rgba(0, 128, 0, ${
          normalizedValue * 0.5
        })`;
      }

      row.appendChild(cell);
    }

    tbody.appendChild(row);
  }
  table.appendChild(tbody);
  tableContainer.appendChild(table);
}
|
|
|
|
|
/**
 * Picks the agent's next action with an epsilon-greedy rule.
 *
 * With probability `epsilon` a uniformly random valid action is returned;
 * otherwise the valid action with the highest Q-value in the current state
 * is returned, the earliest one winning ties.
 *
 * @returns {string} One of the action names from getValidActions().
 */
function chooseAction() {
  const stateKey = `${agentPos.x},${agentPos.y}`;
  const options = getValidActions();

  const explore = Math.random() < epsilon;
  if (explore) {
    const pick = Math.floor(Math.random() * options.length);
    return options[pick];
  }

  // Exploit: strict ">" means a later action must beat, not equal, the
  // current leader.
  const values = qTable[stateKey];
  let leader = options[0];
  let leaderValue = values[leader];
  options.forEach((candidate) => {
    if (values[candidate] > leaderValue) {
      leader = candidate;
      leaderValue = values[candidate];
    }
  });
  return leader;
}
|
|
|
|
|
/**
 * Lists the actions the agent can take from its current cell.
 *
 * A move is valid when it stays on the board and the destination is not an
 * obstacle. The result keeps the order up, right, down, left.
 *
 * @returns {string[]} Valid action names.
 */
function getValidActions() {
  const { x, y } = agentPos;
  const lastIndex = gridSize - 1;

  // Each entry: action name, whether it stays on the board, destination.
  const moves = [
    { name: "up", onBoard: y > 0, toX: x, toY: y - 1 },
    { name: "right", onBoard: x < lastIndex, toX: x + 1, toY: y },
    { name: "down", onBoard: y < lastIndex, toX: x, toY: y + 1 },
    { name: "left", onBoard: x > 0, toX: x - 1, toY: y },
  ];

  return moves
    .filter((move) => move.onBoard && !isObstacle(move.toX, move.toY))
    .map((move) => move.name);
}
|
|
|
|
|
/**
 * Tells whether the cell at (x, y) is listed in `obstacles`.
 *
 * @param {number} x Column index.
 * @param {number} y Row index.
 * @returns {boolean} True when an obstacle occupies the cell.
 */
function isObstacle(x, y) {
  for (const blocked of obstacles) {
    if (blocked.x === x && blocked.y === y) {
      return true;
    }
  }
  return false;
}
|
|
|
|
|
/**
 * Applies an action to the agent and returns the resulting reward.
 *
 * The destination is clamped to the board. If it is an obstacle the agent
 * stays where it was and the reward is -10; reaching the goal gives 10;
 * any other outcome gives -1.
 *
 * @param {string} action One of "up", "right", "down", "left".
 * @returns {number} Reward for the move.
 */
function takeAction(action) {
  const lastIndex = gridSize - 1;
  let targetX = agentPos.x;
  let targetY = agentPos.y;

  if (action === "up") {
    targetY = Math.max(0, targetY - 1);
  } else if (action === "right") {
    targetX = Math.min(lastIndex, targetX + 1);
  } else if (action === "down") {
    targetY = Math.min(lastIndex, targetY + 1);
  } else if (action === "left") {
    targetX = Math.max(0, targetX - 1);
  }

  // Bumping into an obstacle: position is left untouched.
  if (isObstacle(targetX, targetY)) {
    return -10;
  }

  agentPos.x = targetX;
  agentPos.y = targetY;

  const atGoal = agentPos.x === goalPos.x && agentPos.y === goalPos.y;
  return atGoal ? 10 : -1;
}
|
|
|
|
|
/**
 * Applies the Q-learning update to qTable[state][action]:
 *
 *   Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))
 *
 * The maximum is taken only over actions that can actually be taken from the
 * next state. Entries for moves into a wall or an obstacle are never updated
 * and stay at 0; including them would clamp the bootstrap target at >= 0 and
 * can make a greedy agent oscillate between two cells forever. When the next
 * state is the goal the episode ends there, so the future value is 0.
 *
 * @param {string} state     Key "x,y" of the state the action was taken in.
 * @param {string} action    Name of the action that was taken.
 * @param {number} reward    Reward received for the transition.
 * @param {string} nextState Key "x,y" of the state that was reached.
 */
function updateQValue(state, action, reward, nextState) {
  const currQ = qTable[state][action];

  const [nextX, nextY] = nextState.split(",").map(Number);
  const isTerminal = nextX === goalPos.x && nextY === goalPos.y;

  let maxNextQ = 0;
  if (!isTerminal) {
    const offsetFor = {
      up: [0, -1],
      right: [1, 0],
      down: [0, 1],
      left: [-1, 0],
    };
    const canEnter = (x, y) =>
      x >= 0 &&
      x < gridSize &&
      y >= 0 &&
      y < gridSize &&
      !obstacles.some((o) => o.x === x && o.y === y);

    const nextStateQ = qTable[nextState];
    const legalValues = actions
      .filter((name) => {
        const [dx, dy] = offsetFor[name];
        return canEnter(nextX + dx, nextY + dy);
      })
      .map((name) => nextStateQ[name]);

    // A fully enclosed cell has no legal move; fall back to every entry
    // so Math.max never receives an empty argument list.
    const candidates =
      legalValues.length > 0 ? legalValues : Object.values(nextStateQ);
    maxNextQ = Math.max(...candidates);
  }

  const newQ = currQ + alpha * (reward + gamma * maxNextQ - currQ);
  qTable[state][action] = newQ;
}
|
|
|
|
|
/**
 * Runs one agent step: choose an action, move, learn, and refresh the UI.
 *
 * When the step lands on the goal, the episode's total reward is recorded
 * and charted, the episode counter advances, and the agent is sent back to
 * the start.
 *
 * @returns {boolean} True when this step finished the episode.
 */
function performStep() {
  const fromState = `${agentPos.x},${agentPos.y}`;
  const chosen = chooseAction();
  const gained = takeAction(chosen);
  updateAgentPosition();

  const toState = `${agentPos.x},${agentPos.y}`;
  updateQValue(fromState, chosen, gained, toState);

  step += 1;
  totalReward += gained;

  const statusLine = document.getElementById("status");
  statusLine.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;

  updateQTableDisplay();

  const reachedGoal = agentPos.x === goalPos.x && agentPos.y === goalPos.y;
  if (!reachedGoal) {
    return false;
  }

  // Episode finished: log the result and plot it against the episode number.
  rewards.push(totalReward);
  chart.data.labels.push(episode);
  chart.data.datasets[0].data.push(totalReward);
  chart.update();

  episode += 1;
  resetAgentPosition();
  return true;
}
|
|
|
|
|
/**
 * Runs steps back-to-back until the current episode reaches the goal.
 *
 * The loop is synchronous, so it is capped: some slider settings can lead
 * to an episode that never terminates, and an unbounded loop would freeze
 * the page. When the cap is hit the episode is left in progress with the
 * agent where it stopped, and a warning is logged; pressing the button
 * again continues from there.
 */
function trainEpisode() {
  const MAX_STEPS_PER_CALL = 5000;

  for (let taken = 0; taken < MAX_STEPS_PER_CALL; taken++) {
    if (performStep()) {
      return;
    }
  }

  console.warn(
    `trainEpisode: goal not reached after ${MAX_STEPS_PER_CALL} steps; ` +
      "stopping early to keep the page responsive."
  );
}
|
|
|
|
|
// Handles of the callback autoTrain() has scheduled for itself, if any.
let autoTrainTimeoutId = null;
let autoTrainFrameId = null;

/**
 * Drives continuous training while `running` is true.
 *
 * Each call performs one step and schedules the next call: on the next
 * animation frame normally, or after a 200 ms pause when an episode has
 * just finished.
 *
 * Any callback still pending from an earlier call is cancelled first.
 * Without this, pressing Stop and then Auto Train during the 200 ms pause
 * would leave the old timeout alive; it would find `running` true again and
 * a second training loop would run alongside the new one.
 */
function autoTrain() {
  // Cancelling a handle that has already fired is a harmless no-op.
  if (autoTrainTimeoutId !== null) {
    clearTimeout(autoTrainTimeoutId);
    autoTrainTimeoutId = null;
  }
  if (autoTrainFrameId !== null) {
    cancelAnimationFrame(autoTrainFrameId);
    autoTrainFrameId = null;
  }

  if (!running) return;

  const episodeDone = performStep();
  if (episodeDone) {
    autoTrainTimeoutId = setTimeout(autoTrain, 200);
  } else {
    autoTrainFrameId = requestAnimationFrame(autoTrain);
  }
}
|
|
|
|
|
/**
 * Starts a new episode: puts the agent back on (0, 0), zeroes the step and
 * reward counters, and rewrites the status line. Learned Q-values and the
 * episode number are left alone.
 */
function resetAgentPosition() {
  step = 0;
  totalReward = 0;

  agentPos = { x: 0, y: 0 };
  updateAgentPosition();

  const statusLine = document.getElementById("status");
  statusLine.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;
}
|
|
|
|
|
/**
 * Restores the whole simulation to its initial state: agent on (0, 0),
 * a zeroed Q-table, episode 1 with no steps or reward, an empty reward
 * history, and an empty chart.
 */
function resetEnvironment() {
  // Agent and learned values.
  agentPos = { x: 0, y: 0 };
  updateAgentPosition();
  initQTable();

  // Counters and history.
  episode = 1;
  step = 0;
  totalReward = 0;
  rewards = [];

  const statusLine = document.getElementById("status");
  statusLine.textContent = `Episode: ${episode} | Step: ${step} | Total Reward: ${totalReward}`;

  // Chart: drop every plotted point and redraw.
  const plotted = chart.data;
  plotted.labels = [];
  plotted.datasets[0].data = [];
  chart.update();
}
|
|
|
|
|
// Line chart of total reward per finished episode. It starts empty;
// performStep() appends a point per episode and resetEnvironment() clears it.
const ctx = document.getElementById("chart").getContext("2d");
const chart = new Chart(ctx, {
  type: "line",
  data: {
    labels: [], // episode numbers
    datasets: [
      {
        label: "Total Reward",
        data: [], // one total per episode, aligned with `labels`
        fill: true,
        tension: 0.1,
        borderColor: "blue",
        backgroundColor: "rgba(0, 0, 255, 0.1)",
      },
    ],
  },
  options: {
    responsive: true,
    // Do not force the y axis to include zero.
    scales: { y: { beginAtZero: false } },
  },
});
|
|
|
|
|
// ---- UI wiring ------------------------------------------------------------
// Scoped in a block so the element references do not become page globals.
{
  const autoButton = document.getElementById("auto-btn");
  const stopButton = document.getElementById("stop-btn");

  // Single step / single episode.
  document.getElementById("step-btn").addEventListener("click", performStep);
  document.getElementById("train-btn").addEventListener("click", trainEpisode);

  // Auto Train and Stop toggle each other's enabled state.
  autoButton.addEventListener("click", () => {
    running = true;
    autoButton.disabled = true;
    stopButton.disabled = false;
    autoTrain();
  });

  stopButton.addEventListener("click", () => {
    running = false;
    stopButton.disabled = true;
    autoButton.disabled = false;
  });

  document
    .getElementById("reset-btn")
    .addEventListener("click", resetEnvironment);

  // Each slider stores its parsed value in the matching variable and echoes
  // it, to one decimal place, in the "<id>-value" span beside it.
  const sliderSetters = {
    alpha: (value) => {
      alpha = value;
    },
    gamma: (value) => {
      gamma = value;
    },
    epsilon: (value) => {
      epsilon = value;
    },
  };

  for (const [sliderId, assign] of Object.entries(sliderSetters)) {
    const slider = document.getElementById(sliderId);
    slider.addEventListener("input", () => {
      const parsed = parseFloat(slider.value);
      assign(parsed);
      document.getElementById(`${sliderId}-value`).textContent =
        parsed.toFixed(1);
    });
  }
}

// ---- Initial render ---------------------------------------------------------
createGrid();
initQTable();
|
</script> |
|
</body> |
|
</html> |
|
|