Spaces:
Running
Running
Update index.html
Browse files- index.html +47 -3
index.html
CHANGED
@@ -1253,10 +1253,43 @@
|
|
1253 |
}, duration);
|
1254 |
}
|
1255 |
|
|
|
1256 |
// Update leaderboard
|
1257 |
function updateLeaderboard() {
|
1258 |
-
//
|
1259 |
-
leaderboard.sort((a, b) =>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1260 |
|
1261 |
// Keep only top 5
|
1262 |
if (leaderboard.length > 5) {
|
@@ -1292,6 +1325,12 @@
|
|
1292 |
|
1293 |
const rewardCell = document.createElement("td");
|
1294 |
rewardCell.textContent = entry.reward.toFixed(1);
|
|
|
|
|
|
|
|
|
|
|
|
|
1295 |
row.appendChild(rewardCell);
|
1296 |
|
1297 |
leaderboardBody.appendChild(row);
|
@@ -1341,7 +1380,12 @@
|
|
1341 |
function getAction(state) {
|
1342 |
// In optimal mode, always choose best action
|
1343 |
if (isOptimalMode) {
|
1344 |
-
|
|
|
|
|
|
|
|
|
|
|
1345 |
}
|
1346 |
|
1347 |
// Exploration (random action)
|
|
|
1253 |
}, duration);
|
1254 |
}
|
1255 |
|
1256 |
+
// Update leaderboard
|
1257 |
// Update leaderboard
|
1258 |
function updateLeaderboard() {
|
1259 |
+
// First sort by positive vs negative reward, then by steps
|
1260 |
+
leaderboard.sort((a, b) => {
|
1261 |
+
// First, prioritize positive rewards over negative ones
|
1262 |
+
if (
|
1263 |
+
(a.reward > 0 && b.reward < 0) ||
|
1264 |
+
(a.reward >= 0 && b.reward < 0)
|
1265 |
+
) {
|
1266 |
+
return -1;
|
1267 |
+
}
|
1268 |
+
if (
|
1269 |
+
(a.reward < 0 && b.reward > 0) ||
|
1270 |
+
(a.reward < 0 && b.reward >= 0)
|
1271 |
+
) {
|
1272 |
+
return 1;
|
1273 |
+
}
|
1274 |
+
|
1275 |
+
// If both are positive, higher reward wins
|
1276 |
+
if (a.reward > 0 && b.reward > 0) {
|
1277 |
+
// If rewards are close, sort by steps
|
1278 |
+
if (Math.abs(a.reward - b.reward) < 1) {
|
1279 |
+
return a.steps - b.steps;
|
1280 |
+
}
|
1281 |
+
// Otherwise, higher reward wins
|
1282 |
+
return b.reward - a.reward;
|
1283 |
+
}
|
1284 |
+
|
1285 |
+
// If both are negative, less negative reward wins
|
1286 |
+
if (a.reward < 0 && b.reward < 0) {
|
1287 |
+
return b.reward - a.reward;
|
1288 |
+
}
|
1289 |
+
|
1290 |
+
// If both rewards are exactly the same, sort by steps
|
1291 |
+
return a.steps - b.steps;
|
1292 |
+
});
|
1293 |
|
1294 |
// Keep only top 5
|
1295 |
if (leaderboard.length > 5) {
|
|
|
1325 |
|
1326 |
const rewardCell = document.createElement("td");
|
1327 |
rewardCell.textContent = entry.reward.toFixed(1);
|
1328 |
+
// Add color to reward based on value
|
1329 |
+
if (entry.reward > 0) {
|
1330 |
+
rewardCell.style.color = "var(--success)";
|
1331 |
+
} else if (entry.reward < 0) {
|
1332 |
+
rewardCell.style.color = "var(--danger)";
|
1333 |
+
}
|
1334 |
row.appendChild(rewardCell);
|
1335 |
|
1336 |
leaderboardBody.appendChild(row);
|
|
|
1380 |
function getAction(state) {
|
1381 |
// In optimal mode, always choose best action
|
1382 |
if (isOptimalMode) {
|
1383 |
+
const maxQ = Math.max(...qTable[state]);
|
1384 |
+
// If all values are 0, take a random action instead
|
1385 |
+
if (maxQ === 0 && qTable[state].every((val) => val === 0)) {
|
1386 |
+
return Math.floor(Math.random() * numActions);
|
1387 |
+
}
|
1388 |
+
return qTable[state].indexOf(maxQ);
|
1389 |
}
|
1390 |
|
1391 |
// Exploration (random action)
|