Spaces:
Running
Running
Andy Lee
committed on
Commit
·
87e2629
1
Parent(s):
b79fff8
fix: stealth
Browse files
- benchmark.py +20 -14
- mapcrunch_controller.py +18 -14
benchmark.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# benchmark.py (Final
|
| 2 |
|
| 3 |
import os
|
| 4 |
import json
|
|
@@ -18,6 +18,7 @@ class MapGuesserBenchmark:
|
|
| 18 |
self.golden_labels = self.load_golden_labels()
|
| 19 |
print(f"📊 Loaded {len(self.golden_labels)} golden label samples")
|
| 20 |
|
|
|
|
| 21 |
def load_golden_labels(self) -> List[Dict]:
|
| 22 |
try:
|
| 23 |
with open(DATA_PATHS["golden_labels"], "r") as f:
|
|
@@ -47,29 +48,27 @@ class MapGuesserBenchmark:
|
|
| 47 |
def calculate_distance(
|
| 48 |
self, true_coords: Dict, predicted_coords: Optional[Tuple[float, float]]
|
| 49 |
) -> Optional[float]:
|
| 50 |
-
"""Calculates distance between true (lat,lon) and predicted (lat,lon)."""
|
| 51 |
if not predicted_coords:
|
| 52 |
return None
|
| 53 |
try:
|
| 54 |
true_lat, true_lng = true_coords["lat"], true_coords["lng"]
|
| 55 |
pred_lat, pred_lng = predicted_coords
|
| 56 |
-
|
| 57 |
R = 6371
|
| 58 |
lat1, lon1, lat2, lon2 = map(
|
| 59 |
math.radians, [true_lat, true_lng, pred_lat, pred_lng]
|
| 60 |
)
|
| 61 |
-
dlat = lat2 - lat1
|
| 62 |
-
dlon = lon2 - lon1
|
| 63 |
a = (
|
| 64 |
-
math.sin(
|
| 65 |
-
+ math.cos(lat1)
|
|
|
|
|
|
|
| 66 |
)
|
| 67 |
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
|
| 68 |
return R * c
|
| 69 |
-
except
|
| 70 |
-
print(f"Error in distance calculation: {e}")
|
| 71 |
return None
|
| 72 |
|
|
|
|
| 73 |
def run_benchmark(
|
| 74 |
self,
|
| 75 |
models: Optional[List[str]] = None,
|
|
@@ -114,7 +113,6 @@ class MapGuesserBenchmark:
|
|
| 114 |
print(f" {status} (Distance: {dist_str})")
|
| 115 |
|
| 116 |
except KeyboardInterrupt:
|
| 117 |
-
print("\n⏹️ Benchmark inner loop interrupted.")
|
| 118 |
raise
|
| 119 |
except Exception as e:
|
| 120 |
print(f" ❌ Test failed with unhandled exception: {e}")
|
|
@@ -128,16 +126,20 @@ class MapGuesserBenchmark:
|
|
| 128 |
)
|
| 129 |
|
| 130 |
except KeyboardInterrupt:
|
| 131 |
-
print("\n⏹️ Benchmark outer loop interrupted.")
|
| 132 |
break
|
| 133 |
|
| 134 |
self.save_results(all_results)
|
| 135 |
return self.generate_summary(all_results)
|
| 136 |
|
|
|
|
| 137 |
def run_single_test_with_bot(self, bot: GeoBot, location_data: Dict) -> Dict:
|
|
|
|
| 138 |
start_time = time.time()
|
| 139 |
|
| 140 |
assert bot.controller is not None
|
|
|
|
|
|
|
| 141 |
if not bot.controller.load_location_from_data(location_data):
|
| 142 |
return {
|
| 143 |
"success": False,
|
|
@@ -146,6 +148,10 @@ class MapGuesserBenchmark:
|
|
| 146 |
"sample_id": location_data["id"],
|
| 147 |
}
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
screenshot = bot.take_screenshot()
|
| 150 |
if not screenshot:
|
| 151 |
return {
|
|
@@ -155,7 +161,9 @@ class MapGuesserBenchmark:
|
|
| 155 |
"sample_id": location_data["id"],
|
| 156 |
}
|
| 157 |
|
|
|
|
| 158 |
predicted_lat_lon = bot.analyze_image(screenshot)
|
|
|
|
| 159 |
inference_time = time.time() - start_time
|
| 160 |
|
| 161 |
true_coords = location_data["coordinates"]
|
|
@@ -173,6 +181,7 @@ class MapGuesserBenchmark:
|
|
| 173 |
"success": is_success,
|
| 174 |
}
|
| 175 |
|
|
|
|
| 176 |
def save_results(self, results: List[Dict]):
|
| 177 |
if not results:
|
| 178 |
return
|
|
@@ -199,7 +208,6 @@ class MapGuesserBenchmark:
|
|
| 199 |
if model not in by_model:
|
| 200 |
by_model[model] = []
|
| 201 |
by_model[model].append(r)
|
| 202 |
-
|
| 203 |
for model, model_results in by_model.items():
|
| 204 |
successful_runs = [r for r in model_results if r.get("success")]
|
| 205 |
distances = [
|
|
@@ -207,10 +215,8 @@ class MapGuesserBenchmark:
|
|
| 207 |
for r in model_results
|
| 208 |
if r.get("distance_km") is not None
|
| 209 |
]
|
| 210 |
-
|
| 211 |
if not model_results:
|
| 212 |
continue
|
| 213 |
-
|
| 214 |
summary[model] = {
|
| 215 |
"success_rate": len(successful_runs) / len(model_results)
|
| 216 |
if model_results
|
|
|
|
| 1 |
+
# benchmark.py (Final Corrected Logic)
|
| 2 |
|
| 3 |
import os
|
| 4 |
import json
|
|
|
|
| 18 |
self.golden_labels = self.load_golden_labels()
|
| 19 |
print(f"📊 Loaded {len(self.golden_labels)} golden label samples")
|
| 20 |
|
| 21 |
+
# ... load_golden_labels, get_model_class and calculate_distance are unchanged ...
|
| 22 |
def load_golden_labels(self) -> List[Dict]:
|
| 23 |
try:
|
| 24 |
with open(DATA_PATHS["golden_labels"], "r") as f:
|
|
|
|
| 48 |
def calculate_distance(
    self, true_coords: Dict, predicted_coords: Optional[Tuple[float, float]]
) -> Optional[float]:
    """Return the great-circle distance in kilometres between two points.

    The distance is computed with the haversine formula using a mean Earth
    radius of 6371 km.

    Args:
        true_coords: Mapping holding the reference position under the
            ``"lat"`` and ``"lng"`` keys, in decimal degrees.
        predicted_coords: ``(lat, lon)`` pair in decimal degrees, or a falsy
            value (``None`` or an empty sequence) when there is no prediction.

    Returns:
        The distance in kilometres, or ``None`` when there is no prediction
        or when the coordinates are missing, malformed or not finite numbers.
    """
    if not predicted_coords:
        return None

    earth_radius_km = 6371
    try:
        true_lat, true_lng = true_coords["lat"], true_coords["lng"]
        pred_lat, pred_lng = predicted_coords
        lat1, lon1, lat2, lon2 = map(
            math.radians, [true_lat, true_lng, pred_lat, pred_lng]
        )
        haversine = (
            math.sin((lat2 - lat1) / 2) ** 2
            + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2
        )
    except (KeyError, TypeError, ValueError, OverflowError):
        # Missing keys, wrong arity, non-numeric or infinite values: the
        # caller treats None as "no distance available".
        return None

    # Rounding can push the term a hair outside [0, 1] (e.g. for antipodal
    # points), which would make sqrt() raise. Explicit comparisons are used
    # instead of min()/max() so that a NaN input still propagates as NaN.
    if haversine > 1.0:
        haversine = 1.0
    elif haversine < 0.0:
        haversine = 0.0

    central_angle = 2 * math.atan2(
        math.sqrt(haversine), math.sqrt(1 - haversine)
    )
    return earth_radius_km * central_angle
|
| 70 |
|
| 71 |
+
# **run_benchmark is unchanged; it only manages the loop and the browser lifecycle**
|
| 72 |
def run_benchmark(
|
| 73 |
self,
|
| 74 |
models: Optional[List[str]] = None,
|
|
|
|
| 113 |
print(f" {status} (Distance: {dist_str})")
|
| 114 |
|
| 115 |
except KeyboardInterrupt:
|
|
|
|
| 116 |
raise
|
| 117 |
except Exception as e:
|
| 118 |
print(f" ❌ Test failed with unhandled exception: {e}")
|
|
|
|
| 126 |
)
|
| 127 |
|
| 128 |
except KeyboardInterrupt:
|
| 129 |
+
print("\n⏹️ Benchmark outer loop interrupted by user.")
|
| 130 |
break
|
| 131 |
|
| 132 |
self.save_results(all_results)
|
| 133 |
return self.generate_summary(all_results)
|
| 134 |
|
| 135 |
+
# **Changed**: the internal order of operations in run_single_test_with_bot
|
| 136 |
def run_single_test_with_bot(self, bot: GeoBot, location_data: Dict) -> Dict:
|
| 137 |
+
"""Runs a test using an existing GeoBot instance with the correct logic order."""
|
| 138 |
start_time = time.time()
|
| 139 |
|
| 140 |
assert bot.controller is not None
|
| 141 |
+
|
| 142 |
+
# 步骤 1: 加载新地点 (这会刷新页面)
|
| 143 |
if not bot.controller.load_location_from_data(location_data):
|
| 144 |
return {
|
| 145 |
"success": False,
|
|
|
|
| 148 |
"sample_id": location_data["id"],
|
| 149 |
}
|
| 150 |
|
| 151 |
+
# 步骤 2: **关键修复**: 在新页面加载完成后,重新设置干净的“隐身”环境
|
| 152 |
+
bot.controller.setup_clean_environment()
|
| 153 |
+
|
| 154 |
+
# 步骤 3: 现在,对这个干净的页面进行截图
|
| 155 |
screenshot = bot.take_screenshot()
|
| 156 |
if not screenshot:
|
| 157 |
return {
|
|
|
|
| 161 |
"sample_id": location_data["id"],
|
| 162 |
}
|
| 163 |
|
| 164 |
+
# 步骤 4: AI 分析
|
| 165 |
predicted_lat_lon = bot.analyze_image(screenshot)
|
| 166 |
+
|
| 167 |
inference_time = time.time() - start_time
|
| 168 |
|
| 169 |
true_coords = location_data["coordinates"]
|
|
|
|
| 181 |
"success": is_success,
|
| 182 |
}
|
| 183 |
|
| 184 |
+
# ... save_results and generate_summary are unchanged ...
|
| 185 |
def save_results(self, results: List[Dict]):
|
| 186 |
if not results:
|
| 187 |
return
|
|
|
|
| 208 |
if model not in by_model:
|
| 209 |
by_model[model] = []
|
| 210 |
by_model[model].append(r)
|
|
|
|
| 211 |
for model, model_results in by_model.items():
|
| 212 |
successful_runs = [r for r in model_results if r.get("success")]
|
| 213 |
distances = [
|
|
|
|
| 215 |
for r in model_results
|
| 216 |
if r.get("distance_km") is not None
|
| 217 |
]
|
|
|
|
| 218 |
if not model_results:
|
| 219 |
continue
|
|
|
|
| 220 |
summary[model] = {
|
| 221 |
"success_rate": len(successful_runs) / len(model_results)
|
| 222 |
if model_results
|
mapcrunch_controller.py
CHANGED
|
@@ -53,23 +53,27 @@ class MapCrunchController:
|
|
| 53 |
"""
|
| 54 |
Forcefully enables stealth mode and hides UI elements for a clean benchmark environment.
|
| 55 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
try:
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
)
|
|
|
|
|
|
|
|
|
|
| 63 |
)
|
| 64 |
-
if not stealth_checkbox.is_selected():
|
| 65 |
-
# 使用JS点击更可靠,可以避免元素被遮挡的问题
|
| 66 |
-
self.driver.execute_script("arguments[0].click();", stealth_checkbox)
|
| 67 |
-
print("✅ Stealth mode programmatically enabled for benchmark.")
|
| 68 |
|
| 69 |
-
# 2.
|
| 70 |
-
# 这一步确保截图区域干净
|
| 71 |
self.driver.execute_script("""
|
| 72 |
-
const elementsToHide = ['#menu', '#
|
| 73 |
elementsToHide.forEach(sel => {
|
| 74 |
const el = document.querySelector(sel);
|
| 75 |
if (el) el.style.display = 'none';
|
|
@@ -77,7 +81,7 @@ class MapCrunchController:
|
|
| 77 |
const panoBox = document.querySelector('#pano-box');
|
| 78 |
if (panoBox) panoBox.style.height = '100vh';
|
| 79 |
""")
|
| 80 |
-
print("✅
|
| 81 |
|
| 82 |
except Exception as e:
|
| 83 |
print(f"⚠️ Warning: Could not fully configure clean environment: {e}")
|
|
|
|
| 53 |
"""
|
| 54 |
Forcefully enables stealth mode and hides UI elements for a clean benchmark environment.
|
| 55 |
"""
|
| 56 |
+
|
| 57 |
+
def setup_clean_environment(self):
|
| 58 |
+
"""
|
| 59 |
+
Forcefully enables FULL stealth mode by directly calling the site's own
|
| 60 |
+
JavaScript functions, ensuring a clean benchmark environment.
|
| 61 |
+
"""
|
| 62 |
try:
|
| 63 |
+
assert self.driver is not None
|
| 64 |
+
|
| 65 |
+
# 1. 直接调用网站自己的 hideLoc() 函数,这是最核心和最可靠的方法
|
| 66 |
+
# 它会隐藏地址栏和图像内的街道标签
|
| 67 |
+
self.driver.execute_script(
|
| 68 |
+
"if(typeof hideLoc === 'function') { hideLoc(); }"
|
| 69 |
+
)
|
| 70 |
+
print(
|
| 71 |
+
"✅ Stealth mode (in-image labels hidden) forced via JS function call."
|
| 72 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
+
# 2. 额外隐藏其他我们不想要的UI元素,确保截图绝对干净
|
|
|
|
| 75 |
self.driver.execute_script("""
|
| 76 |
+
const elementsToHide = ['#menu', '#social', '#bottom-box', '#topbar'];
|
| 77 |
elementsToHide.forEach(sel => {
|
| 78 |
const el = document.querySelector(sel);
|
| 79 |
if (el) el.style.display = 'none';
|
|
|
|
| 81 |
const panoBox = document.querySelector('#pano-box');
|
| 82 |
if (panoBox) panoBox.style.height = '100vh';
|
| 83 |
""")
|
| 84 |
+
print("✅ UI elements hidden for clean screenshot.")
|
| 85 |
|
| 86 |
except Exception as e:
|
| 87 |
print(f"⚠️ Warning: Could not fully configure clean environment: {e}")
|