Spaces:
Running
on
Zero
Running
on
Zero
Upload 14 files
Browse files- content_generator.py +569 -0
- functional_zone_detector.py +298 -0
- object_description_generator.py +76 -1191
- object_group_processor.py +397 -0
- pattern_analyzer.py +371 -0
- prominence_calculator.py +147 -0
- scene_zone_identifier.py +35 -1121
- spatial_location_handler.py +346 -0
- specialized_scene_processor.py +527 -0
- statistics_processor.py +343 -0
- template_manager.py +0 -0
- template_processor.py +429 -0
- template_repository.py +834 -0
- text_optimizer.py +616 -0
content_generator.py
ADDED
|
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import random
|
| 3 |
+
import re
|
| 4 |
+
from typing import Dict, List, Optional, Union, Any
|
| 5 |
+
|
| 6 |
+
class ContentGenerator:
    """
    Content generator for template placeholders.

    Produces the text substituted into description templates: a
    natural-language summary of detected objects, scene-specific phrases,
    and a dictionary of default replacements.
    """

    # Placeholders that are always filled from the detected-objects summary.
    _DYNAMIC_PLACEHOLDERS = (
        'primary_objects', 'detected_objects_summary', 'main_objects',
        'functional_area', 'functional_zones_description', 'scene_elements',
    )

    # Plurals that cannot be produced by the suffix rules in _pluralize().
    _IRREGULAR_PLURALS = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "skis": "skis",
        "scissors": "scissors",
    }

    # Class IDs used by generate_scene_specific_content().
    # NOTE(review): the numbering looks like COCO class indices - confirm
    # against the detector that produces `class_id`.
    _FURNITURE_IDS = (56, 57, 58, 59, 60, 61)
    _ELECTRONICS_IDS = (62, 63, 64, 65, 66, 67, 68, 69, 70)
    _SEATING_IDS = (56, 57)  # chair, sofa
    _PERSON_ID = 0

    # Generic fallbacks used when no other source provides a replacement.
    _FALLBACK_REPLACEMENTS = {
        # Traffic and urban
        "crossing_pattern": "pedestrian crosswalks",
        "pedestrian_behavior": "people moving carefully",
        "traffic_pattern": "vehicle movement",
        "urban_elements": "city infrastructure",
        "street_elements": "urban features",
        "intersection_features": "traffic management systems",
        "pedestrian_density": "groups of people",
        "pedestrian_flow": "pedestrian movement",
        "traffic_description": "vehicle traffic",
        "people_and_vehicles": "pedestrians and cars",

        # Scene setting
        "scene_setting": "this urban environment",
        "location_context": "the area",
        "spatial_context": "the scene",
        "environmental_context": "this location",

        # Common furniture and equipment
        "furniture": "various furniture pieces",
        "seating": "seating arrangements",
        "electronics": "electronic devices",
        "appliances": "household appliances",

        # Activities and behavior
        "activities": "various activities",
        "interactions": "people interacting",
        "movement": "movement patterns",

        # Lighting and atmosphere
        "lighting_conditions": "ambient lighting",
        "atmosphere": "the overall atmosphere",
        "ambiance": "environmental ambiance",

        # Spatial description
        "spatial_arrangement": "spatial organization",
        "layout": "the layout",
        "composition": "visual composition",

        # Objects and elements
        "objects": "various objects",
        "elements": "scene elements",
        "features": "notable features",
        "details": "observable details",
    }

    # Per-scene-type defaults, keyed by scene type and then by placeholder.
    _SCENE_DEFAULTS = {
        "urban_intersection": {
            "crossing_pattern": "marked crosswalks",
            "pedestrian_behavior": "pedestrians crossing carefully",
            "traffic_pattern": "controlled traffic flow",
        },
        "city_street": {
            "traffic_description": "urban vehicle traffic",
            "street_elements": "city infrastructure",
            "people_and_vehicles": "pedestrians and vehicles",
        },
        "living_room": {
            "furniture": "comfortable living room furniture",
            "seating": "sofas and chairs",
            "electronics": "entertainment equipment",
        },
        "kitchen": {
            "appliances": "kitchen appliances",
            "cooking_equipment": "cooking tools and equipment",
        },
        "office_workspace": {
            "office_equipment": "work furniture and devices",
            "desk_setup": "desk and office chair",
        },
    }

    # Last-resort replacements used by get_emergency_replacement().
    _EMERGENCY_REPLACEMENTS = {
        "crossing_pattern": "pedestrian walkways",
        "pedestrian_behavior": "people moving through the area",
        "traffic_pattern": "vehicle movement",
        "scene_setting": "this location",
        "urban_elements": "city features",
        "street_elements": "urban components",
    }

    def __init__(self):
        """Initialize the content generator and preload default replacements."""
        self.logger = logging.getLogger(self.__class__.__name__)

        # Preload the default replacement dictionary.
        self.default_replacements = self._generate_default_replacements()

        self.logger.debug("ContentGenerator initialized successfully")

    def _generate_default_replacements(self) -> Dict[str, str]:
        """
        Build the default template replacement dictionary.

        Every key appears exactly once. Python keeps only the last value
        when a dict literal repeats a key, so a repeated key would leave
        the earlier value silently dead.

        Returns:
            Dict[str, str]: A new dictionary mapping placeholder names to
            default replacement text.
        """
        return {
            # Scene introduction
            "scene_introduction": "this scene",
            "location_prefix": "this location",
            "setting_description": "this setting",
            "area_description": "this area",
            "environment_description": "this environment",
            "spatial_introduction": "this space",

            # Indoor
            "furniture": "various furniture pieces",
            "seating": "comfortable seating",
            "electronics": "entertainment devices",
            "bed_type": "a bed",
            "bed_location": "room",
            "bed_description": "sleeping arrangements",
            "extras": "personal items",
            "table_setup": "a dining table and chairs",
            "table_description": "a dining surface",
            "dining_items": "dining furniture and tableware",
            "appliances": "kitchen appliances",
            "kitchen_items": "cooking utensils and dishware",
            "cooking_equipment": "cooking equipment",
            "office_equipment": "work-related furniture and devices",
            "desk_setup": "a desk and chair",
            "computer_equipment": "electronic devices",

            # Outdoor / urban (also used by intersection templates)
            "traffic_description": "active urban traffic",
            "people_and_vehicles": "pedestrians and vehicles",
            "street_elements": "urban infrastructure elements",
            "park_features": "benches and greenery",
            "outdoor_elements": "natural features",
            "park_description": "outdoor amenities",
            "store_elements": "merchandise displays",
            "shopping_activity": "customers browse and shop",
            "store_items": "products for sale",

            # Upscale dining
            "design_elements": "elegant decor",
            "lighting": "stylish lighting fixtures",

            # Asian commercial street
            "storefront_features": "compact shops",
            "pedestrian_flow": "steady pedestrian movement",
            "asian_elements": "distinctive cultural elements",
            "cultural_elements": "traditional design features",
            "signage": "colorful signs",
            "street_activities": "busy urban activity",

            # Financial district
            "buildings": "tall buildings",
            "traffic_elements": "vehicles",
            "skyscrapers": "high-rise buildings",
            "road_features": "wide streets",
            "architectural_elements": "cultural buildings",
            "city_landmarks": "prominent structures",

            # Intersection
            "crossing_pattern": "clearly marked pedestrian crossings",
            "pedestrian_behavior": "careful pedestrian movement",
            "pedestrian_density": "multiple groups of pedestrians",
            "traffic_pattern": "well-regulated traffic flow",

            # Transit
            "transit_vehicles": "public transportation vehicles",
            "passenger_activity": "commuter movement",
            "transportation_modes": "various transit options",
            "passenger_needs": "waiting areas",
            "transit_infrastructure": "transit facilities",
            "passenger_movement": "commuter flow",

            # Shopping district
            "retail_elements": "shops and displays",
            "store_types": "various retail establishments",
            "walkway_features": "pedestrian pathways",
            "commercial_signage": "store signs",
            "consumer_behavior": "shopping activities",

            # Aerial view
            "commercial_layout": "organized retail areas",
            "pedestrian_pattern": "people movement patterns",
            "gathering_features": "public gathering spaces",
            "movement_pattern": "crowd flow patterns",
            "urban_elements": "city infrastructure",
            "public_activity": "social interaction",

            # Culture-specific elements
            "stall_elements": "vendor booths",
            "lighting_features": "decorative lights",
            "food_elements": "food offerings",
            "vendor_stalls": "market stalls",
            "nighttime_activity": "evening commerce",
            "cultural_lighting": "traditional lighting",
            "night_market_sounds": "lively market sounds",
            "evening_crowd_behavior": "nighttime social activity",
            "religious_structures": "sacred buildings",
            "decorative_features": "ornamental designs",
            "cultural_practices": "traditional activities",
            "temple_architecture": "religious structures",
            "sensory_elements": "atmospheric elements",
            "visitor_activities": "cultural experiences",
            "ritual_activities": "ceremonial practices",
            "cultural_symbols": "meaningful symbols",
            "architectural_style": "historical buildings",
            "historic_elements": "traditional architecture",
            "urban_design": "city planning elements",
            "social_behaviors": "public interactions",
            "european_features": "European architectural details",
            "tourist_activities": "visitor activities",
            "local_customs": "regional practices",

            # Time-specific elements
            "lighting_effects": "artificial lighting",
            "shadow_patterns": "light and shadow",
            "urban_features": "city elements",
            "illuminated_elements": "lit structures",
            "evening_activities": "nighttime activities",
            "light_sources": "lighting points",
            "lit_areas": "illuminated spaces",
            "shadowed_zones": "darker areas",
            "illuminated_signage": "bright signs",
            "colorful_lighting": "multicolored lights",
            "neon_elements": "neon signs",
            "night_crowd_behavior": "evening social patterns",
            "light_displays": "lighting installations",
            "building_features": "architectural elements",
            "nightlife_activities": "evening entertainment",
            "lighting_modifier": "bright",

            # Mixed-environment elements
            "transitional_elements": "connecting features",
            "indoor_features": "interior elements",
            "outdoor_setting": "exterior spaces",
            "interior_amenities": "inside comforts",
            "exterior_features": "outside elements",
            "inside_elements": "interior design",
            "outside_spaces": "outdoor areas",
            "dual_environment_benefits": "combined settings",
            "passenger_activities": "waiting behaviors",
            "transportation_types": "transit vehicles",
            "sheltered_elements": "covered areas",
            "exposed_areas": "open sections",
            "waiting_behaviors": "passenger activities",
            "indoor_facilities": "inside services",
            "platform_features": "transit platform elements",
            "transit_routines": "transportation procedures",

            # Specialized venue elements
            "seating_arrangement": "spectator seating",
            "playing_surface": "athletic field",
            "sporting_activities": "sports events",
            "spectator_facilities": "viewer accommodations",
            "competition_space": "sports arena",
            "sports_events": "athletic competitions",
            "viewing_areas": "audience sections",
            "field_elements": "field markings and equipment",
            "game_activities": "competitive play",
            "construction_equipment": "building machinery",
            "building_materials": "construction supplies",
            "construction_activities": "building work",
            "work_elements": "construction tools",
            "structural_components": "building structures",
            "site_equipment": "construction gear",
            "raw_materials": "building supplies",
            "construction_process": "building phases",
            "medical_elements": "healthcare equipment",
            "clinical_activities": "medical procedures",
            "facility_design": "healthcare layout",
            "healthcare_features": "medical facilities",
            "patient_interactions": "care activities",
            "equipment_types": "medical devices",
            "care_procedures": "health services",
            "treatment_spaces": "clinical areas",
            "educational_furniture": "learning furniture",
            "learning_activities": "educational practices",
            "instructional_design": "teaching layout",
            "classroom_elements": "school equipment",
            "teaching_methods": "educational approaches",
            "student_engagement": "learning participation",
            "learning_spaces": "educational areas",
            "educational_tools": "teaching resources",
            "knowledge_transfer": "learning exchanges",
        }

    @staticmethod
    def _join_with_and(items: List[str]) -> str:
        """Join phrases as "a", "a and b" or "a, b, and c"."""
        if len(items) == 1:
            return items[0]
        if len(items) == 2:
            return f"{items[0]} and {items[1]}"
        return ", ".join(items[:-1]) + f", and {items[-1]}"

    @classmethod
    def _pluralize(cls, name: str) -> str:
        """Return an English plural for an object class name."""
        lowered = name.lower()
        irregular = cls._IRREGULAR_PLURALS.get(lowered)
        if irregular is not None:
            return irregular
        if lowered.endswith(("s", "x", "z", "ch", "sh")):
            return f"{name}es"
        # Consonant + "y" becomes "ies"; vowel + "y" just takes "s".
        if len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
            return f"{name[:-1]}ies"
        return f"{name}s"

    def generate_objects_summary(self, detected_objects: List[Dict]) -> str:
        """
        Build a natural-language summary of the detected objects.

        Objects are grouped by class name and ranked by an importance score
        that combines the instance count (weight 0.6) with the average
        confidence (weight 0.4). The five highest-ranked classes are listed.

        Args:
            detected_objects: List of detection dicts. The "class_name" and
                "confidence" keys are read; a missing or empty class name is
                treated as "unknown" and a missing confidence as 0.5.

        Returns:
            str: The summary, or "various elements" when there are no
            objects or the summary cannot be built.
        """
        try:
            if not detected_objects:
                return "various elements"

            # Aggregate count and summed confidence per class name.
            class_stats: Dict[str, Dict[str, float]] = {}
            for obj in detected_objects:
                class_name = obj.get("class_name") or "unknown"
                confidence = obj.get("confidence", 0.5)
                stats = class_stats.setdefault(
                    class_name, {"count": 0, "total_confidence": 0}
                )
                stats["count"] += 1
                stats["total_confidence"] += confidence

            # Importance score: instance count combined with mean confidence.
            ranked = []
            for class_name, stats in class_stats.items():
                count = stats["count"]
                avg_confidence = stats["total_confidence"] / count
                importance_score = (count * 0.6) + (avg_confidence * 0.4)
                ranked.append((class_name, count, importance_score))

            # The sort is stable, so ties keep first-seen order.
            ranked.sort(key=lambda item: item[2], reverse=True)

            descriptions = []
            for class_name, count, _ in ranked[:5]:
                clean_name = str(class_name).replace('_', ' ').strip() or "unknown"
                if count == 1:
                    article = "an" if clean_name[0].lower() in 'aeiou' else "a"
                    descriptions.append(f"{article} {clean_name}")
                else:
                    descriptions.append(f"{count} {self._pluralize(clean_name)}")

            return self._join_with_and(descriptions)

        except Exception as e:
            self.logger.warning("Error generating objects summary: %s", e)
            return "various elements"

    def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
                                    all_replacements: Dict, detected_objects: List[Dict],
                                    scene_type: str) -> str:
        """
        Resolve the replacement text for one placeholder; never returns empty.

        Sources are tried in this order: the detected-objects summary (for
        dynamic placeholders), `all_replacements`, `fillers`, scene-specific
        content, the generic fallback table, the scene-type defaults, and
        finally a phrase derived from the placeholder name itself.

        Args:
            placeholder: Placeholder name.
            fillers: Mapping of placeholder name to a list of candidate
                phrases; one to three of them are picked at random.
            all_replacements: Mapping of placeholder name to replacement text.
            detected_objects: List of detection dicts.
            scene_type: Scene type identifier.

        Returns:
            str: The replacement text.
        """
        try:
            # A missing mapping is treated as "no entries".
            fillers = fillers or {}
            all_replacements = all_replacements or {}

            # Dynamic placeholders are filled from the detections first.
            if placeholder in self._DYNAMIC_PLACEHOLDERS:
                dynamic_content = self.generate_objects_summary(detected_objects)
                if dynamic_content and dynamic_content.strip():
                    return dynamic_content.strip()

            # Predefined replacement text.
            if placeholder in all_replacements:
                replacement = all_replacements[placeholder]
                if replacement:
                    replacement_text = str(replacement).strip()
                    if replacement_text:
                        return replacement_text

            # Template fillers: pick one to three distinct options.
            if placeholder in fillers:
                options = fillers[placeholder]
                if options and isinstance(options, (list, tuple)):
                    # Coerce with str() in both the filter and the result so
                    # a non-string option cannot raise.
                    valid_options = [
                        str(opt).strip() for opt in options
                        if opt and str(opt).strip()
                    ]
                    if valid_options:
                        num_items = min(len(valid_options), random.randint(1, 3))
                        selected_items = random.sample(valid_options, num_items)
                        return self._join_with_and(selected_items)

            # Content derived from the detections for specific placeholders.
            scene_specific_replacement = self.generate_scene_specific_content(
                placeholder, detected_objects, scene_type
            )
            if scene_specific_replacement and scene_specific_replacement.strip():
                return scene_specific_replacement.strip()

            # Generic fallback table.
            if placeholder in self._FALLBACK_REPLACEMENTS:
                return self._FALLBACK_REPLACEMENTS[placeholder]

            # Defaults that depend on the scene type.
            scene_based_default = self.get_scene_based_default(placeholder, scene_type)
            if scene_based_default:
                return scene_based_default

            # Last resort: turn the placeholder name into a readable phrase.
            cleaned_placeholder = placeholder.replace('_', ' ')

            if placeholder.endswith('_pattern'):
                return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
            elif placeholder.endswith('_behavior'):
                return f"{cleaned_placeholder.replace(' behavior', '')} activity"
            elif placeholder.endswith('_description'):
                return f"{cleaned_placeholder.replace(' description', '')} elements"
            elif placeholder.endswith(('_elements', '_features')):
                return cleaned_placeholder
            else:
                # A name without underscores carries no readable phrase.
                return cleaned_placeholder if cleaned_placeholder != placeholder else "various elements"

        except Exception as e:
            self.logger.warning(
                "Error getting replacement for placeholder '%s': %s", placeholder, e
            )
            # Still return something meaningful when resolution fails.
            return placeholder.replace('_', ' ') if placeholder else "scene elements"

    def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
        """
        Look up a default that depends on the scene type.

        Args:
            placeholder: Placeholder name.
            scene_type: Scene type identifier.

        Returns:
            Optional[str]: The scene-specific default, or None when the
            scene type or placeholder has no entry.
        """
        try:
            return self._SCENE_DEFAULTS.get(scene_type, {}).get(placeholder)
        except Exception as e:
            # Reached for an unhashable scene_type or placeholder.
            self.logger.warning(
                "Error getting scene-based default for '%s' in '%s': %s",
                placeholder, scene_type, e
            )
            return None

    def _describe_matching_objects(self, detected_objects: List[Dict], class_ids,
                                   default_name: str, limit: int) -> Optional[str]:
        """
        Name the first `limit` detections whose class_id is in `class_ids`.

        Duplicate names are dropped while keeping first-seen order, so the
        result is the same on every run.

        Returns:
            Optional[str]: Comma-separated names, or None when nothing matches.
        """
        matches = [obj for obj in detected_objects if obj.get("class_id") in class_ids]
        if not matches:
            return None
        names = [obj.get("class_name", default_name) for obj in matches[:limit]]
        return ", ".join(dict.fromkeys(names))

    def generate_scene_specific_content(self, placeholder: str, detected_objects: List[Dict],
                                        scene_type: str) -> Optional[str]:
        """
        Generate placeholder content from the detections.

        Handles the "furniture", "electronics", "people_count" and "seating"
        placeholders. `scene_type` is accepted but not used.

        Args:
            placeholder: Placeholder name.
            detected_objects: List of detection dicts; "class_id" and
                "class_name" are read.
            scene_type: Scene type identifier.

        Returns:
            Optional[str]: The generated content, or None when the
            placeholder has no dedicated logic or generation fails.
        """
        try:
            objects = detected_objects or []

            if placeholder == "furniture":
                described = self._describe_matching_objects(
                    objects, self._FURNITURE_IDS, "furniture", 3
                )
                return described if described is not None else "various furniture items"

            if placeholder == "electronics":
                described = self._describe_matching_objects(
                    objects, self._ELECTRONICS_IDS, "electronic device", 3
                )
                return described if described is not None else "electronic devices"

            if placeholder == "people_count":
                people_count = sum(
                    1 for obj in objects if obj.get("class_id") == self._PERSON_ID
                )
                if people_count == 0:
                    return "no people"
                if people_count == 1:
                    return "one person"
                if people_count < 5:
                    return f"{people_count} people"
                return "several people"

            if placeholder == "seating":
                described = self._describe_matching_objects(
                    objects, self._SEATING_IDS, "seating", 2
                )
                return described if described is not None else "seating arrangements"

            # No dedicated logic for this placeholder.
            return None

        except Exception as e:
            self.logger.warning(
                "Error generating scene-specific content for '%s': %s", placeholder, e
            )
            return None

    def get_emergency_replacement(self, placeholder: str) -> str:
        """
        Return a safe last-resort replacement that keeps sentences grammatical.

        Args:
            placeholder: Placeholder name.

        Returns:
            str: A known emergency phrase, the placeholder words when it has
            more than one word, "various <word>" for a single word, or
            "various elements" for an empty placeholder.
        """
        if placeholder in self._EMERGENCY_REPLACEMENTS:
            return self._EMERGENCY_REPLACEMENTS[placeholder]

        # Derive a phrase from the placeholder name itself.
        cleaned = (placeholder or "").replace('_', ' ').strip()
        if not cleaned:
            return "various elements"
        if len(cleaned.split()) > 1:
            return cleaned
        return f"various {cleaned}"
|
functional_zone_detector.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import logging
|
| 3 |
+
import traceback
|
| 4 |
+
from typing import Dict, List, Any, Optional
|
| 5 |
+
|
| 6 |
+
logger = logging.getLogger(__name__)
|
| 7 |
+
|
| 8 |
+
class FunctionalZoneDetector:
|
| 9 |
+
"""
|
| 10 |
+
負責基於物件關聯性的功能區域識別
|
| 11 |
+
處理物件組合分析和描述性區域命名
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
def __init__(self):
    """Initialize the functional zone detector and log that it is ready."""
    ready_message = "FunctionalZoneDetector initialized successfully"
    try:
        logger.info(ready_message)
    except Exception as init_error:
        # Record the failure and its traceback, then let it propagate.
        failure_detail = str(init_error)
        logger.error(f"Failed to initialize FunctionalZoneDetector: {failure_detail}")
        logger.error(traceback.format_exc())
        raise
|
| 22 |
+
|
| 23 |
+
def identify_primary_functional_area(self, detected_objects: List[Dict]) -> Optional[Dict]:
    """
    Identify the primary functional area from object combinations.

    Candidate areas are tried in a fixed order (dining, seating/relaxation,
    workspace) and the first one that detect_functional_combination()
    reports as present is returned.

    Args:
        detected_objects: List of detected object dicts.

    Returns:
        Optional[Dict]: The primary functional area dict, or None when no
        candidate matches or an error occurs.
    """
    # Each entry: (primary class IDs, supporting class IDs,
    #              minimum supporting objects, description).
    candidate_areas = (
        # Dining area: dining table with chair / wine glass / cup / fork / knife.
        ([60], [56, 40, 41, 42, 43], 2,
         "Dining area with table and seating arrangement"),
        # Relaxation area: sofa or bed with tv / potted plant / chair.
        ([57, 59], [62, 58, 56], 1,
         "Seating and relaxation area"),
        # Workspace: laptop or keyboard with dining table / chair / mouse.
        ([63, 66], [60, 56, 64], 2,
         "Workspace area with electronics and furniture"),
    )

    try:
        for primary_ids, supporting_ids, min_supporting, description in candidate_areas:
            area = self.detect_functional_combination(
                detected_objects,
                primary_objects=primary_ids,
                supporting_objects=supporting_ids,
                min_supporting=min_supporting,
                description_template=description
            )
            if area:
                return area

        return None

    except Exception as e:
        logger.error(f"Error identifying primary functional area: {str(e)}")
        logger.error(traceback.format_exc())
        return None
|
| 74 |
+
|
| 75 |
+
def identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
    """
    Identify a secondary functional area that does not overlap existing zones.

    Regions already used by ``existing_zones`` are passed to
    ``detect_functional_combination`` as ``exclude_regions``. Candidates are
    tried in order (decorative plants, then kitchen appliances) and the
    first truthy result is returned.

    Args:
        detected_objects: List of detected-object dictionaries.
        existing_zones: Mapping of zone key -> zone dictionary for zones that
            were already identified. May be None or empty; values that are
            not dictionaries are ignored.

    Returns:
        The zone dictionary of the first matching candidate, or None when
        no candidate matches or an error occurs.
    """
    try:
        # Collect regions that are already taken. A missing mapping or a
        # malformed entry must not abort the whole search.
        used_regions = {
            zone.get("region")
            for zone in (existing_zones or {}).values()
            if isinstance(zone, dict)
        }

        # Each row: (primary ids, supporting ids, min primary, description)
        candidate_specs = (
            # Decorative: at least three potted plants, optionally vases
            ([58], [75], 3, "Decorative area with plants and ornamental items"),
            # Kitchen: two of refrigerator / microwave / oven, optionally sink
            ([72, 68, 69], [71], 2, "Kitchen appliance and storage area"),
        )

        for primary_ids, supporting_ids, required_primary, label in candidate_specs:
            zone = self.detect_functional_combination(
                detected_objects,
                primary_objects=primary_ids,
                supporting_objects=supporting_ids,
                min_supporting=0,
                min_primary=required_primary,
                description_template=label,
                exclude_regions=used_regions
            )
            if zone:
                return zone

        return None

    except Exception as e:
        logger.error(f"Error identifying secondary functional area: {str(e)}")
        logger.error(traceback.format_exc())
        return None
|
| 122 |
+
|
| 123 |
+
def detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
                                  supporting_objects: List[int], min_supporting: int,
                                  description_template: str, min_primary: int = 1,
                                  exclude_regions: set = None,
                                  min_confidence: float = 0.4) -> Dict:
    """
    Generic detector for a functional zone made of primary and supporting objects.

    Objects are grouped by their ``region`` value. A region qualifies when it
    holds at least ``min_primary`` primary objects and ``min_supporting``
    supporting objects. Among qualifying regions the one with the highest
    score (``2 * primary_count + supporting_count``) is returned; on a tie
    the region encountered first wins.

    Args:
        detected_objects: List of detected-object dictionaries. The keys read
            here are ``class_id``, ``confidence``, ``region`` and ``class_name``.
        primary_objects: class_id values that count as primary objects.
        supporting_objects: class_id values that count as supporting objects.
        min_supporting: Minimum number of supporting objects per region.
        description_template: Description stored in the returned zone.
        min_primary: Minimum number of primary objects per region.
        exclude_regions: Regions that must not be considered.
        min_confidence: Minimum detection confidence for an object to be
            counted (defaults to the previously hard-coded 0.4).

    Returns:
        ``{"region": ..., "objects": [...], "description": ...}`` for the best
        region (primary object names first), or None when no region
        qualifies or an error occurs.
    """
    try:
        if exclude_regions is None:
            exclude_regions = set()

        # Classify each confident object exactly once. A class id listed in
        # both id lists is treated as primary only, so it is never counted twice.
        primary_objs = []
        supporting_objs = []
        for obj in detected_objects:
            class_id = obj.get("class_id")
            if class_id in primary_objects:
                target = primary_objs
            elif class_id in supporting_objects:
                target = supporting_objs
            else:
                continue
            if obj.get("confidence", 0) >= min_confidence:
                target.append(obj)

        # Cheap early exit: no region can qualify if the totals do not.
        if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
            return None

        # Group by region, primaries first so they lead the "all" list.
        region_combinations = {}
        for role, group in (("primary", primary_objs), ("supporting", supporting_objs)):
            for obj in group:
                region = obj.get("region")

                # Objects without a region cannot form a located zone. Keeping
                # them under the key None would also collide with the
                # "nothing found" sentinel used below.
                if region is None or region in exclude_regions:
                    continue

                bucket = region_combinations.setdefault(
                    region, {"primary": [], "supporting": [], "all": []}
                )
                bucket["all"].append(obj)
                bucket[role].append(obj)

        # Pick the qualifying region with the highest score.
        best_region = None
        best_score = 0
        for region, objs in region_combinations.items():
            primary_count = len(objs["primary"])
            supporting_count = len(objs["supporting"])

            if primary_count < min_primary or supporting_count < min_supporting:
                continue

            # Primary objects weigh twice as much as supporting ones.
            score = primary_count * 2 + supporting_count
            if score > best_score:
                best_score = score
                best_region = region

        if best_region is None:
            return None

        best_combination = region_combinations[best_region]
        # A detection without a class name should not abort the whole zone.
        all_objects = [obj.get("class_name", "unknown") for obj in best_combination["all"]]

        return {
            "region": best_region,
            "objects": all_objects,
            "description": description_template
        }

    except Exception as e:
        logger.error(f"Error detecting functional combination: {str(e)}")
        logger.error(traceback.format_exc())
        return None
|
| 216 |
+
|
| 217 |
+
def generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
    """
    Build a descriptive zone key from a zone's objects, description and region.

    The functional type is derived from keywords in the object names, falling
    back to keywords in the zone description. A zone whose description
    mentions "work" and that contains a laptop or keyboard is labelled a
    workspace even when a table is present, because workspace zones commonly
    include a (dining) table as furniture.

    Args:
        zone_data: Zone dictionary; the keys read are ``objects`` (list of
            object names), ``region`` and ``description``.
        priority_level: Priority of the zone ("primary" / "secondary").

    Returns:
        str: The zone key. Secondary zones with a known region are prefixed
        with the phrase returned by ``get_spatial_context_description``.
        "activity area" is returned when an error occurs.
    """
    try:
        # Lower-case once; entries that are not strings carry no keyword
        # information and are ignored instead of failing the whole lookup.
        names = [obj.lower() for obj in (zone_data.get("objects") or [])
                 if isinstance(obj, str)]
        region = zone_data.get("region", "")
        description = (zone_data.get("description") or "").lower()

        def mentions(*keywords):
            return any(keyword in name for name in names for keyword in keywords)

        # Determine the functional type, object keywords first.
        if "work" in description and mentions("laptop", "keyboard"):
            # Checked before the table keyword so a desk-like table does not
            # turn a workspace into a dining area.
            base_name = "workspace area"
        elif mentions("dining", "table"):
            base_name = "dining area"
        elif mentions("chair", "sofa"):
            base_name = "seating area"
        elif mentions("bed"):
            base_name = "sleeping area"
        elif mentions("laptop", "keyboard"):
            base_name = "workspace area"
        elif mentions("plant", "vase"):
            base_name = "decorative area"
        elif mentions("refrigerator", "microwave"):
            base_name = "kitchen area"
        # No object keyword matched: infer from the description.
        elif "dining" in description:
            base_name = "dining area"
        elif "seating" in description or "relaxation" in description:
            base_name = "seating area"
        elif "work" in description:
            base_name = "workspace area"
        elif "decorative" in description:
            base_name = "decorative area"
        elif "kitchen" in description:
            base_name = "kitchen area"
        else:
            base_name = "functional area"

        # Secondary zones get a positional prefix so they can be told apart.
        if priority_level == "secondary" and region:
            spatial_context = self.get_spatial_context_description(region)
            if spatial_context:
                return f"{spatial_context} {base_name}"

        return base_name

    except Exception as e:
        logger.warning(f"Error generating descriptive zone key: {str(e)}")
        return "activity area"
|
| 270 |
+
|
| 271 |
+
def get_spatial_context_description(self, region: str) -> str:
    """
    Translate a region identifier into a short spatial phrase.

    Args:
        region: Region identifier such as "top_left" or "middle_center".

    Returns:
        str: The matching phrase (e.g. "upper left"), or an empty string for
        an unknown region or when the lookup fails.
    """
    region_phrases = dict(
        top_left="upper left",
        top_center="upper",
        top_right="upper right",
        middle_left="left side",
        middle_center="central",
        middle_right="right side",
        bottom_left="lower left",
        bottom_center="lower",
        bottom_right="lower right",
    )

    try:
        phrase = region_phrases.get(region, "")
    except Exception as e:
        logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
        phrase = ""
    return phrase
|
object_description_generator.py
CHANGED
|
@@ -4,6 +4,11 @@ import traceback
|
|
| 4 |
from typing import Dict, List, Tuple, Optional, Any
|
| 5 |
import numpy as np
|
| 6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
class ObjectDescriptionError(Exception):
    """Custom exception for failures during object description generation."""
|
|
@@ -12,9 +17,12 @@ class ObjectDescriptionError(Exception):
|
|
| 12 |
class ObjectDescriptionGenerator:
|
| 13 |
"""
|
| 14 |
物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
|
|
|
|
| 15 |
|
| 16 |
該類別處理物件相關的所有描述生成邏輯,包括重要物件的辨識、
|
| 17 |
空間位置描述、物件列表格式化以及描述文本的優化。
|
|
|
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
def __init__(self,
|
|
@@ -31,6 +39,7 @@ class ObjectDescriptionGenerator:
|
|
| 31 |
max_categories_to_return: 返回的物件類別最大數量
|
| 32 |
max_total_objects: 返回的物件總數上限
|
| 33 |
confidence_threshold_for_description: 用於描述的置信度閾值
|
|
|
|
| 34 |
"""
|
| 35 |
self.logger = logging.getLogger(self.__class__.__name__)
|
| 36 |
|
|
@@ -40,6 +49,23 @@ class ObjectDescriptionGenerator:
|
|
| 40 |
self.confidence_threshold_for_description = confidence_threshold_for_description
|
| 41 |
self.region_analyzer = region_analyzer
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
|
| 44 |
"max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
|
| 45 |
min_prominence_score, max_categories_to_return,
|
|
@@ -59,49 +85,11 @@ class ObjectDescriptionGenerator:
|
|
| 59 |
Returns:
|
| 60 |
List[Dict]: 按重要性排序的物件列表
|
| 61 |
"""
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
for obj in detected_objects:
|
| 69 |
-
# 計算重要性評分
|
| 70 |
-
prominence_score = self._calculate_prominence_score(obj)
|
| 71 |
-
|
| 72 |
-
# 只保留超過閾值的物件
|
| 73 |
-
if prominence_score >= min_prominence_score:
|
| 74 |
-
obj_copy = obj.copy()
|
| 75 |
-
obj_copy['prominence_score'] = prominence_score
|
| 76 |
-
prominent_objects.append(obj_copy)
|
| 77 |
-
|
| 78 |
-
# 按重要性評分排序(從高到低)
|
| 79 |
-
prominent_objects.sort(key=lambda x: x.get('prominence_score', 0), reverse=True)
|
| 80 |
-
|
| 81 |
-
# 如果指定了最大類別數量限制,進行過濾
|
| 82 |
-
if max_categories_to_return is not None and max_categories_to_return > 0:
|
| 83 |
-
categories_seen = set()
|
| 84 |
-
filtered_objects = []
|
| 85 |
-
|
| 86 |
-
for obj in prominent_objects:
|
| 87 |
-
class_name = obj.get("class_name", "unknown")
|
| 88 |
-
|
| 89 |
-
# 如果是新類別且未達到限制
|
| 90 |
-
if class_name not in categories_seen:
|
| 91 |
-
if len(categories_seen) < max_categories_to_return:
|
| 92 |
-
categories_seen.add(class_name)
|
| 93 |
-
filtered_objects.append(obj)
|
| 94 |
-
else:
|
| 95 |
-
# 已見過的類別,直接添加
|
| 96 |
-
filtered_objects.append(obj)
|
| 97 |
-
|
| 98 |
-
return filtered_objects
|
| 99 |
-
|
| 100 |
-
return prominent_objects
|
| 101 |
-
|
| 102 |
-
except Exception as e:
|
| 103 |
-
self.logger.error(f"Error calculating prominent objects: {str(e)}")
|
| 104 |
-
return []
|
| 105 |
|
| 106 |
def set_region_analyzer(self, region_analyzer: Any) -> None:
|
| 107 |
"""
|
|
@@ -112,107 +100,11 @@ class ObjectDescriptionGenerator:
|
|
| 112 |
"""
|
| 113 |
try:
|
| 114 |
self.region_analyzer = region_analyzer
|
|
|
|
| 115 |
self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
|
| 116 |
except Exception as e:
|
| 117 |
self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
|
| 118 |
|
| 119 |
-
def _get_standardized_spatial_description(self, obj: Dict) -> str:
|
| 120 |
-
"""
|
| 121 |
-
使用RegionAnalyzer生成標準化空間描述的內部方法
|
| 122 |
-
|
| 123 |
-
Args:
|
| 124 |
-
obj: 物件字典
|
| 125 |
-
|
| 126 |
-
Returns:
|
| 127 |
-
str: 標準化空間描述,失敗時返回空字串
|
| 128 |
-
"""
|
| 129 |
-
try:
|
| 130 |
-
if hasattr(self, 'region_analyzer') and self.region_analyzer:
|
| 131 |
-
region = obj.get("region", "")
|
| 132 |
-
object_type = obj.get("class_name", "")
|
| 133 |
-
|
| 134 |
-
if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
|
| 135 |
-
return self.region_analyzer.get_contextual_spatial_description(region, object_type)
|
| 136 |
-
elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
|
| 137 |
-
return self.region_analyzer.get_spatial_description_phrase(region)
|
| 138 |
-
|
| 139 |
-
return ""
|
| 140 |
-
|
| 141 |
-
except Exception as e:
|
| 142 |
-
self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
|
| 143 |
-
if object_type:
|
| 144 |
-
return f"visible in the scene"
|
| 145 |
-
return "present in the view"
|
| 146 |
-
|
| 147 |
-
def _calculate_prominence_score(self, obj: Dict) -> float:
|
| 148 |
-
"""
|
| 149 |
-
計算物件的重要性評分
|
| 150 |
-
|
| 151 |
-
Args:
|
| 152 |
-
obj: 物件字典,包含檢測信息
|
| 153 |
-
|
| 154 |
-
Returns:
|
| 155 |
-
float: 重要性評分 (0.0-1.0)
|
| 156 |
-
"""
|
| 157 |
-
try:
|
| 158 |
-
# 基礎置信度評分 (權重: 40%)
|
| 159 |
-
confidence = obj.get("confidence", 0.5)
|
| 160 |
-
confidence_score = confidence * 0.4
|
| 161 |
-
|
| 162 |
-
# 大小評分 (權重: 30%)
|
| 163 |
-
normalized_area = obj.get("normalized_area", 0.1)
|
| 164 |
-
# 使用對數縮放避免過大物件主導評分
|
| 165 |
-
size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3
|
| 166 |
-
|
| 167 |
-
# 位置評分 (權重: 20%)
|
| 168 |
-
# 中心區域的物件通常更重要
|
| 169 |
-
center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
|
| 170 |
-
distance_from_center = np.sqrt((center_x - 0.5)**2 + (center_y - 0.5)**2)
|
| 171 |
-
position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2
|
| 172 |
-
|
| 173 |
-
# 類別重要性評分 (權重: 10%)
|
| 174 |
-
class_importance = self._get_class_importance(obj.get("class_name", "unknown"))
|
| 175 |
-
class_score = class_importance * 0.1
|
| 176 |
-
|
| 177 |
-
total_score = confidence_score + size_score + position_score + class_score
|
| 178 |
-
|
| 179 |
-
# 確保評分在有效範圍內
|
| 180 |
-
return max(0.0, min(1.0, total_score))
|
| 181 |
-
|
| 182 |
-
except Exception as e:
|
| 183 |
-
self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
|
| 184 |
-
return 0.5 # 返回中等評分作為備用
|
| 185 |
-
|
| 186 |
-
def _get_class_importance(self, class_name: str) -> float:
|
| 187 |
-
"""
|
| 188 |
-
根據物件類別返回重要性係數
|
| 189 |
-
|
| 190 |
-
Args:
|
| 191 |
-
class_name: 物件類別名稱
|
| 192 |
-
|
| 193 |
-
Returns:
|
| 194 |
-
float: 類別重要性係數 (0.0-1.0)
|
| 195 |
-
"""
|
| 196 |
-
# 高重要性物件(人、車輛、建築)
|
| 197 |
-
high_importance = ["person", "car", "truck", "bus", "motorcycle", "bicycle", "building"]
|
| 198 |
-
|
| 199 |
-
# 中等重要性物件(家具、電器)
|
| 200 |
-
medium_importance = ["chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed"]
|
| 201 |
-
|
| 202 |
-
# 低重要性物件(小物品、配件)
|
| 203 |
-
low_importance = ["handbag", "backpack", "umbrella", "cell phone", "remote", "mouse"]
|
| 204 |
-
|
| 205 |
-
class_name_lower = class_name.lower()
|
| 206 |
-
|
| 207 |
-
if any(item in class_name_lower for item in high_importance):
|
| 208 |
-
return 1.0
|
| 209 |
-
elif any(item in class_name_lower for item in medium_importance):
|
| 210 |
-
return 0.7
|
| 211 |
-
elif any(item in class_name_lower for item in low_importance):
|
| 212 |
-
return 0.4
|
| 213 |
-
else:
|
| 214 |
-
return 0.6 # 預設中等重要性
|
| 215 |
-
|
| 216 |
def format_object_list_for_description(self,
|
| 217 |
objects: List[Dict],
|
| 218 |
use_indefinite_article_for_one: bool = False,
|
|
@@ -230,65 +122,12 @@ class ObjectDescriptionGenerator:
|
|
| 230 |
Returns:
|
| 231 |
str: 格式化的物件描述字符串
|
| 232 |
"""
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
name = obj.get("class_name", "unknown object")
|
| 240 |
-
if name == "unknown object" or not name:
|
| 241 |
-
continue
|
| 242 |
-
counts[name] = counts.get(name, 0) + 1
|
| 243 |
-
|
| 244 |
-
if not counts:
|
| 245 |
-
return "no specific objects clearly identified"
|
| 246 |
-
|
| 247 |
-
descriptions = []
|
| 248 |
-
# 按計數降序然後按名稱升序排序,限制物件類型數量
|
| 249 |
-
sorted_counts = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:max_types_to_list]
|
| 250 |
-
|
| 251 |
-
for name, count in sorted_counts:
|
| 252 |
-
if count == 1:
|
| 253 |
-
if use_indefinite_article_for_one:
|
| 254 |
-
if name[0].lower() in 'aeiou':
|
| 255 |
-
descriptions.append(f"an {name}")
|
| 256 |
-
else:
|
| 257 |
-
descriptions.append(f"a {name}")
|
| 258 |
-
else:
|
| 259 |
-
descriptions.append(f"one {name}")
|
| 260 |
-
else:
|
| 261 |
-
# 處理複數形式
|
| 262 |
-
plural_name = name
|
| 263 |
-
if name.endswith("y") and not name.lower().endswith(("ay", "ey", "iy", "oy", "uy")):
|
| 264 |
-
plural_name = name[:-1] + "ies"
|
| 265 |
-
elif name.endswith(("s", "sh", "ch", "x", "z")):
|
| 266 |
-
plural_name = name + "es"
|
| 267 |
-
elif not name.endswith("s"):
|
| 268 |
-
plural_name = name + "s"
|
| 269 |
-
|
| 270 |
-
if count_threshold_for_generalization != -1 and count > count_threshold_for_generalization:
|
| 271 |
-
if count <= count_threshold_for_generalization + 3:
|
| 272 |
-
descriptions.append(f"several {plural_name}")
|
| 273 |
-
else:
|
| 274 |
-
descriptions.append(f"many {plural_name}")
|
| 275 |
-
else:
|
| 276 |
-
descriptions.append(f"{count} {plural_name}")
|
| 277 |
-
|
| 278 |
-
if not descriptions:
|
| 279 |
-
return "no specific objects clearly identified"
|
| 280 |
-
|
| 281 |
-
if len(descriptions) == 1:
|
| 282 |
-
return descriptions[0]
|
| 283 |
-
elif len(descriptions) == 2:
|
| 284 |
-
return f"{descriptions[0]} and {descriptions[1]}"
|
| 285 |
-
else:
|
| 286 |
-
# 使用牛津逗號格式
|
| 287 |
-
return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
|
| 288 |
-
|
| 289 |
-
except Exception as e:
|
| 290 |
-
self.logger.warning(f"Error formatting object list: {str(e)}")
|
| 291 |
-
return "various objects"
|
| 292 |
|
| 293 |
def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
|
| 294 |
image_height: Optional[int] = None,
|
|
@@ -305,95 +144,16 @@ class ObjectDescriptionGenerator:
|
|
| 305 |
Returns:
|
| 306 |
str: 空間描述字符串,空值region時返回空字串
|
| 307 |
"""
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
|
| 315 |
-
return "positioned in the scene"
|
| 316 |
-
elif object_type and "person" in object_type.lower():
|
| 317 |
-
return "present in the area"
|
| 318 |
-
else:
|
| 319 |
-
return "located in the scene"
|
| 320 |
-
|
| 321 |
-
# 如果提供了RegionAnalyzer實例,使用其標準化方法
|
| 322 |
-
if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
|
| 323 |
-
object_type = obj.get("class_name", "")
|
| 324 |
-
if hasattr(region_analyzer, 'get_contextual_spatial_description'):
|
| 325 |
-
spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
|
| 326 |
-
else:
|
| 327 |
-
spatial_desc = region_analyzer.get_spatial_description_phrase(region)
|
| 328 |
-
|
| 329 |
-
if spatial_desc:
|
| 330 |
-
return spatial_desc
|
| 331 |
-
|
| 332 |
-
# 備用邏輯:使用改進的內建映射
|
| 333 |
-
clean_region = region.replace('_', ' ').strip().lower()
|
| 334 |
-
|
| 335 |
-
region_map = {
|
| 336 |
-
"top left": "in the upper left area",
|
| 337 |
-
"top center": "in the upper area",
|
| 338 |
-
"top right": "in the upper right area",
|
| 339 |
-
"middle left": "on the left side",
|
| 340 |
-
"middle center": "in the center",
|
| 341 |
-
"center": "in the center",
|
| 342 |
-
"middle right": "on the right side",
|
| 343 |
-
"bottom left": "in the lower left area",
|
| 344 |
-
"bottom center": "in the lower area",
|
| 345 |
-
"bottom right": "in the lower right area"
|
| 346 |
-
}
|
| 347 |
-
|
| 348 |
-
# 直接映射匹配
|
| 349 |
-
if clean_region in region_map:
|
| 350 |
-
return region_map[clean_region]
|
| 351 |
-
|
| 352 |
-
# 模糊匹配處理
|
| 353 |
-
if "top" in clean_region and "left" in clean_region:
|
| 354 |
-
return "in the upper left area"
|
| 355 |
-
elif "top" in clean_region and "right" in clean_region:
|
| 356 |
-
return "in the upper right area"
|
| 357 |
-
elif "bottom" in clean_region and "left" in clean_region:
|
| 358 |
-
return "in the lower left area"
|
| 359 |
-
elif "bottom" in clean_region and "right" in clean_region:
|
| 360 |
-
return "in the lower right area"
|
| 361 |
-
elif "top" in clean_region:
|
| 362 |
-
return "in the upper area"
|
| 363 |
-
elif "bottom" in clean_region:
|
| 364 |
-
return "in the lower area"
|
| 365 |
-
elif "left" in clean_region:
|
| 366 |
-
return "on the left side"
|
| 367 |
-
elif "right" in clean_region:
|
| 368 |
-
return "on the right side"
|
| 369 |
-
elif "center" in clean_region or "middle" in clean_region:
|
| 370 |
-
return "in the center"
|
| 371 |
-
|
| 372 |
-
# 如果region無法識別,使用normalized_center作為最後備用
|
| 373 |
-
norm_center = obj.get("normalized_center")
|
| 374 |
-
if norm_center and image_width and image_height:
|
| 375 |
-
x_norm, y_norm = norm_center
|
| 376 |
-
h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
|
| 377 |
-
v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"
|
| 378 |
-
|
| 379 |
-
if h_pos == "center" and v_pos == "center":
|
| 380 |
-
return "in the center"
|
| 381 |
-
return f"in the {v_pos} {h_pos} area"
|
| 382 |
-
|
| 383 |
-
# 如果所有方法都失敗,返回空字串
|
| 384 |
-
return ""
|
| 385 |
-
|
| 386 |
-
except Exception as e:
|
| 387 |
-
self.logger.warning(f"Error generating spatial description: {str(e)}")
|
| 388 |
-
return ""
|
| 389 |
|
| 390 |
def optimize_object_description(self, description: str) -> str:
|
| 391 |
"""
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
|
| 395 |
-
產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
|
| 396 |
-
列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
|
| 397 |
|
| 398 |
Args:
|
| 399 |
description: 原始的場景描述文本,可能包含重複或冗餘的表達
|
|
@@ -401,164 +161,7 @@ class ObjectDescriptionGenerator:
|
|
| 401 |
Returns:
|
| 402 |
str: 經過優化清理的描述文本,如果處理失敗則返回原始文本
|
| 403 |
"""
|
| 404 |
-
|
| 405 |
-
import re
|
| 406 |
-
|
| 407 |
-
# 1. 處理多餘的空間限定表達
|
| 408 |
-
# 使用通用模式來識別和移除不必要的空間描述
|
| 409 |
-
# 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
|
| 410 |
-
description = self._remove_redundant_spatial_qualifiers(description)
|
| 411 |
-
|
| 412 |
-
# 2. 辨識並處理物件列表的重複問題
|
| 413 |
-
# 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
|
| 414 |
-
# 使用正則表達式捕獲 "with" 關鍵字後的物件序列
|
| 415 |
-
# 注意:正則表達式需要修正以避免貪婪匹配的問題
|
| 416 |
-
object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
|
| 417 |
-
|
| 418 |
-
# 遍歷每個找到的物件列表進行重複檢測和優化
|
| 419 |
-
for obj_list in object_lists:
|
| 420 |
-
# 3. 解析單個物件列表中的項目
|
| 421 |
-
# 使用更精確的正則表達式來分割物件項目
|
| 422 |
-
# 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
|
| 423 |
-
# 需要特別注意處理最後一個 "and" 的情況
|
| 424 |
-
|
| 425 |
-
# 先處理逗號格式 "A, B, and C"
|
| 426 |
-
if ", and " in obj_list:
|
| 427 |
-
# 分割 ", and " 前後的部分
|
| 428 |
-
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
| 429 |
-
last_item = obj_list.rsplit(", and ", 1)[1]
|
| 430 |
-
|
| 431 |
-
# 處理前面的項目(用逗號分割)
|
| 432 |
-
front_items = [item.strip() for item in before_last_and.split(",")]
|
| 433 |
-
# 添加最後一個項目
|
| 434 |
-
all_items = front_items + [last_item.strip()]
|
| 435 |
-
elif " and " in obj_list:
|
| 436 |
-
# 處理簡單的 "A and B" 格式
|
| 437 |
-
all_items = [item.strip() for item in obj_list.split(" and ")]
|
| 438 |
-
else:
|
| 439 |
-
# 處理純逗號分隔的列表
|
| 440 |
-
all_items = [item.strip() for item in obj_list.split(",")]
|
| 441 |
-
|
| 442 |
-
# 4. 統計物件出現頻率
|
| 443 |
-
# 建立字典來記錄每個物件的出現次數
|
| 444 |
-
item_counts = {}
|
| 445 |
-
|
| 446 |
-
for item in all_items:
|
| 447 |
-
# 清理項目文字並過濾無效內容
|
| 448 |
-
item = item.strip()
|
| 449 |
-
# 過濾掉連接詞和空白項目
|
| 450 |
-
if item and item not in ["and", "with", ""]:
|
| 451 |
-
# 移除可能的冠詞前綴以便正確計數
|
| 452 |
-
# 例如 "a car" 和 "car" 應該被視為同一項目
|
| 453 |
-
clean_item = self._normalize_item_for_counting(item)
|
| 454 |
-
if clean_item not in item_counts:
|
| 455 |
-
item_counts[clean_item] = 0
|
| 456 |
-
item_counts[clean_item] += 1
|
| 457 |
-
|
| 458 |
-
# 5. 生成優化後的物件列表
|
| 459 |
-
if item_counts:
|
| 460 |
-
new_items = []
|
| 461 |
-
|
| 462 |
-
for item, count in item_counts.items():
|
| 463 |
-
if count > 1:
|
| 464 |
-
# 對於重複項目,使用數字加複數形式
|
| 465 |
-
plural_item = self._make_plural(item)
|
| 466 |
-
new_items.append(f"{count} {plural_item}")
|
| 467 |
-
else:
|
| 468 |
-
# 單個項目保持原樣
|
| 469 |
-
new_items.append(item)
|
| 470 |
-
|
| 471 |
-
# 6. 重新格式化物件列表
|
| 472 |
-
# 使用標準的英文列表連接格式
|
| 473 |
-
if len(new_items) == 1:
|
| 474 |
-
new_list = new_items[0]
|
| 475 |
-
elif len(new_items) == 2:
|
| 476 |
-
new_list = f"{new_items[0]} and {new_items[1]}"
|
| 477 |
-
else:
|
| 478 |
-
# 使用逗號格式確保清晰度
|
| 479 |
-
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
| 480 |
-
|
| 481 |
-
# 7. 在原文中替換優化後的列表
|
| 482 |
-
# 將原始的多餘列表替換為優化後的簡潔版本
|
| 483 |
-
description = description.replace(obj_list, new_list)
|
| 484 |
-
|
| 485 |
-
return description
|
| 486 |
-
|
| 487 |
-
except Exception as e:
|
| 488 |
-
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
| 489 |
-
return description
|
| 490 |
-
|
| 491 |
-
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
| 492 |
-
"""
|
| 493 |
-
移除描述中冗餘的空間限定詞
|
| 494 |
-
|
| 495 |
-
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
| 496 |
-
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
| 497 |
-
床這個物件本身就是室內環境。
|
| 498 |
-
|
| 499 |
-
Args:
|
| 500 |
-
description: 包含可能多餘空間描述的文本
|
| 501 |
-
|
| 502 |
-
Returns:
|
| 503 |
-
str: 移除多餘空間限定詞後的文本
|
| 504 |
-
"""
|
| 505 |
-
import re
|
| 506 |
-
|
| 507 |
-
# 定義常見的多餘空間表達模式
|
| 508 |
-
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
| 509 |
-
redundant_patterns = [
|
| 510 |
-
# 室內物件的多餘房間描述
|
| 511 |
-
(r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
|
| 512 |
-
# 廚房物件的多餘描述
|
| 513 |
-
(r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
|
| 514 |
-
# 浴室物件的多餘描述
|
| 515 |
-
(r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
|
| 516 |
-
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
| 517 |
-
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
| 518 |
-
]
|
| 519 |
-
|
| 520 |
-
for pattern, replacement in redundant_patterns:
|
| 521 |
-
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
| 522 |
-
|
| 523 |
-
return description
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
def _normalize_item_for_counting(self, item: str) -> str:
|
| 527 |
-
"""
|
| 528 |
-
正規化物件項目以便準確計數
|
| 529 |
-
|
| 530 |
-
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
| 531 |
-
確保 "a car" 和 "car" 被視為同一物件類型。
|
| 532 |
-
|
| 533 |
-
Args:
|
| 534 |
-
item: 原始物件項目字串
|
| 535 |
-
|
| 536 |
-
Returns:
|
| 537 |
-
str: 正規化後的物件項目
|
| 538 |
-
"""
|
| 539 |
-
# 移除常見的英文冠詞
|
| 540 |
-
item = re.sub(r'^(a|an|the)\s+', '', item.lower())
|
| 541 |
-
return item.strip()
|
| 542 |
-
|
| 543 |
-
def _make_plural(self, item: str) -> str:
|
| 544 |
-
"""
|
| 545 |
-
將單數名詞轉換為複數形式
|
| 546 |
-
|
| 547 |
-
Args:
|
| 548 |
-
item: 單數形式的名詞
|
| 549 |
-
|
| 550 |
-
Returns:
|
| 551 |
-
str: 複數形式的名詞
|
| 552 |
-
"""
|
| 553 |
-
# 重用已經實現的複數化邏輯
|
| 554 |
-
if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
|
| 555 |
-
return item[:-1] + "ies"
|
| 556 |
-
elif item.endswith(("s", "sh", "ch", "x", "z")):
|
| 557 |
-
return item + "es"
|
| 558 |
-
elif not item.endswith("s"):
|
| 559 |
-
return item + "s"
|
| 560 |
-
else:
|
| 561 |
-
return item
|
| 562 |
|
| 563 |
def generate_dynamic_everyday_description(self,
|
| 564 |
detected_objects: List[Dict],
|
|
@@ -586,6 +189,7 @@ class ObjectDescriptionGenerator:
|
|
| 586 |
try:
|
| 587 |
description_segments = []
|
| 588 |
image_width, image_height = image_dimensions if image_dimensions else (None, None)
|
|
|
|
| 589 |
|
| 590 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
| 591 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
|
@@ -625,8 +229,6 @@ class ObjectDescriptionGenerator:
|
|
| 625 |
else:
|
| 626 |
description_segments.append("Within this setting, no specific objects were clearly identified.")
|
| 627 |
else:
|
| 628 |
-
objects_by_class: Dict[str, List[Dict]] = {}
|
| 629 |
-
|
| 630 |
# 使用置信度過濾
|
| 631 |
confident_objects = [obj for obj in detected_objects
|
| 632 |
if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
|
|
@@ -642,172 +244,29 @@ class ObjectDescriptionGenerator:
|
|
| 642 |
else:
|
| 643 |
description_segments.append(no_confident_obj_msg.lower().capitalize())
|
| 644 |
else:
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
avg_confidence = stats.get("avg_confidence", 0)
|
| 650 |
-
|
| 651 |
-
# 動態調整置信度閾值
|
| 652 |
-
dynamic_threshold = self.confidence_threshold_for_description
|
| 653 |
-
if class_name in ["potted plant", "vase", "clock", "book"]:
|
| 654 |
-
dynamic_threshold = max(0.15, self.confidence_threshold_for_description * 0.6)
|
| 655 |
-
elif count >= 3:
|
| 656 |
-
dynamic_threshold = max(0.2, self.confidence_threshold_for_description * 0.8)
|
| 657 |
-
|
| 658 |
-
if count > 0 and avg_confidence >= dynamic_threshold:
|
| 659 |
-
matching_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
|
| 660 |
-
if not matching_objects:
|
| 661 |
-
matching_objects = [obj for obj in detected_objects
|
| 662 |
-
if obj.get("class_name") == class_name and obj.get("confidence", 0) >= dynamic_threshold]
|
| 663 |
-
|
| 664 |
-
if matching_objects:
|
| 665 |
-
actual_count = min(stats["count"], len(matching_objects))
|
| 666 |
-
objects_by_class[class_name] = matching_objects[:actual_count]
|
| 667 |
-
else:
|
| 668 |
-
# 備用邏輯,同樣使用動態閾值
|
| 669 |
-
for obj in confident_objects:
|
| 670 |
-
name = obj.get("class_name", "unknown object")
|
| 671 |
-
if name == "unknown object" or not name:
|
| 672 |
-
continue
|
| 673 |
-
if name not in objects_by_class:
|
| 674 |
-
objects_by_class[name] = []
|
| 675 |
-
objects_by_class[name].append(obj)
|
| 676 |
-
|
| 677 |
-
print(f"DEBUG: Before spatial deduplication:")
|
| 678 |
-
for class_name in ["car", "traffic light", "person", "handbag"]:
|
| 679 |
-
if class_name in objects_by_class:
|
| 680 |
-
print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
|
| 681 |
|
| 682 |
if not objects_by_class:
|
| 683 |
description_segments.append("No common objects were confidently identified for detailed description.")
|
| 684 |
else:
|
| 685 |
-
#
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
priority = 2
|
| 701 |
-
elif count >= 3:
|
| 702 |
-
priority = max(1, priority - 1)
|
| 703 |
-
elif normalized_class_name in ["potted plant", "vase", "clock", "book"] and count >= 2:
|
| 704 |
-
priority = 2
|
| 705 |
-
|
| 706 |
-
avg_area = sum(o.get("normalized_area", 0.0) for o in obj_group_list) / len(obj_group_list) if obj_group_list else 0
|
| 707 |
-
quantity_bonus = min(count / 5.0, 1.0)
|
| 708 |
-
|
| 709 |
-
return (priority, -len(obj_group_list), -avg_area, -quantity_bonus)
|
| 710 |
-
|
| 711 |
-
# remove duplicate
|
| 712 |
-
deduplicated_objects_by_class = {}
|
| 713 |
-
processed_positions = []
|
| 714 |
-
|
| 715 |
-
for class_name, group_of_objects in objects_by_class.items():
|
| 716 |
-
unique_objects = []
|
| 717 |
-
|
| 718 |
-
for obj in group_of_objects:
|
| 719 |
-
obj_position = obj.get("normalized_center", [0.5, 0.5])
|
| 720 |
-
is_duplicate = False
|
| 721 |
-
|
| 722 |
-
for processed_pos in processed_positions:
|
| 723 |
-
position_distance = abs(obj_position[0] - processed_pos[0]) + abs(obj_position[1] - processed_pos[1])
|
| 724 |
-
if position_distance < 0.15:
|
| 725 |
-
is_duplicate = True
|
| 726 |
-
break
|
| 727 |
-
|
| 728 |
-
if not is_duplicate:
|
| 729 |
-
unique_objects.append(obj)
|
| 730 |
-
processed_positions.append(obj_position)
|
| 731 |
-
|
| 732 |
-
if unique_objects:
|
| 733 |
-
deduplicated_objects_by_class[class_name] = unique_objects
|
| 734 |
-
|
| 735 |
-
objects_by_class = deduplicated_objects_by_class
|
| 736 |
-
print(f"DEBUG: After spatial deduplication:")
|
| 737 |
-
for class_name in ["car", "traffic light", "person", "handbag"]:
|
| 738 |
-
if class_name in objects_by_class:
|
| 739 |
-
print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects after dedup")
|
| 740 |
-
|
| 741 |
-
sorted_object_groups = sorted(objects_by_class.items(), key=sort_key_object_groups)
|
| 742 |
-
|
| 743 |
-
object_clauses = []
|
| 744 |
-
|
| 745 |
-
for class_name, group_of_objects in sorted_object_groups:
|
| 746 |
-
count = len(group_of_objects)
|
| 747 |
-
if class_name in ["car", "traffic light", "person", "handbag"]:
|
| 748 |
-
print(f"DEBUG: Final count for {class_name}: {count}")
|
| 749 |
-
if count == 0:
|
| 750 |
-
continue
|
| 751 |
-
|
| 752 |
-
# 標準化class name
|
| 753 |
-
normalized_class_name = self._normalize_object_class_name(class_name)
|
| 754 |
-
|
| 755 |
-
# 使用統計信息確保準確的數量描述
|
| 756 |
-
if object_statistics and class_name in object_statistics:
|
| 757 |
-
actual_count = object_statistics[class_name]["count"]
|
| 758 |
-
formatted_name_with_exact_count = self._format_object_count_description(
|
| 759 |
-
normalized_class_name,
|
| 760 |
-
actual_count,
|
| 761 |
-
scene_type=scene_type
|
| 762 |
-
)
|
| 763 |
-
else:
|
| 764 |
-
formatted_name_with_exact_count = self._format_object_count_description(
|
| 765 |
-
normalized_class_name,
|
| 766 |
-
count,
|
| 767 |
-
scene_type=scene_type
|
| 768 |
-
)
|
| 769 |
-
|
| 770 |
-
if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
|
| 771 |
-
continue
|
| 772 |
-
|
| 773 |
-
# 確定群組的集體位置
|
| 774 |
-
location_description_suffix = ""
|
| 775 |
-
if count == 1:
|
| 776 |
-
spatial_desc = self.get_spatial_description(group_of_objects[0], image_width, image_height, self.region_analyzer)
|
| 777 |
-
if spatial_desc:
|
| 778 |
-
location_description_suffix = f"is {spatial_desc}"
|
| 779 |
-
else:
|
| 780 |
-
distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
|
| 781 |
-
valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
|
| 782 |
-
if not valid_regions:
|
| 783 |
-
location_description_suffix = "is positioned in the scene"
|
| 784 |
-
elif len(valid_regions) == 1:
|
| 785 |
-
spatial_desc = self.get_spatial_description_phrase(valid_regions[0])
|
| 786 |
-
location_description_suffix = f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
|
| 787 |
-
elif len(valid_regions) == 2:
|
| 788 |
-
clean_region1 = valid_regions[0].replace('_', ' ')
|
| 789 |
-
clean_region2 = valid_regions[1].replace('_', ' ')
|
| 790 |
-
location_description_suffix = f"is mainly across the {clean_region1} and {clean_region2} areas"
|
| 791 |
-
else:
|
| 792 |
-
location_description_suffix = "is distributed in various parts of the scene"
|
| 793 |
-
else:
|
| 794 |
-
distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
|
| 795 |
-
valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
|
| 796 |
-
if not valid_regions:
|
| 797 |
-
location_description_suffix = "are visible in the scene"
|
| 798 |
-
elif len(valid_regions) == 1:
|
| 799 |
-
clean_region = valid_regions[0].replace('_', ' ')
|
| 800 |
-
location_description_suffix = f"are primarily in the {clean_region} area"
|
| 801 |
-
elif len(valid_regions) == 2:
|
| 802 |
-
clean_region1 = valid_regions[0].replace('_', ' ')
|
| 803 |
-
clean_region2 = valid_regions[1].replace('_', ' ')
|
| 804 |
-
location_description_suffix = f"are mainly across the {clean_region1} and {clean_region2} areas"
|
| 805 |
-
else:
|
| 806 |
-
location_description_suffix = "are distributed in various parts of the scene"
|
| 807 |
-
|
| 808 |
-
# 首字母大寫
|
| 809 |
-
formatted_name_capitalized = formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
|
| 810 |
-
object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")
|
| 811 |
|
| 812 |
if object_clauses:
|
| 813 |
if not description_segments:
|
|
@@ -845,7 +304,7 @@ class ObjectDescriptionGenerator:
|
|
| 845 |
raw_description += "."
|
| 846 |
|
| 847 |
# 移除重複性和不適當的描述詞彙
|
| 848 |
-
raw_description = self.
|
| 849 |
|
| 850 |
if not raw_description or len(raw_description.strip()) < 20:
|
| 851 |
if 'confident_objects' in locals() and confident_objects:
|
|
@@ -860,586 +319,6 @@ class ObjectDescriptionGenerator:
|
|
| 860 |
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
| 861 |
raise ObjectDescriptionError(error_msg) from e
|
| 862 |
|
| 863 |
-
def _remove_repetitive_descriptors(self, description: str) -> str:
|
| 864 |
-
"""
|
| 865 |
-
移除描述中的重複性和不適當的描述詞彙,特別是 "identical" 等詞彙
|
| 866 |
-
|
| 867 |
-
Args:
|
| 868 |
-
description: 原始描述文本
|
| 869 |
-
|
| 870 |
-
Returns:
|
| 871 |
-
str: 清理後的描述文本
|
| 872 |
-
"""
|
| 873 |
-
try:
|
| 874 |
-
import re
|
| 875 |
-
|
| 876 |
-
# 定義需要移除或替換的模式
|
| 877 |
-
cleanup_patterns = [
|
| 878 |
-
# 移除 "identical" 描述模式
|
| 879 |
-
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
| 880 |
-
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
| 881 |
-
(r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
|
| 882 |
-
|
| 883 |
-
# 改善 "comprehensive arrangement" 等過於技術性的表達
|
| 884 |
-
(r'\bcomprehensive arrangement of\b', 'arrangement of'),
|
| 885 |
-
(r'\bcomprehensive view featuring\b', 'scene featuring'),
|
| 886 |
-
(r'\bcomprehensive display of\b', 'display of'),
|
| 887 |
-
|
| 888 |
-
# 簡化過度描述性的短語
|
| 889 |
-
(r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
|
| 890 |
-
(r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
|
| 891 |
-
]
|
| 892 |
-
|
| 893 |
-
processed_description = description
|
| 894 |
-
for pattern, replacement in cleanup_patterns:
|
| 895 |
-
processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
|
| 896 |
-
|
| 897 |
-
# 進一步清理可能的多餘空格
|
| 898 |
-
processed_description = re.sub(r'\s+', ' ', processed_description).strip()
|
| 899 |
-
|
| 900 |
-
self.logger.debug(f"Cleaned description: removed repetitive descriptors")
|
| 901 |
-
return processed_description
|
| 902 |
-
|
| 903 |
-
except Exception as e:
|
| 904 |
-
self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
|
| 905 |
-
return description
|
| 906 |
-
|
| 907 |
-
def _format_object_count_description(self, class_name: str, count: int,
|
| 908 |
-
scene_type: Optional[str] = None,
|
| 909 |
-
detected_objects: Optional[List[Dict]] = None,
|
| 910 |
-
avg_confidence: float = 0.0) -> str:
|
| 911 |
-
"""
|
| 912 |
-
格式化物件數量描述的核心方法,整合空間排列、材質推斷和場景語境
|
| 913 |
-
|
| 914 |
-
這個方法是整個物件描述系統的核心,它將多個子功能整合在一起:
|
| 915 |
-
1. 數字到文字的轉換(避免阿拉伯數字)
|
| 916 |
-
2. 基於場景的材質推斷
|
| 917 |
-
3. 空間排列模式的描述
|
| 918 |
-
4. 語境化的物件描述
|
| 919 |
-
|
| 920 |
-
Args:
|
| 921 |
-
class_name: 標準化後的類別名稱
|
| 922 |
-
count: 物件數量
|
| 923 |
-
scene_type: 場景類型,用於語境化描述
|
| 924 |
-
detected_objects: 該類型的所有檢測物件,用於空間分析
|
| 925 |
-
avg_confidence: 平均檢測置信度,影響材質推斷的可信度
|
| 926 |
-
|
| 927 |
-
Returns:
|
| 928 |
-
str: 完整的格式化數量描述
|
| 929 |
-
"""
|
| 930 |
-
try:
|
| 931 |
-
if count <= 0:
|
| 932 |
-
return ""
|
| 933 |
-
|
| 934 |
-
# 獲取基礎的複數形式
|
| 935 |
-
plural_form = self._get_plural_form(class_name)
|
| 936 |
-
|
| 937 |
-
# 單數情況的處理
|
| 938 |
-
if count == 1:
|
| 939 |
-
return self._format_single_object_description(class_name, scene_type,
|
| 940 |
-
detected_objects, avg_confidence)
|
| 941 |
-
|
| 942 |
-
# 複數情況的處理
|
| 943 |
-
return self._format_multiple_objects_description(class_name, count, plural_form,
|
| 944 |
-
scene_type, detected_objects, avg_confidence)
|
| 945 |
-
|
| 946 |
-
except Exception as e:
|
| 947 |
-
self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
|
| 948 |
-
return f"{count} {class_name}s" if count > 1 else class_name
|
| 949 |
-
|
| 950 |
-
def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
|
| 951 |
-
detected_objects: Optional[List[Dict]],
|
| 952 |
-
avg_confidence: float) -> str:
|
| 953 |
-
"""
|
| 954 |
-
處理單個物件的描述生成
|
| 955 |
-
|
| 956 |
-
對於單個物件,我們重點在於通過材質推斷和位置描述來豐富描述內容,
|
| 957 |
-
避免簡單的 "a chair" 這樣的描述,而是生成 "a wooden dining chair" 這樣的表達
|
| 958 |
-
|
| 959 |
-
Args:
|
| 960 |
-
class_name: 物件類別名稱
|
| 961 |
-
scene_type: 場景類型
|
| 962 |
-
detected_objects: 檢測物件列表
|
| 963 |
-
avg_confidence: 平均置信度
|
| 964 |
-
|
| 965 |
-
Returns:
|
| 966 |
-
str: 單個物件的完整描述
|
| 967 |
-
"""
|
| 968 |
-
article = "an" if class_name[0].lower() in 'aeiou' else "a"
|
| 969 |
-
|
| 970 |
-
# 獲取材質描述符
|
| 971 |
-
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
| 972 |
-
|
| 973 |
-
# 獲取位置或特徵描述符
|
| 974 |
-
feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
|
| 975 |
-
|
| 976 |
-
# 組合描述
|
| 977 |
-
descriptors = []
|
| 978 |
-
if material_descriptor:
|
| 979 |
-
descriptors.append(material_descriptor)
|
| 980 |
-
if feature_descriptor:
|
| 981 |
-
descriptors.append(feature_descriptor)
|
| 982 |
-
|
| 983 |
-
if descriptors:
|
| 984 |
-
return f"{article} {' '.join(descriptors)} {class_name}"
|
| 985 |
-
else:
|
| 986 |
-
return f"{article} {class_name}"
|
| 987 |
-
|
| 988 |
-
def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
|
| 989 |
-
scene_type: Optional[str], detected_objects: Optional[List[Dict]],
|
| 990 |
-
avg_confidence: float) -> str:
|
| 991 |
-
"""
|
| 992 |
-
處理多個物件的描述生成
|
| 993 |
-
|
| 994 |
-
對於多個物件,我們的重點是:
|
| 995 |
-
1. 將數字轉換為文字表達
|
| 996 |
-
2. 分析空間排列模式
|
| 997 |
-
3. 添加適當的材質或功能描述
|
| 998 |
-
4. 生成自然流暢的描述
|
| 999 |
-
|
| 1000 |
-
Args:
|
| 1001 |
-
class_name: 物件類別名稱
|
| 1002 |
-
count: 物件數量
|
| 1003 |
-
plural_form: 複數形式
|
| 1004 |
-
scene_type: 場景類型
|
| 1005 |
-
detected_objects: 檢測物件列表
|
| 1006 |
-
avg_confidence: 平均置信度
|
| 1007 |
-
|
| 1008 |
-
Returns:
|
| 1009 |
-
str: 多個物件的完整描述
|
| 1010 |
-
"""
|
| 1011 |
-
# 數字到文字的轉換映射
|
| 1012 |
-
number_words = {
|
| 1013 |
-
2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
|
| 1014 |
-
7: "seven", 8: "eight", 9: "nine", 10: "ten",
|
| 1015 |
-
11: "eleven", 12: "twelve"
|
| 1016 |
-
}
|
| 1017 |
-
|
| 1018 |
-
# 確定基礎數量表達
|
| 1019 |
-
if count in number_words:
|
| 1020 |
-
count_expression = number_words[count]
|
| 1021 |
-
elif count <= 20:
|
| 1022 |
-
count_expression = "several"
|
| 1023 |
-
else:
|
| 1024 |
-
count_expression = "numerous"
|
| 1025 |
-
|
| 1026 |
-
# 獲取材質或功能描述符
|
| 1027 |
-
material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
|
| 1028 |
-
|
| 1029 |
-
# 獲取空間排列描述
|
| 1030 |
-
spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
|
| 1031 |
-
detected_objects, count)
|
| 1032 |
-
|
| 1033 |
-
# 組合最終描述
|
| 1034 |
-
descriptors = []
|
| 1035 |
-
if material_descriptor:
|
| 1036 |
-
descriptors.append(material_descriptor)
|
| 1037 |
-
|
| 1038 |
-
# 構建基礎描述
|
| 1039 |
-
base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
|
| 1040 |
-
|
| 1041 |
-
# 添加空間排列信息
|
| 1042 |
-
if spatial_descriptor:
|
| 1043 |
-
return f"{base_description} {spatial_descriptor}"
|
| 1044 |
-
else:
|
| 1045 |
-
return base_description
|
| 1046 |
-
|
| 1047 |
-
def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
|
| 1048 |
-
avg_confidence: float) -> Optional[str]:
|
| 1049 |
-
"""
|
| 1050 |
-
基於場景語境和置信度進行材質推斷
|
| 1051 |
-
|
| 1052 |
-
這個方法實現了智能的材質推斷,它不依賴複雜的圖像分析,
|
| 1053 |
-
而是基於常識和場景邏輯來推斷最可能的材質描述
|
| 1054 |
-
|
| 1055 |
-
Args:
|
| 1056 |
-
class_name: 物件類別名稱
|
| 1057 |
-
scene_type: 場景類型
|
| 1058 |
-
avg_confidence: 檢測置信度,影響推斷的保守程度
|
| 1059 |
-
|
| 1060 |
-
Returns:
|
| 1061 |
-
Optional[str]: 材質描述符,如果無法推斷則返回None
|
| 1062 |
-
"""
|
| 1063 |
-
# 只有在置信度足夠高時才進行材質推斷
|
| 1064 |
-
if avg_confidence < 0.5:
|
| 1065 |
-
return None
|
| 1066 |
-
|
| 1067 |
-
# 餐廳和用餐相關場景
|
| 1068 |
-
if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
|
| 1069 |
-
material_mapping = {
|
| 1070 |
-
"chair": "wooden" if avg_confidence > 0.7 else None,
|
| 1071 |
-
"dining table": "wooden",
|
| 1072 |
-
"couch": "upholstered",
|
| 1073 |
-
"vase": "decorative"
|
| 1074 |
-
}
|
| 1075 |
-
return material_mapping.get(class_name)
|
| 1076 |
-
|
| 1077 |
-
# 辦公場景
|
| 1078 |
-
elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
|
| 1079 |
-
material_mapping = {
|
| 1080 |
-
"chair": "office",
|
| 1081 |
-
"dining table": "conference", # 在辦公環境中,餐桌通常是會議桌
|
| 1082 |
-
"laptop": "modern",
|
| 1083 |
-
"book": "reference"
|
| 1084 |
-
}
|
| 1085 |
-
return material_mapping.get(class_name)
|
| 1086 |
-
|
| 1087 |
-
# 客廳場景
|
| 1088 |
-
elif scene_type and scene_type in ["living_room"]:
|
| 1089 |
-
material_mapping = {
|
| 1090 |
-
"couch": "comfortable",
|
| 1091 |
-
"chair": "accent",
|
| 1092 |
-
"tv": "large",
|
| 1093 |
-
"vase": "decorative"
|
| 1094 |
-
}
|
| 1095 |
-
return material_mapping.get(class_name)
|
| 1096 |
-
|
| 1097 |
-
# 室外場景
|
| 1098 |
-
elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
|
| 1099 |
-
material_mapping = {
|
| 1100 |
-
"car": "parked",
|
| 1101 |
-
"person": "walking",
|
| 1102 |
-
"bicycle": "stationed"
|
| 1103 |
-
}
|
| 1104 |
-
return material_mapping.get(class_name)
|
| 1105 |
-
|
| 1106 |
-
# 如果沒有特定的場景映射,返回通用描述符
|
| 1107 |
-
generic_mapping = {
|
| 1108 |
-
"chair": "comfortable",
|
| 1109 |
-
"dining table": "sturdy",
|
| 1110 |
-
"car": "parked",
|
| 1111 |
-
"person": "present"
|
| 1112 |
-
}
|
| 1113 |
-
|
| 1114 |
-
return generic_mapping.get(class_name)
|
| 1115 |
-
|
| 1116 |
-
def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
|
| 1117 |
-
detected_objects: Optional[List[Dict]],
|
| 1118 |
-
count: int) -> Optional[str]:
|
| 1119 |
-
"""
|
| 1120 |
-
分析物件的空間排列模式並生成相應描述
|
| 1121 |
-
|
| 1122 |
-
這個方法通過分析物件的位置分布來判斷排列模式,
|
| 1123 |
-
然後根據物件類型和場景生成適當的空間描述
|
| 1124 |
-
|
| 1125 |
-
Args:
|
| 1126 |
-
class_name: 物件類別名稱
|
| 1127 |
-
scene_type: 場景類型
|
| 1128 |
-
detected_objects: 該類型的所有檢測物件
|
| 1129 |
-
count: 物件數量
|
| 1130 |
-
|
| 1131 |
-
Returns:
|
| 1132 |
-
Optional[str]: 空間排列描述,如果無法分析則返回None
|
| 1133 |
-
"""
|
| 1134 |
-
if not detected_objects or len(detected_objects) < 2:
|
| 1135 |
-
return None
|
| 1136 |
-
|
| 1137 |
-
try:
|
| 1138 |
-
# 提取物件的標準化位置
|
| 1139 |
-
positions = []
|
| 1140 |
-
for obj in detected_objects:
|
| 1141 |
-
center = obj.get("normalized_center", [0.5, 0.5])
|
| 1142 |
-
if isinstance(center, (list, tuple)) and len(center) >= 2:
|
| 1143 |
-
positions.append(center)
|
| 1144 |
-
|
| 1145 |
-
if len(positions) < 2:
|
| 1146 |
-
return None
|
| 1147 |
-
|
| 1148 |
-
# 分析排列模式
|
| 1149 |
-
arrangement_pattern = self._analyze_arrangement_pattern(positions)
|
| 1150 |
-
|
| 1151 |
-
# 根據物件類型和場景生成描述
|
| 1152 |
-
return self._generate_arrangement_description(class_name, scene_type,
|
| 1153 |
-
arrangement_pattern, count)
|
| 1154 |
-
|
| 1155 |
-
except Exception as e:
|
| 1156 |
-
self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
|
| 1157 |
-
return None
|
| 1158 |
-
|
| 1159 |
-
def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
|
| 1160 |
-
"""
|
| 1161 |
-
分析位置點的排列模式
|
| 1162 |
-
|
| 1163 |
-
這個方法使用簡單的幾何分析來判斷物件的排列類型,
|
| 1164 |
-
幫助我們理解物件在空間中的組織方式
|
| 1165 |
-
|
| 1166 |
-
Args:
|
| 1167 |
-
positions: 標準化的位置座標列表
|
| 1168 |
-
|
| 1169 |
-
Returns:
|
| 1170 |
-
str: 排列模式類型(linear, clustered, scattered, circular等)
|
| 1171 |
-
"""
|
| 1172 |
-
import numpy as np
|
| 1173 |
-
|
| 1174 |
-
if len(positions) < 2:
|
| 1175 |
-
return "single"
|
| 1176 |
-
|
| 1177 |
-
# 轉換為numpy陣列便於計算
|
| 1178 |
-
pos_array = np.array(positions)
|
| 1179 |
-
|
| 1180 |
-
# 計算位置的分布特徵
|
| 1181 |
-
x_coords = pos_array[:, 0]
|
| 1182 |
-
y_coords = pos_array[:, 1]
|
| 1183 |
-
|
| 1184 |
-
# 分析x和y方向的變異程度
|
| 1185 |
-
x_variance = np.var(x_coords)
|
| 1186 |
-
y_variance = np.var(y_coords)
|
| 1187 |
-
|
| 1188 |
-
# 計算物件間的平均距離
|
| 1189 |
-
distances = []
|
| 1190 |
-
for i in range(len(positions)):
|
| 1191 |
-
for j in range(i + 1, len(positions)):
|
| 1192 |
-
dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
|
| 1193 |
-
(positions[i][1] - positions[j][1])**2)
|
| 1194 |
-
distances.append(dist)
|
| 1195 |
-
|
| 1196 |
-
avg_distance = np.mean(distances) if distances else 0
|
| 1197 |
-
distance_variance = np.var(distances) if distances else 0
|
| 1198 |
-
|
| 1199 |
-
# 判斷排列模式
|
| 1200 |
-
if len(positions) >= 4 and self._is_circular_pattern(positions):
|
| 1201 |
-
return "circular"
|
| 1202 |
-
elif x_variance < 0.05 or y_variance < 0.05: # 一個方向變異很小
|
| 1203 |
-
return "linear"
|
| 1204 |
-
elif avg_distance < 0.3 and distance_variance < 0.02: # 物件聚集且距離相近
|
| 1205 |
-
return "clustered"
|
| 1206 |
-
elif avg_distance > 0.6: # 物件分散
|
| 1207 |
-
return "scattered"
|
| 1208 |
-
elif distance_variance < 0.03: # 距離一致,可能是規則排列
|
| 1209 |
-
return "regular"
|
| 1210 |
-
else:
|
| 1211 |
-
return "distributed"
|
| 1212 |
-
|
| 1213 |
-
def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
|
| 1214 |
-
"""
|
| 1215 |
-
檢查位置是否形成圓形或環形排列
|
| 1216 |
-
|
| 1217 |
-
Args:
|
| 1218 |
-
positions: 位置座標列表
|
| 1219 |
-
|
| 1220 |
-
Returns:
|
| 1221 |
-
bool: 是否為圓形排列
|
| 1222 |
-
"""
|
| 1223 |
-
import numpy as np
|
| 1224 |
-
|
| 1225 |
-
if len(positions) < 4:
|
| 1226 |
-
return False
|
| 1227 |
-
|
| 1228 |
-
try:
|
| 1229 |
-
pos_array = np.array(positions)
|
| 1230 |
-
|
| 1231 |
-
# 計算中心點
|
| 1232 |
-
center_x = np.mean(pos_array[:, 0])
|
| 1233 |
-
center_y = np.mean(pos_array[:, 1])
|
| 1234 |
-
|
| 1235 |
-
# 計算每個點到中心的距離
|
| 1236 |
-
distances_to_center = []
|
| 1237 |
-
for pos in positions:
|
| 1238 |
-
dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
|
| 1239 |
-
distances_to_center.append(dist)
|
| 1240 |
-
|
| 1241 |
-
# 如果所有距離都相近,可能是圓形排列
|
| 1242 |
-
distance_variance = np.var(distances_to_center)
|
| 1243 |
-
return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
|
| 1244 |
-
|
| 1245 |
-
except:
|
| 1246 |
-
return False
|
| 1247 |
-
|
| 1248 |
-
def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
|
| 1249 |
-
arrangement_pattern: str, count: int) -> Optional[str]:
|
| 1250 |
-
"""
|
| 1251 |
-
根據物件類型、場景和排列模式生成空間描述
|
| 1252 |
-
|
| 1253 |
-
這個方法將抽象的排列模式轉換為自然語言描述,
|
| 1254 |
-
並根據具體的物件類型和場景語境進行定制
|
| 1255 |
-
|
| 1256 |
-
Args:
|
| 1257 |
-
class_name: 物件類別名稱
|
| 1258 |
-
scene_type: 場景類型
|
| 1259 |
-
arrangement_pattern: 排列模式
|
| 1260 |
-
count: 物件數量
|
| 1261 |
-
|
| 1262 |
-
Returns:
|
| 1263 |
-
Optional[str]: 生成的空間排列描述
|
| 1264 |
-
"""
|
| 1265 |
-
# 基於物件類型的描述模板
|
| 1266 |
-
arrangement_templates = {
|
| 1267 |
-
"chair": {
|
| 1268 |
-
"linear": "arranged in a row",
|
| 1269 |
-
"clustered": "grouped together for conversation",
|
| 1270 |
-
"circular": "arranged around the table",
|
| 1271 |
-
"scattered": "positioned throughout the space",
|
| 1272 |
-
"regular": "evenly spaced",
|
| 1273 |
-
"distributed": "thoughtfully positioned"
|
| 1274 |
-
},
|
| 1275 |
-
"dining table": {
|
| 1276 |
-
"linear": "aligned to create a unified dining space",
|
| 1277 |
-
"clustered": "grouped to form intimate dining areas",
|
| 1278 |
-
"scattered": "distributed to optimize space flow",
|
| 1279 |
-
"regular": "systematically positioned",
|
| 1280 |
-
"distributed": "strategically placed"
|
| 1281 |
-
},
|
| 1282 |
-
"car": {
|
| 1283 |
-
"linear": "parked in sequence",
|
| 1284 |
-
"clustered": "grouped in the parking area",
|
| 1285 |
-
"scattered": "distributed throughout the lot",
|
| 1286 |
-
"regular": "neatly parked",
|
| 1287 |
-
"distributed": "positioned across the area"
|
| 1288 |
-
},
|
| 1289 |
-
"person": {
|
| 1290 |
-
"linear": "moving in a line",
|
| 1291 |
-
"clustered": "gathered together",
|
| 1292 |
-
"circular": "forming a circle",
|
| 1293 |
-
"scattered": "spread across the area",
|
| 1294 |
-
"distributed": "positioned throughout the scene"
|
| 1295 |
-
}
|
| 1296 |
-
}
|
| 1297 |
-
|
| 1298 |
-
# 獲取對應的描述模板
|
| 1299 |
-
if class_name in arrangement_templates:
|
| 1300 |
-
template_dict = arrangement_templates[class_name]
|
| 1301 |
-
base_description = template_dict.get(arrangement_pattern, "positioned in the scene")
|
| 1302 |
-
else:
|
| 1303 |
-
# 通用的排列描述
|
| 1304 |
-
generic_templates = {
|
| 1305 |
-
"linear": "arranged in a line",
|
| 1306 |
-
"clustered": "grouped together",
|
| 1307 |
-
"circular": "arranged in a circular pattern",
|
| 1308 |
-
"scattered": "distributed across the space",
|
| 1309 |
-
"regular": "evenly positioned",
|
| 1310 |
-
"distributed": "thoughtfully placed"
|
| 1311 |
-
}
|
| 1312 |
-
base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
|
| 1313 |
-
|
| 1314 |
-
return base_description
|
| 1315 |
-
|
| 1316 |
-
def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
|
| 1317 |
-
detected_objects: Optional[List[Dict]]) -> Optional[str]:
|
| 1318 |
-
"""
|
| 1319 |
-
為單個物件生成特徵描述符
|
| 1320 |
-
|
| 1321 |
-
當只有一個物件時,我們可以提供更具體的位置或功能描述
|
| 1322 |
-
|
| 1323 |
-
Args:
|
| 1324 |
-
class_name: 物件類別名稱
|
| 1325 |
-
scene_type: 場景類型
|
| 1326 |
-
detected_objects: 檢測物件(單個)
|
| 1327 |
-
|
| 1328 |
-
Returns:
|
| 1329 |
-
Optional[str]: 特徵描述符
|
| 1330 |
-
"""
|
| 1331 |
-
if not detected_objects or len(detected_objects) != 1:
|
| 1332 |
-
return None
|
| 1333 |
-
|
| 1334 |
-
obj = detected_objects[0]
|
| 1335 |
-
region = obj.get("region", "").lower()
|
| 1336 |
-
|
| 1337 |
-
# 基於位置的描述
|
| 1338 |
-
if "center" in region:
|
| 1339 |
-
if class_name == "dining table":
|
| 1340 |
-
return "central"
|
| 1341 |
-
elif class_name == "chair":
|
| 1342 |
-
return "centrally placed"
|
| 1343 |
-
elif "corner" in region or "left" in region or "right" in region:
|
| 1344 |
-
return "positioned"
|
| 1345 |
-
|
| 1346 |
-
# 基於場景的功能描述
|
| 1347 |
-
if scene_type and scene_type in ["dining_area", "restaurant"]:
|
| 1348 |
-
if class_name == "chair":
|
| 1349 |
-
return "dining"
|
| 1350 |
-
elif class_name == "vase":
|
| 1351 |
-
return "decorative"
|
| 1352 |
-
|
| 1353 |
-
return None
|
| 1354 |
-
|
| 1355 |
-
def _get_plural_form(self, word: str) -> str:
|
| 1356 |
-
"""
|
| 1357 |
-
獲取詞彙的複數形式
|
| 1358 |
-
|
| 1359 |
-
Args:
|
| 1360 |
-
word: 單數詞彙
|
| 1361 |
-
|
| 1362 |
-
Returns:
|
| 1363 |
-
str: 複數形式
|
| 1364 |
-
"""
|
| 1365 |
-
try:
|
| 1366 |
-
# 特殊複數形式
|
| 1367 |
-
irregular_plurals = {
|
| 1368 |
-
'person': 'people',
|
| 1369 |
-
'child': 'children',
|
| 1370 |
-
'foot': 'feet',
|
| 1371 |
-
'tooth': 'teeth',
|
| 1372 |
-
'mouse': 'mice',
|
| 1373 |
-
'man': 'men',
|
| 1374 |
-
'woman': 'women'
|
| 1375 |
-
}
|
| 1376 |
-
|
| 1377 |
-
if word.lower() in irregular_plurals:
|
| 1378 |
-
return irregular_plurals[word.lower()]
|
| 1379 |
-
|
| 1380 |
-
# 規則複數形式
|
| 1381 |
-
if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
|
| 1382 |
-
return word + 'es'
|
| 1383 |
-
elif word.endswith('y') and word[-2] not in 'aeiou':
|
| 1384 |
-
return word[:-1] + 'ies'
|
| 1385 |
-
elif word.endswith('f'):
|
| 1386 |
-
return word[:-1] + 'ves'
|
| 1387 |
-
elif word.endswith('fe'):
|
| 1388 |
-
return word[:-2] + 'ves'
|
| 1389 |
-
else:
|
| 1390 |
-
return word + 's'
|
| 1391 |
-
|
| 1392 |
-
except Exception as e:
|
| 1393 |
-
self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
|
| 1394 |
-
return word + 's'
|
| 1395 |
-
|
| 1396 |
-
def _normalize_object_class_name(self, class_name: str) -> str:
|
| 1397 |
-
"""
|
| 1398 |
-
標準化物件類別名稱,確保輸出自然語言格式
|
| 1399 |
-
|
| 1400 |
-
Args:
|
| 1401 |
-
class_name: 原始類別名稱
|
| 1402 |
-
|
| 1403 |
-
Returns:
|
| 1404 |
-
str: 標準化後的類別名稱
|
| 1405 |
-
"""
|
| 1406 |
-
try:
|
| 1407 |
-
if not class_name or not isinstance(class_name, str):
|
| 1408 |
-
return "object"
|
| 1409 |
-
|
| 1410 |
-
# 移除可能的技術性前綴或後綴
|
| 1411 |
-
import re
|
| 1412 |
-
normalized = re.sub(r'^(class_|id_|type_)', '', class_name.lower())
|
| 1413 |
-
normalized = re.sub(r'(_class|_id|_type)$', '', normalized)
|
| 1414 |
-
|
| 1415 |
-
# 將下劃線和連字符替換為空格
|
| 1416 |
-
normalized = normalized.replace('_', ' ').replace('-', ' ')
|
| 1417 |
-
|
| 1418 |
-
# 移除多餘空格
|
| 1419 |
-
normalized = ' '.join(normalized.split())
|
| 1420 |
-
|
| 1421 |
-
# 特殊類別名稱的標準化映射
|
| 1422 |
-
class_name_mapping = {
|
| 1423 |
-
'traffic light': 'traffic light',
|
| 1424 |
-
'stop sign': 'stop sign',
|
| 1425 |
-
'fire hydrant': 'fire hydrant',
|
| 1426 |
-
'dining table': 'dining table',
|
| 1427 |
-
'potted plant': 'potted plant',
|
| 1428 |
-
'tv monitor': 'television',
|
| 1429 |
-
'cell phone': 'mobile phone',
|
| 1430 |
-
'wine glass': 'wine glass',
|
| 1431 |
-
'hot dog': 'hot dog',
|
| 1432 |
-
'teddy bear': 'teddy bear',
|
| 1433 |
-
'hair drier': 'hair dryer',
|
| 1434 |
-
'toothbrush': 'toothbrush'
|
| 1435 |
-
}
|
| 1436 |
-
|
| 1437 |
-
return class_name_mapping.get(normalized, normalized)
|
| 1438 |
-
|
| 1439 |
-
except Exception as e:
|
| 1440 |
-
self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
|
| 1441 |
-
return class_name if isinstance(class_name, str) else "object"
|
| 1442 |
-
|
| 1443 |
def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
|
| 1444 |
"""
|
| 1445 |
當模板不可用時生成基本詳細��息
|
|
@@ -1588,7 +467,7 @@ class ObjectDescriptionGenerator:
|
|
| 1588 |
furniture_names = []
|
| 1589 |
for obj in furniture_objects[:3]:
|
| 1590 |
raw_name = obj.get("class_name", "furniture")
|
| 1591 |
-
normalized_name = self.
|
| 1592 |
furniture_names.append(normalized_name)
|
| 1593 |
|
| 1594 |
unique_names = list(set(furniture_names))
|
|
@@ -1786,7 +665,6 @@ class ObjectDescriptionGenerator:
|
|
| 1786 |
return "functional area"
|
| 1787 |
|
| 1788 |
# 移除數字後綴(如 crossing_zone_1 -> crossing_zone)
|
| 1789 |
-
import re
|
| 1790 |
base_name = re.sub(r'_\d+$', '', zone_name)
|
| 1791 |
|
| 1792 |
# 將下劃線替換為空格
|
|
@@ -1851,9 +729,16 @@ class ObjectDescriptionGenerator:
|
|
| 1851 |
old_value = getattr(self, key)
|
| 1852 |
setattr(self, key, value)
|
| 1853 |
self.logger.info(f"Updated {key}: {old_value} -> {value}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1854 |
else:
|
| 1855 |
self.logger.warning(f"Unknown configuration parameter: {key}")
|
| 1856 |
|
| 1857 |
except Exception as e:
|
| 1858 |
self.logger.error(f"Error updating configuration: {str(e)}")
|
| 1859 |
-
raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e
|
|
|
|
| 4 |
from typing import Dict, List, Tuple, Optional, Any
|
| 5 |
import numpy as np
|
| 6 |
|
| 7 |
+
from prominence_calculator import ProminenceCalculator
|
| 8 |
+
from spatial_location_handler import SpatialLocationHandler
|
| 9 |
+
from text_optimizer import TextOptimizer
|
| 10 |
+
from object_group_processor import ObjectGroupProcessor
|
| 11 |
+
|
| 12 |
class ObjectDescriptionError(Exception):
    """Custom exception raised while generating object descriptions."""
|
|
|
|
| 17 |
class ObjectDescriptionGenerator:
|
| 18 |
"""
|
| 19 |
物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
|
| 20 |
+
匯總於EnhancedSceneDescriber
|
| 21 |
|
| 22 |
該類別處理物件相關的所有描述生成邏輯,包括重要物件的辨識、
|
| 23 |
空間位置描述、物件列表格式化以及描述文本的優化。
|
| 24 |
+
|
| 25 |
+
作為 Facade 模式的實現,協調四個專門的子組件來完成複雜的描述生成任務。
|
| 26 |
"""
|
| 27 |
|
| 28 |
def __init__(self,
|
|
|
|
| 39 |
max_categories_to_return: 返回的物件類別最大數量
|
| 40 |
max_total_objects: 返回的物件總數上限
|
| 41 |
confidence_threshold_for_description: 用於描述的置信度閾值
|
| 42 |
+
region_analyzer: 可選的RegionAnalyzer實例
|
| 43 |
"""
|
| 44 |
self.logger = logging.getLogger(self.__class__.__name__)
|
| 45 |
|
|
|
|
| 49 |
self.confidence_threshold_for_description = confidence_threshold_for_description
|
| 50 |
self.region_analyzer = region_analyzer
|
| 51 |
|
| 52 |
+
# 初始化子組件
|
| 53 |
+
self.prominence_calculator = ProminenceCalculator(
|
| 54 |
+
min_prominence_score=self.min_prominence_score
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
self.spatial_handler = SpatialLocationHandler(
|
| 58 |
+
region_analyzer=self.region_analyzer
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
self.text_optimizer = TextOptimizer()
|
| 62 |
+
|
| 63 |
+
self.object_group_processor = ObjectGroupProcessor(
|
| 64 |
+
confidence_threshold_for_description=self.confidence_threshold_for_description,
|
| 65 |
+
spatial_handler=self.spatial_handler,
|
| 66 |
+
text_optimizer=self.text_optimizer
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
|
| 70 |
"max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
|
| 71 |
min_prominence_score, max_categories_to_return,
|
|
|
|
| 85 |
Returns:
|
| 86 |
List[Dict]: 按重要性排序的物件列表
|
| 87 |
"""
|
| 88 |
+
return self.prominence_calculator.filter_prominent_objects(
|
| 89 |
+
detected_objects=detected_objects,
|
| 90 |
+
min_prominence_score=min_prominence_score,
|
| 91 |
+
max_categories_to_return=max_categories_to_return
|
| 92 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
def set_region_analyzer(self, region_analyzer: Any) -> None:
|
| 95 |
"""
|
|
|
|
| 100 |
"""
|
| 101 |
try:
|
| 102 |
self.region_analyzer = region_analyzer
|
| 103 |
+
self.spatial_handler.set_region_analyzer(region_analyzer)
|
| 104 |
self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
|
| 105 |
except Exception as e:
|
| 106 |
self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
def format_object_list_for_description(self,
|
| 109 |
objects: List[Dict],
|
| 110 |
use_indefinite_article_for_one: bool = False,
|
|
|
|
| 122 |
Returns:
|
| 123 |
str: 格式化的物件描述字符串
|
| 124 |
"""
|
| 125 |
+
return self.text_optimizer.format_object_list_for_description(
|
| 126 |
+
objects=objects,
|
| 127 |
+
use_indefinite_article_for_one=use_indefinite_article_for_one,
|
| 128 |
+
count_threshold_for_generalization=count_threshold_for_generalization,
|
| 129 |
+
max_types_to_list=max_types_to_list
|
| 130 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
|
| 133 |
image_height: Optional[int] = None,
|
|
|
|
| 144 |
Returns:
|
| 145 |
str: 空間描述字符串,空值region時返回空字串
|
| 146 |
"""
|
| 147 |
+
return self.spatial_handler.generate_spatial_description(
|
| 148 |
+
obj=obj,
|
| 149 |
+
image_width=image_width,
|
| 150 |
+
image_height=image_height,
|
| 151 |
+
region_analyzer=region_analyzer
|
| 152 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
def optimize_object_description(self, description: str) -> str:
|
| 155 |
"""
|
| 156 |
+
優化物件描述文本,消除多餘重複並改善表達流暢度
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
Args:
|
| 159 |
description: 原始的場景描述文本,可能包含重複或冗餘的表達
|
|
|
|
| 161 |
Returns:
|
| 162 |
str: 經過優化清理的描述文本,如果處理失敗則返回原始文本
|
| 163 |
"""
|
| 164 |
+
return self.text_optimizer.optimize_object_description(description)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
def generate_dynamic_everyday_description(self,
|
| 167 |
detected_objects: List[Dict],
|
|
|
|
| 189 |
try:
|
| 190 |
description_segments = []
|
| 191 |
image_width, image_height = image_dimensions if image_dimensions else (None, None)
|
| 192 |
+
scene_type = places365_info.get("scene", "") if places365_info else ""
|
| 193 |
|
| 194 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
| 195 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
|
|
|
| 229 |
else:
|
| 230 |
description_segments.append("Within this setting, no specific objects were clearly identified.")
|
| 231 |
else:
|
|
|
|
|
|
|
| 232 |
# 使用置信度過濾
|
| 233 |
confident_objects = [obj for obj in detected_objects
|
| 234 |
if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
|
|
|
|
| 244 |
else:
|
| 245 |
description_segments.append(no_confident_obj_msg.lower().capitalize())
|
| 246 |
else:
|
| 247 |
+
# 使用 ObjectGroupProcessor 處理物件分組和排序
|
| 248 |
+
objects_by_class = self.object_group_processor.group_objects_by_class(
|
| 249 |
+
confident_objects, object_statistics
|
| 250 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
if not objects_by_class:
|
| 253 |
description_segments.append("No common objects were confidently identified for detailed description.")
|
| 254 |
else:
|
| 255 |
+
# 移除重複物件
|
| 256 |
+
deduplicated_objects_by_class = self.object_group_processor.remove_duplicate_objects(
|
| 257 |
+
objects_by_class
|
| 258 |
+
)
|
| 259 |
+
|
| 260 |
+
# 排序物件組
|
| 261 |
+
sorted_object_groups = self.object_group_processor.sort_object_groups(
|
| 262 |
+
deduplicated_objects_by_class
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
+
# 生成物件描述子句
|
| 266 |
+
object_clauses = self.object_group_processor.generate_object_clauses(
|
| 267 |
+
sorted_object_groups, object_statistics, scene_type,
|
| 268 |
+
image_width, image_height, self.region_analyzer
|
| 269 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
|
| 271 |
if object_clauses:
|
| 272 |
if not description_segments:
|
|
|
|
| 304 |
raw_description += "."
|
| 305 |
|
| 306 |
# 移除重複性和不適當的描述詞彙
|
| 307 |
+
raw_description = self.text_optimizer.remove_repetitive_descriptors(raw_description)
|
| 308 |
|
| 309 |
if not raw_description or len(raw_description.strip()) < 20:
|
| 310 |
if 'confident_objects' in locals() and confident_objects:
|
|
|
|
| 319 |
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
| 320 |
raise ObjectDescriptionError(error_msg) from e
|
| 321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
|
| 323 |
"""
|
| 324 |
當模板不可用時生成基本詳細信息
|
|
|
|
| 467 |
furniture_names = []
|
| 468 |
for obj in furniture_objects[:3]:
|
| 469 |
raw_name = obj.get("class_name", "furniture")
|
| 470 |
+
normalized_name = self.text_optimizer.normalize_object_class_name(raw_name)
|
| 471 |
furniture_names.append(normalized_name)
|
| 472 |
|
| 473 |
unique_names = list(set(furniture_names))
|
|
|
|
| 665 |
return "functional area"
|
| 666 |
|
| 667 |
# 移除數字後綴(如 crossing_zone_1 -> crossing_zone)
|
|
|
|
| 668 |
base_name = re.sub(r'_\d+$', '', zone_name)
|
| 669 |
|
| 670 |
# 將下劃線替換為空格
|
|
|
|
| 729 |
old_value = getattr(self, key)
|
| 730 |
setattr(self, key, value)
|
| 731 |
self.logger.info(f"Updated {key}: {old_value} -> {value}")
|
| 732 |
+
|
| 733 |
+
# 同步更新子組件的配置
|
| 734 |
+
if key == "min_prominence_score" and hasattr(self, 'prominence_calculator'):
|
| 735 |
+
self.prominence_calculator.min_prominence_score = value
|
| 736 |
+
elif key == "confidence_threshold_for_description" and hasattr(self, 'object_group_processor'):
|
| 737 |
+
self.object_group_processor.confidence_threshold_for_description = value
|
| 738 |
+
|
| 739 |
else:
|
| 740 |
self.logger.warning(f"Unknown configuration parameter: {key}")
|
| 741 |
|
| 742 |
except Exception as e:
|
| 743 |
self.logger.error(f"Error updating configuration: {str(e)}")
|
| 744 |
+
raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e
|
object_group_processor.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 3 |
+
|
| 4 |
+
class ObjectGroupProcessor:
    """
    Group detected objects, drop spatial duplicates, order the groups and
    turn each group into a short descriptive clause.

    The processor optionally delegates to two collaborators:

    * ``spatial_handler`` - called as
      ``generate_spatial_description(obj, image_width, image_height, region_analyzer)``.
    * ``text_optimizer`` - called as ``normalize_object_class_name(name)`` and
      ``format_object_count_description(name, count, scene_type, objects, avg_conf)``.

    When a collaborator is missing, simple built-in fallbacks are used.
    """

    # Classes whose per-stage counts are traced at DEBUG level.
    _TRACED_CLASSES = ("car", "traffic light", "person", "handbag")

    # Small / decorative classes that get a more lenient confidence threshold.
    _SMALL_OBJECT_CLASSES = ("potted plant", "vase", "clock", "book")

    # Fallback mapping from a cleaned region name to a spatial phrase.
    _REGION_PHRASES = {
        "top left": "in the upper left area",
        "top center": "in the upper area",
        "top right": "in the upper right area",
        "middle left": "on the left side",
        "middle center": "in the center",
        "center": "in the center",
        "middle right": "on the right side",
        "bottom left": "in the lower left area",
        "bottom center": "in the lower area",
        "bottom right": "in the lower right area"
    }

    # Fallback spelled-out numbers for small counts.
    _NUMBER_WORDS = {
        2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
        7: "seven", 8: "eight", 9: "nine", 10: "ten",
        11: "eleven", 12: "twelve"
    }

    def __init__(self, confidence_threshold_for_description: float = 0.25,
                 spatial_handler: Optional[Any] = None,
                 text_optimizer: Optional[Any] = None):
        """
        Initialize the object group processor.

        Args:
            confidence_threshold_for_description: Base confidence threshold used
                when deciding whether a class is described.
            spatial_handler: Optional spatial location handler instance.
            text_optimizer: Optional text optimizer instance.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.confidence_threshold_for_description = confidence_threshold_for_description
        self.spatial_handler = spatial_handler
        self.text_optimizer = text_optimizer

    def group_objects_by_class(self, confident_objects: List[Dict],
                               object_statistics: Optional[Dict]) -> Dict[str, List[Dict]]:
        """
        Group confidence-filtered objects by their ``class_name``.

        When ``object_statistics`` is provided, a class is kept only if its
        ``count`` is positive and its ``avg_confidence`` reaches a per-class
        dynamic threshold; the group is then capped at the statistics count.
        Without statistics, every object with a usable class name is grouped.

        Args:
            confident_objects: Objects that already passed confidence filtering.
            object_statistics: Optional per-class statistics
                (``{"count": ..., "avg_confidence": ...}`` per class name).

        Returns:
            Dict[str, List[Dict]]: Objects grouped by class name.
        """
        objects_by_class: Dict[str, List[Dict]] = {}

        if not object_statistics:
            # Fallback path: plain grouping, skipping unnamed objects.
            for obj in confident_objects:
                name = obj.get("class_name", "unknown object")
                if name == "unknown object" or not name:
                    continue
                objects_by_class.setdefault(name, []).append(obj)
            return objects_by_class

        base_threshold = self.confidence_threshold_for_description
        for class_name, stats in object_statistics.items():
            count = stats.get("count", 0)
            avg_confidence = stats.get("avg_confidence", 0)

            # Relax the threshold for small objects and for frequently seen classes.
            dynamic_threshold = base_threshold
            if class_name in self._SMALL_OBJECT_CLASSES:
                dynamic_threshold = max(0.15, base_threshold * 0.6)
            elif count >= 3:
                dynamic_threshold = max(0.2, base_threshold * 0.8)

            if count <= 0 or avg_confidence < dynamic_threshold:
                continue

            matching_objects = [obj for obj in confident_objects
                                if obj.get("class_name") == class_name]
            if not matching_objects:
                continue

            # Never report more instances than the statistics claim.
            objects_by_class[class_name] = matching_objects[:min(count, len(matching_objects))]

            if class_name in self._TRACED_CLASSES:
                self.logger.debug("Before spatial deduplication: %s: %d objects",
                                  class_name, len(objects_by_class[class_name]))

        return objects_by_class

    def remove_duplicate_objects(self, objects_by_class: Dict[str, List[Dict]]) -> Dict[str, List[Dict]]:
        """
        Remove objects whose centers lie too close to an already kept object.

        Two objects count as duplicates when the Manhattan distance between
        their ``normalized_center`` values is below 0.15. Objects without a
        ``normalized_center`` are treated as located at ``[0.5, 0.5]``.

        NOTE(review): kept positions are shared across *all* classes, so an
        object can be dropped because of a nearby object of a different class.
        This matches the existing behavior - confirm it is intended.

        Args:
            objects_by_class: Objects grouped by class name.

        Returns:
            Dict[str, List[Dict]]: Groups with duplicates removed; classes that
            end up empty are omitted.
        """
        deduplicated_objects_by_class: Dict[str, List[Dict]] = {}
        processed_positions: List[Any] = []

        for class_name, group_of_objects in objects_by_class.items():
            unique_objects = []

            for obj in group_of_objects:
                obj_position = obj.get("normalized_center", [0.5, 0.5])
                is_duplicate = any(
                    abs(obj_position[0] - seen[0]) + abs(obj_position[1] - seen[1]) < 0.15
                    for seen in processed_positions
                )
                if not is_duplicate:
                    unique_objects.append(obj)
                    processed_positions.append(obj_position)

            if unique_objects:
                deduplicated_objects_by_class[class_name] = unique_objects

        for class_name in self._TRACED_CLASSES:
            if class_name in deduplicated_objects_by_class:
                self.logger.debug("After spatial deduplication: %s: %d objects",
                                  class_name, len(deduplicated_objects_by_class[class_name]))

        return deduplicated_objects_by_class

    def sort_object_groups(self, objects_by_class: Dict[str, List[Dict]]) -> List[Tuple[str, List[Dict]]]:
        """
        Order object groups by descriptive priority.

        Priority (lower sorts first): people, then common furniture, then
        vehicles / traffic lights; remaining classes are promoted one level
        when they have three or more instances. Ties are broken by larger
        group size, then larger average ``normalized_area``.

        Args:
            objects_by_class: Objects grouped by class name.

        Returns:
            List[Tuple[str, List[Dict]]]: ``(class_name, objects)`` pairs in
            description order.
        """
        def sort_key_object_groups(item_tuple: Tuple[str, List[Dict]]):
            class_name_key, obj_group_list = item_tuple
            priority = 3
            count = len(obj_group_list)

            # Compare against the normalized form of the class name.
            normalized_class_name = self._normalize_object_class_name(class_name_key)

            if normalized_class_name == "person":
                priority = 0
            elif normalized_class_name in ["dining table", "chair", "sofa", "bed"]:
                priority = 1
            elif normalized_class_name in ["car", "bus", "truck", "traffic light"]:
                priority = 2
            elif count >= 3:
                priority = max(1, priority - 1)
            elif normalized_class_name in self._SMALL_OBJECT_CLASSES and count >= 2:
                priority = 2

            avg_area = (sum(o.get("normalized_area", 0.0) for o in obj_group_list) / count
                        if obj_group_list else 0)
            quantity_bonus = min(count / 5.0, 1.0)

            return (priority, -count, -avg_area, -quantity_bonus)

        return sorted(objects_by_class.items(), key=sort_key_object_groups)

    def generate_object_clauses(self, sorted_object_groups: List[Tuple[str, List[Dict]]],
                                object_statistics: Optional[Dict],
                                scene_type: str,
                                image_width: Optional[int],
                                image_height: Optional[int],
                                region_analyzer: Optional[Any] = None) -> List[str]:
        """
        Build one descriptive clause per object group.

        The count stated in the clause comes from ``object_statistics`` when the
        class is present there, otherwise from the group size. The same count
        drives singular / plural verb selection so that subject and verb agree.

        Args:
            sorted_object_groups: ``(class_name, objects)`` pairs in description order.
            object_statistics: Optional per-class statistics.
            scene_type: Scene type forwarded to the count formatter.
            image_width: Image width forwarded to the spatial handler.
            image_height: Image height forwarded to the spatial handler.
            region_analyzer: Optional region analyzer forwarded to the spatial handler.

        Returns:
            List[str]: Clauses such as ``"Two chairs are primarily in the center area"``.
        """
        object_clauses = []

        for class_name, group_of_objects in sorted_object_groups:
            count = len(group_of_objects)

            if class_name in self._TRACED_CLASSES:
                self.logger.debug("Final count for %s: %d", class_name, count)

            if count == 0:
                continue

            normalized_class_name = self._normalize_object_class_name(class_name)

            # Prefer the statistics count for an accurate quantity description.
            described_count = count
            if object_statistics and class_name in object_statistics:
                described_count = object_statistics[class_name].get("count", count)

            formatted_name_with_exact_count = self._format_object_count_description(
                normalized_class_name,
                described_count,
                scene_type=scene_type
            )

            if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
                continue

            # Use the described count for verb agreement; fall back to the group
            # size if the statistics count is unusable.
            verb_count = (described_count
                          if isinstance(described_count, int) and described_count > 0
                          else count)
            location_description_suffix = self._generate_location_description(
                group_of_objects, verb_count, image_width, image_height, region_analyzer
            )

            # Capitalize only the first character, leaving the rest untouched.
            formatted_name_capitalized = formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
            object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")

        return object_clauses

    def format_object_clauses(self, object_clauses: List[str]) -> str:
        """
        Join object clauses into a single description string.

        The first clause becomes its own sentence; any further clauses follow
        after ``"The scene features:"``. The input list is not modified.

        Args:
            object_clauses: Clauses produced by ``generate_object_clauses``.

        Returns:
            str: The formatted description, or a fixed message when the list is empty.
        """
        if not object_clauses:
            return "No common objects were confidently identified for detailed description."

        first_clause, *remaining_clauses = object_clauses
        result = first_clause + "."

        if remaining_clauses:
            result += " The scene features:"
            joined_object_clauses = ". ".join(remaining_clauses)
            if joined_object_clauses and not joined_object_clauses.endswith("."):
                joined_object_clauses += "."
            result += " " + joined_object_clauses

        return result

    @staticmethod
    def _collect_valid_regions(group_of_objects: List[Dict]) -> List[str]:
        """Return the sorted, distinct, non-blank, non-"unknown" regions of a group."""
        regions = {obj.get("region", "") for obj in group_of_objects if obj.get("region")}
        return [r for r in sorted(regions) if r != "unknown" and r.strip()]

    def _generate_location_description(self, group_of_objects: List[Dict], count: int,
                                       image_width: Optional[int], image_height: Optional[int],
                                       region_analyzer: Optional[Any] = None) -> str:
        """
        Produce the verb phrase that locates a group within the image.

        Args:
            group_of_objects: Objects of a single class (must be non-empty when ``count == 1``).
            count: Number of objects being described; ``1`` selects singular phrasing.
            image_width: Image width forwarded to the spatial handler.
            image_height: Image height forwarded to the spatial handler.
            region_analyzer: Optional region analyzer forwarded to the spatial handler.

        Returns:
            str: A phrase starting with ``"is"`` or ``"are"``.
        """
        if count == 1:
            if self.spatial_handler:
                spatial_desc = self.spatial_handler.generate_spatial_description(
                    group_of_objects[0], image_width, image_height, region_analyzer
                )
            else:
                spatial_desc = self._get_spatial_description_phrase(group_of_objects[0].get("region", ""))

            if spatial_desc:
                return f"is {spatial_desc}"

            # No direct spatial phrase available: fall back to region names.
            valid_regions = self._collect_valid_regions(group_of_objects)
            if not valid_regions:
                return "is positioned in the scene"
            if len(valid_regions) == 1:
                spatial_desc = self._get_spatial_description_phrase(valid_regions[0])
                return f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
            if len(valid_regions) == 2:
                clean_region1 = valid_regions[0].replace('_', ' ')
                clean_region2 = valid_regions[1].replace('_', ' ')
                return f"is mainly across the {clean_region1} and {clean_region2} areas"
            return "is distributed in various parts of the scene"

        valid_regions = self._collect_valid_regions(group_of_objects)
        if not valid_regions:
            return "are visible in the scene"
        if len(valid_regions) == 1:
            clean_region = valid_regions[0].replace('_', ' ')
            return f"are primarily in the {clean_region} area"
        if len(valid_regions) == 2:
            clean_region1 = valid_regions[0].replace('_', ' ')
            clean_region2 = valid_regions[1].replace('_', ' ')
            return f"are mainly across the {clean_region1} and {clean_region2} areas"
        return "are distributed in various parts of the scene"

    def _get_spatial_description_phrase(self, region: str) -> str:
        """
        Fallback mapping from a region identifier to a spatial phrase.

        Args:
            region: Region string such as ``"top_left"``; underscores are
                treated as spaces and case is ignored.

        Returns:
            str: The matching phrase, or ``""`` for empty, ``"unknown"`` or
            unrecognized regions.
        """
        if not region or region == "unknown":
            return ""

        clean_region = region.replace('_', ' ').strip().lower()
        return self._REGION_PHRASES.get(clean_region, "")

    def _normalize_object_class_name(self, class_name: str) -> str:
        """
        Normalize an object class name.

        Delegates to ``text_optimizer`` when available; otherwise replaces
        underscores with spaces, strips and lower-cases the name.

        Args:
            class_name: Raw class name.

        Returns:
            str: Normalized class name (``"object"`` for empty or non-string
            input in the fallback path).
        """
        if self.text_optimizer:
            return self.text_optimizer.normalize_object_class_name(class_name)

        if not class_name or not isinstance(class_name, str):
            return "object"

        return class_name.replace('_', ' ').strip().lower()

    def _format_object_count_description(self, class_name: str, count: int,
                                         scene_type: Optional[str] = None,
                                         detected_objects: Optional[List[Dict]] = None,
                                         avg_confidence: float = 0.0) -> str:
        """
        Format a class name together with its quantity.

        Delegates to ``text_optimizer`` when available; otherwise uses a simple
        article / number-word / "several" / "numerous" scheme.

        Args:
            class_name: Normalized class name.
            count: Number of objects.
            scene_type: Scene type (forwarded to the text optimizer).
            detected_objects: All detected objects of this class (forwarded).
            avg_confidence: Average detection confidence (forwarded).

        Returns:
            str: The quantity description, or ``""`` when ``count <= 0`` in the
            fallback path.
        """
        if self.text_optimizer:
            return self.text_optimizer.format_object_count_description(
                class_name, count, scene_type, detected_objects, avg_confidence
            )

        if count <= 0:
            return ""
        if count == 1:
            article = "an" if class_name[0].lower() in 'aeiou' else "a"
            return f"{article} {class_name}"

        # Naive pluralization: names already ending in "s" are left as-is.
        plural_form = class_name + "s" if not class_name.endswith("s") else class_name

        if count in self._NUMBER_WORDS:
            return f"{self._NUMBER_WORDS[count]} {plural_form}"
        if count <= 20:
            return f"several {plural_form}"
        return f"numerous {plural_form}"
|
pattern_analyzer.py
ADDED
|
@@ -0,0 +1,371 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import logging
|
| 3 |
+
import traceback
|
| 4 |
+
import numpy as np
|
| 5 |
+
from typing import Dict, List, Any, Optional
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
class PatternAnalyzer:
|
| 10 |
+
"""
|
| 11 |
+
負責各種模式分析,包含交通流動、行人穿越、車輛分佈等的辨識
|
| 12 |
+
專門處理動態區域和移動相關的區域分析
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
def __init__(self):
    """Initialize the pattern analyzer and log that construction succeeded."""
    try:
        logger.info("PatternAnalyzer initialized successfully")
    except Exception as init_error:
        # Record both the message and the full traceback, then propagate.
        logger.error(f"Failed to initialize PatternAnalyzer: {str(init_error)}")
        logger.error(traceback.format_exc())
        raise
|
| 23 |
+
|
| 24 |
+
def analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
|
| 25 |
+
"""
|
| 26 |
+
Analyze pedestrian crossing patterns to identify crossing zones.
|
| 27 |
+
若同一 region 中同時有行人與紅綠燈,則將兩者都放入該區域的 objects。
|
| 28 |
+
|
| 29 |
+
Args:
|
| 30 |
+
pedestrians: 行人物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
|
| 31 |
+
traffic_lights: 紅綠燈物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
|
| 32 |
+
|
| 33 |
+
Returns:
|
| 34 |
+
crossing_zones: 字典,key 為 zone 名稱,value 包含 'region', 'objects', 'description'
|
| 35 |
+
"""
|
| 36 |
+
try:
|
| 37 |
+
crossing_zones = {}
|
| 38 |
+
|
| 39 |
+
# 如果沒有任何行人,就不辨識任何 crossing zone
|
| 40 |
+
if not pedestrians:
|
| 41 |
+
return crossing_zones
|
| 42 |
+
|
| 43 |
+
# (1) 按照 region 分組行人
|
| 44 |
+
pedestrian_regions = {}
|
| 45 |
+
for p in pedestrians:
|
| 46 |
+
region = p["region"]
|
| 47 |
+
pedestrian_regions.setdefault(region, []).append(p)
|
| 48 |
+
|
| 49 |
+
# (2) 針對每個 region,看是否同時有紅綠燈
|
| 50 |
+
# 建立一個對照表 mapping: region -> { "pedestrians": [...], "traffic_lights": [...] }
|
| 51 |
+
combined_regions = {}
|
| 52 |
+
for region, peds in pedestrian_regions.items():
|
| 53 |
+
# 取得該 region 下所有紅綠燈
|
| 54 |
+
tls_in_region = [t for t in traffic_lights if t["region"] == region]
|
| 55 |
+
combined_regions[region] = {
|
| 56 |
+
"pedestrians": peds,
|
| 57 |
+
"traffic_lights": tls_in_region
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
# (3) 按照行人數量排序,找出前兩個需要建立 crossing zone 的 region
|
| 61 |
+
sorted_regions = sorted(
|
| 62 |
+
combined_regions.items(),
|
| 63 |
+
key=lambda x: len(x[1]["pedestrians"]),
|
| 64 |
+
reverse=True
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# (4) 將前兩個 region 建立 Crossing Zone,objects 同時包含行人與紅綠燈
|
| 68 |
+
for idx, (region, group) in enumerate(sorted_regions[:2]):
|
| 69 |
+
peds = group["pedestrians"]
|
| 70 |
+
tls = group["traffic_lights"]
|
| 71 |
+
has_nearby_signals = len(tls) > 0
|
| 72 |
+
|
| 73 |
+
# 生成 zone_name(基於 region 方向 + idx 決定主/次 crossing)
|
| 74 |
+
direction = self._get_directional_description_local(region)
|
| 75 |
+
if direction and direction != "central":
|
| 76 |
+
zone_name = f"{direction} crossing area"
|
| 77 |
+
else:
|
| 78 |
+
zone_name = "main crossing area" if idx == 0 else "secondary crossing area"
|
| 79 |
+
|
| 80 |
+
# 組合 description
|
| 81 |
+
description = f"Pedestrian crossing area with {len(peds)} "
|
| 82 |
+
description += "person" if len(peds) == 1 else "people"
|
| 83 |
+
if direction:
|
| 84 |
+
description += f" in {direction} direction"
|
| 85 |
+
if has_nearby_signals:
|
| 86 |
+
description += " near traffic signals"
|
| 87 |
+
|
| 88 |
+
# 將行人 + 同區紅綠燈一併放入 objects
|
| 89 |
+
obj_list = ["pedestrian"] * len(peds)
|
| 90 |
+
if has_nearby_signals:
|
| 91 |
+
obj_list += ["traffic light"] * len(tls)
|
| 92 |
+
|
| 93 |
+
crossing_zones[zone_name] = {
|
| 94 |
+
"region": region,
|
| 95 |
+
"objects": obj_list,
|
| 96 |
+
"description": description
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
return crossing_zones
|
| 100 |
+
|
| 101 |
+
except Exception as e:
|
| 102 |
+
logger.error(f"Error in analyze_crossing_patterns: {str(e)}")
|
| 103 |
+
logger.error(traceback.format_exc())
|
| 104 |
+
return {}
|
| 105 |
+
|
| 106 |
+
def analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
|
| 107 |
+
"""
|
| 108 |
+
分析車輛分布以識別具有方向感知的交通區域
|
| 109 |
+
|
| 110 |
+
Args:
|
| 111 |
+
vehicles: 車輛物件列表
|
| 112 |
+
|
| 113 |
+
Returns:
|
| 114 |
+
識別出的交通區域字典
|
| 115 |
+
"""
|
| 116 |
+
try:
|
| 117 |
+
traffic_zones = {}
|
| 118 |
+
|
| 119 |
+
if not vehicles:
|
| 120 |
+
return traffic_zones
|
| 121 |
+
|
| 122 |
+
# 按區域分組車輛
|
| 123 |
+
vehicle_regions = {}
|
| 124 |
+
for v in vehicles:
|
| 125 |
+
region = v["region"]
|
| 126 |
+
if region not in vehicle_regions:
|
| 127 |
+
vehicle_regions[region] = []
|
| 128 |
+
vehicle_regions[region].append(v)
|
| 129 |
+
|
| 130 |
+
# 為有車輛的區域創建交通區域
|
| 131 |
+
main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))
|
| 132 |
+
|
| 133 |
+
if main_traffic_region[0] is not None:
|
| 134 |
+
region = main_traffic_region[0]
|
| 135 |
+
vehicles_in_region = main_traffic_region[1]
|
| 136 |
+
|
| 137 |
+
# 獲取車輛類型列表用於描述
|
| 138 |
+
vehicle_types = [v["class_name"] for v in vehicles_in_region]
|
| 139 |
+
unique_types = list(set(vehicle_types))
|
| 140 |
+
|
| 141 |
+
# 獲取方向描述
|
| 142 |
+
direction = self._get_directional_description_local(region)
|
| 143 |
+
|
| 144 |
+
# 創建描述性區域
|
| 145 |
+
traffic_zones["vehicle_zone"] = {
|
| 146 |
+
"region": region,
|
| 147 |
+
"objects": vehicle_types,
|
| 148 |
+
"description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
|
| 149 |
+
(f" in {direction} area" if direction else "")
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
# 如果車輛分布在多個區域,創建次要區域
|
| 153 |
+
if len(vehicle_regions) > 1:
|
| 154 |
+
# 獲取第二大車輛聚集區域
|
| 155 |
+
sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
|
| 156 |
+
if len(sorted_regions) > 1:
|
| 157 |
+
second_region, second_vehicles = sorted_regions[1]
|
| 158 |
+
direction = self._get_directional_description_local(second_region)
|
| 159 |
+
vehicle_types = [v["class_name"] for v in second_vehicles]
|
| 160 |
+
unique_types = list(set(vehicle_types))
|
| 161 |
+
|
| 162 |
+
traffic_zones["secondary_vehicle_zone"] = {
|
| 163 |
+
"region": second_region,
|
| 164 |
+
"objects": vehicle_types,
|
| 165 |
+
"description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
|
| 166 |
+
(f" in {direction} direction" if direction else "")
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
return traffic_zones
|
| 170 |
+
|
| 171 |
+
except Exception as e:
|
| 172 |
+
logger.error(f"Error analyzing traffic zones: {str(e)}")
|
| 173 |
+
logger.error(traceback.format_exc())
|
| 174 |
+
return {}
|
| 175 |
+
|
| 176 |
+
def analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
|
| 177 |
+
"""
|
| 178 |
+
分析空中視角的車輛交通模式
|
| 179 |
+
|
| 180 |
+
Args:
|
| 181 |
+
vehicle_objs: 車輛物件列表
|
| 182 |
+
|
| 183 |
+
Returns:
|
| 184 |
+
交通模式區域字典
|
| 185 |
+
"""
|
| 186 |
+
try:
|
| 187 |
+
zones = {}
|
| 188 |
+
|
| 189 |
+
if not vehicle_objs:
|
| 190 |
+
return zones
|
| 191 |
+
|
| 192 |
+
# 將位置轉換為數組進行模式分析
|
| 193 |
+
positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
|
| 194 |
+
|
| 195 |
+
if len(positions) >= 2:
|
| 196 |
+
# 計算分布指標
|
| 197 |
+
x_coords = positions[:, 0]
|
| 198 |
+
y_coords = positions[:, 1]
|
| 199 |
+
|
| 200 |
+
x_mean = np.mean(x_coords)
|
| 201 |
+
y_mean = np.mean(y_coords)
|
| 202 |
+
x_std = np.std(x_coords)
|
| 203 |
+
y_std = np.std(y_coords)
|
| 204 |
+
|
| 205 |
+
# 判斷車輛是否組織成車道
|
| 206 |
+
if x_std < y_std * 0.5:
|
| 207 |
+
# 車輛垂直對齊 - 代表南北交通
|
| 208 |
+
zones["vertical_traffic_flow"] = {
|
| 209 |
+
"region": "central_vertical",
|
| 210 |
+
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
| 211 |
+
"description": "North-south traffic flow visible from aerial view"
|
| 212 |
+
}
|
| 213 |
+
elif y_std < x_std * 0.5:
|
| 214 |
+
# 車輛水平對齊 - 代表東西交通
|
| 215 |
+
zones["horizontal_traffic_flow"] = {
|
| 216 |
+
"region": "central_horizontal",
|
| 217 |
+
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
| 218 |
+
"description": "East-west traffic flow visible from aerial view"
|
| 219 |
+
}
|
| 220 |
+
else:
|
| 221 |
+
# 車輛多方向 - 代表十字路口
|
| 222 |
+
zones["intersection_traffic"] = {
|
| 223 |
+
"region": "central",
|
| 224 |
+
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
| 225 |
+
"description": "Multi-directional traffic at intersection visible from aerial view"
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
return zones
|
| 229 |
+
|
| 230 |
+
except Exception as e:
|
| 231 |
+
logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
|
| 232 |
+
logger.error(traceback.format_exc())
|
| 233 |
+
return {}
|
| 234 |
+
|
| 235 |
+
def identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 236 |
+
"""
|
| 237 |
+
識別公園的休閒活動區域
|
| 238 |
+
|
| 239 |
+
Args:
|
| 240 |
+
detected_objects: 檢測到的物件列表
|
| 241 |
+
|
| 242 |
+
Returns:
|
| 243 |
+
休閒區域字典
|
| 244 |
+
"""
|
| 245 |
+
try:
|
| 246 |
+
zones = {}
|
| 247 |
+
|
| 248 |
+
# 尋找休閒物件(運動球、風箏等)
|
| 249 |
+
rec_items = []
|
| 250 |
+
rec_regions = {}
|
| 251 |
+
|
| 252 |
+
for obj in detected_objects:
|
| 253 |
+
if obj["class_id"] in [32, 33, 34, 35, 38]: # sports ball, kite, baseball bat, glove, tennis racket
|
| 254 |
+
region = obj["region"]
|
| 255 |
+
if region not in rec_regions:
|
| 256 |
+
rec_regions[region] = []
|
| 257 |
+
rec_regions[region].append(obj)
|
| 258 |
+
rec_items.append(obj["class_name"])
|
| 259 |
+
|
| 260 |
+
if rec_items:
|
| 261 |
+
main_rec_region = max(rec_regions.items(),
|
| 262 |
+
key=lambda x: len(x[1]),
|
| 263 |
+
default=(None, []))
|
| 264 |
+
|
| 265 |
+
if main_rec_region[0] is not None:
|
| 266 |
+
zones["recreational_zone"] = {
|
| 267 |
+
"region": main_rec_region[0],
|
| 268 |
+
"objects": list(set(rec_items)),
|
| 269 |
+
"description": f"Recreational area with {', '.join(list(set(rec_items)))}"
|
| 270 |
+
}
|
| 271 |
+
|
| 272 |
+
return zones
|
| 273 |
+
|
| 274 |
+
except Exception as e:
|
| 275 |
+
logger.error(f"Error identifying park recreational zones: {str(e)}")
|
| 276 |
+
logger.error(traceback.format_exc())
|
| 277 |
+
return {}
|
| 278 |
+
|
| 279 |
+
def identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 280 |
+
"""
|
| 281 |
+
停車場的停車區域
|
| 282 |
+
|
| 283 |
+
Args:
|
| 284 |
+
detected_objects: 檢測到的物件列表
|
| 285 |
+
|
| 286 |
+
Returns:
|
| 287 |
+
停車區域字典
|
| 288 |
+
"""
|
| 289 |
+
try:
|
| 290 |
+
zones = {}
|
| 291 |
+
|
| 292 |
+
# 尋找停放的汽車
|
| 293 |
+
car_objs = [obj for obj in detected_objects if obj["class_id"] == 2] # cars
|
| 294 |
+
|
| 295 |
+
if len(car_objs) >= 3:
|
| 296 |
+
# 檢查汽車是否按模式排列
|
| 297 |
+
car_positions = [obj["normalized_center"] for obj in car_objs]
|
| 298 |
+
|
| 299 |
+
# 通過分析垂直位置檢查行模式
|
| 300 |
+
y_coords = [pos[1] for pos in car_positions]
|
| 301 |
+
y_clusters = {}
|
| 302 |
+
|
| 303 |
+
# 按相似y坐標分組汽車
|
| 304 |
+
for i, y in enumerate(y_coords):
|
| 305 |
+
assigned = False
|
| 306 |
+
for cluster_y in y_clusters.keys():
|
| 307 |
+
if abs(y - cluster_y) < 0.1: # 圖像高度的10%內
|
| 308 |
+
y_clusters[cluster_y].append(i)
|
| 309 |
+
assigned = True
|
| 310 |
+
break
|
| 311 |
+
|
| 312 |
+
if not assigned:
|
| 313 |
+
y_clusters[y] = [i]
|
| 314 |
+
|
| 315 |
+
# 如果有行模式
|
| 316 |
+
if max(len(indices) for indices in y_clusters.values()) >= 2:
|
| 317 |
+
zones["parking_row"] = {
|
| 318 |
+
"region": "central",
|
| 319 |
+
"objects": ["car"] * len(car_objs),
|
| 320 |
+
"description": f"Organized parking area with vehicles arranged in rows"
|
| 321 |
+
}
|
| 322 |
+
else:
|
| 323 |
+
zones["parking_area"] = {
|
| 324 |
+
"region": "wide",
|
| 325 |
+
"objects": ["car"] * len(car_objs),
|
| 326 |
+
"description": f"Parking area with {len(car_objs)} vehicles"
|
| 327 |
+
}
|
| 328 |
+
|
| 329 |
+
return zones
|
| 330 |
+
|
| 331 |
+
except Exception as e:
|
| 332 |
+
logger.error(f"Error identifying parking zones: {str(e)}")
|
| 333 |
+
logger.error(traceback.format_exc())
|
| 334 |
+
return {}
|
| 335 |
+
|
| 336 |
+
def _get_directional_description_local(self, region: str) -> str:
|
| 337 |
+
"""
|
| 338 |
+
本地方向描述方法
|
| 339 |
+
將區域名稱轉換為方位描述(東西南北)
|
| 340 |
+
|
| 341 |
+
Args:
|
| 342 |
+
region: 區域名稱
|
| 343 |
+
|
| 344 |
+
Returns:
|
| 345 |
+
方位描述字串
|
| 346 |
+
"""
|
| 347 |
+
try:
|
| 348 |
+
region_lower = region.lower()
|
| 349 |
+
|
| 350 |
+
if "top" in region_lower and "left" in region_lower:
|
| 351 |
+
return "northwest"
|
| 352 |
+
elif "top" in region_lower and "right" in region_lower:
|
| 353 |
+
return "northeast"
|
| 354 |
+
elif "bottom" in region_lower and "left" in region_lower:
|
| 355 |
+
return "southwest"
|
| 356 |
+
elif "bottom" in region_lower and "right" in region_lower:
|
| 357 |
+
return "southeast"
|
| 358 |
+
elif "top" in region_lower:
|
| 359 |
+
return "north"
|
| 360 |
+
elif "bottom" in region_lower:
|
| 361 |
+
return "south"
|
| 362 |
+
elif "left" in region_lower:
|
| 363 |
+
return "west"
|
| 364 |
+
elif "right" in region_lower:
|
| 365 |
+
return "east"
|
| 366 |
+
else:
|
| 367 |
+
return "central"
|
| 368 |
+
|
| 369 |
+
except Exception as e:
|
| 370 |
+
logger.error(f"Error getting directional description for region '{region}': {str(e)}")
|
| 371 |
+
return "central"
|
prominence_calculator.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import numpy as np
|
| 3 |
+
from typing import Dict, List, Optional, Any
|
| 4 |
+
|
| 5 |
+
class ProminenceCalculator:
    """
    Score and filter detected objects by visual prominence.

    Computes a per-object prominence score from detection confidence, size,
    position and class importance, and selects the objects whose score passes
    a threshold.
    """

    # High importance: people, vehicles, buildings.
    _HIGH_IMPORTANCE = ("person", "car", "truck", "bus", "motorcycle", "bicycle", "building")
    # Medium importance: furniture and appliances.
    _MEDIUM_IMPORTANCE = ("chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed")
    # Low importance: small items and accessories.
    _LOW_IMPORTANCE = ("handbag", "backpack", "umbrella", "cell phone", "remote", "mouse")
    # Coefficient for classes that appear in none of the lists above.
    _DEFAULT_IMPORTANCE = 0.6

    def __init__(self, min_prominence_score: float = 0.1):
        """
        Initialize the prominence calculator.

        Args:
            min_prominence_score: Minimum prominence score threshold, stored
                on the instance as ``self.min_prominence_score``.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.min_prominence_score = min_prominence_score

    def calculate_prominence_score(self, obj: Dict) -> float:
        """
        Compute the prominence score of a detected object.

        Weights: confidence 40%, size 30%, position 20%, class importance 10%.

        Args:
            obj: Object dict; reads 'confidence' (default 0.5),
                'normalized_area' (default 0.1), 'normalized_center'
                (default [0.5, 0.5]) and 'class_name' (default "unknown").

        Returns:
            float: Prominence score clamped to 0.0-1.0; 0.5 when the object
            data cannot be processed.
        """
        try:
            # Confidence component (weight 40%).
            confidence_score = obj.get("confidence", 0.5) * 0.4

            # Size component (weight 30%). Log scaling keeps very large
            # objects from dominating. A negative area is treated as 0:
            # otherwise the logarithm yields NaN, which the final clamp
            # would turn into a perfect score of 1.0.
            normalized_area = max(obj.get("normalized_area", 0.1), 0.0)
            size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3

            # Position component (weight 20%): closer to the image center
            # scores higher; a distance of 0.5 or more scores zero.
            center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
            distance_from_center = np.sqrt((center_x - 0.5) ** 2 + (center_y - 0.5) ** 2)
            position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2

            # Class importance component (weight 10%).
            class_score = self.get_class_importance(obj.get("class_name", "unknown")) * 0.1

            total_score = confidence_score + size_score + position_score + class_score

            # Clamp to the valid range and return a builtin float.
            return float(max(0.0, min(1.0, total_score)))

        except Exception as e:
            self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
            return 0.5  # neutral fallback score

    @staticmethod
    def _contains_whole_word(text: str, word: str) -> bool:
        """Return True if word occurs in text without a letter directly before or after it."""
        start = text.find(word)
        while start != -1:
            end = start + len(word)
            left_ok = start == 0 or not text[start - 1].isalpha()
            right_ok = end == len(text) or not text[end].isalpha()
            if left_ok and right_ok:
                return True
            start = text.find(word, start + 1)
        return False

    def get_class_importance(self, class_name: str) -> float:
        """
        Return the importance coefficient for an object class.

        Matching is case-insensitive and on whole words, so "sports car"
        still counts as "car" while "carrot" does not.

        Args:
            class_name: Object class name.

        Returns:
            float: 1.0 (high), 0.7 (medium), 0.4 (low), or 0.6 for classes in
            none of the lists and for non-string names.
        """
        if not isinstance(class_name, str):
            return self._DEFAULT_IMPORTANCE

        name = class_name.lower()
        tiers = (
            (self._HIGH_IMPORTANCE, 1.0),
            (self._MEDIUM_IMPORTANCE, 0.7),
            (self._LOW_IMPORTANCE, 0.4),
        )
        # Tiers are checked from high to low; the first match wins.
        for keywords, coefficient in tiers:
            if any(self._contains_whole_word(name, keyword) for keyword in keywords):
                return coefficient
        return self._DEFAULT_IMPORTANCE

    def filter_prominent_objects(self, detected_objects: List[Dict],
                                 min_prominence_score: float = 0.5,
                                 max_categories_to_return: Optional[int] = None) -> List[Dict]:
        """
        Select the most prominent objects, sorted by prominence score.

        Args:
            detected_objects: Detected objects.
            min_prominence_score: Minimum score (0.0-1.0) an object needs to
                be kept. NOTE: this argument is the only threshold used; the
                value stored by __init__ is not consulted here.
            max_categories_to_return: Optional cap on the number of distinct
                class names in the result; ignored when None or not positive.

        Returns:
            List[Dict]: Copies of the qualifying objects with an added
            'prominence_score' key, highest score first. Empty on error.
        """
        try:
            if not detected_objects:
                return []

            prominent_objects = []
            for obj in detected_objects:
                prominence_score = self.calculate_prominence_score(obj)
                # Keep only objects at or above the threshold; work on a
                # shallow copy so the caller's dict is not modified.
                if prominence_score >= min_prominence_score:
                    scored = obj.copy()
                    scored['prominence_score'] = prominence_score
                    prominent_objects.append(scored)

            # Highest prominence first.
            prominent_objects.sort(key=lambda x: x.get('prominence_score', 0), reverse=True)

            if max_categories_to_return is None or max_categories_to_return <= 0:
                return prominent_objects

            # Walk in score order: the first N distinct classes are admitted,
            # and every further object of an admitted class is kept too.
            categories_seen = set()
            filtered_objects = []
            for obj in prominent_objects:
                class_name = obj.get("class_name", "unknown")
                if class_name in categories_seen:
                    filtered_objects.append(obj)
                elif len(categories_seen) < max_categories_to_return:
                    categories_seen.add(class_name)
                    filtered_objects.append(obj)

            return filtered_objects

        except Exception as e:
            self.logger.error(f"Error calculating prominent objects: {str(e)}")
            return []
|
scene_zone_identifier.py
CHANGED
|
@@ -3,6 +3,9 @@ import logging
|
|
| 3 |
import traceback
|
| 4 |
import numpy as np
|
| 5 |
from typing import Dict, List, Any, Optional
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
logger = logging.getLogger(__name__)
|
| 8 |
|
|
@@ -10,11 +13,17 @@ class SceneZoneIdentifier:
|
|
| 10 |
"""
|
| 11 |
負責不同場景類型的區域識別邏輯
|
| 12 |
專注於根據場景類型執行相應的功能區域識別策略
|
|
|
|
| 13 |
"""
|
| 14 |
|
| 15 |
def __init__(self):
|
| 16 |
"""初始化場景區域辨識器"""
|
| 17 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
logger.info("SceneZoneIdentifier initialized successfully")
|
| 19 |
|
| 20 |
except Exception as e:
|
|
@@ -39,18 +48,18 @@ class SceneZoneIdentifier:
|
|
| 39 |
zones = {}
|
| 40 |
|
| 41 |
# 主要功能區域(基於物件關聯性而非場景類型)
|
| 42 |
-
primary_zone = self.
|
| 43 |
if primary_zone:
|
| 44 |
# 基於區域內容生成描述性鍵名
|
| 45 |
-
descriptive_key = self.
|
| 46 |
zones[descriptive_key] = primary_zone
|
| 47 |
|
| 48 |
# 只有明確證據且物件數量足夠時創建次要功能區域
|
| 49 |
if len(zones) >= 1 and len(detected_objects) >= 6:
|
| 50 |
-
secondary_zone = self.
|
| 51 |
if secondary_zone:
|
| 52 |
# 基於區域內容生成描述性鍵名
|
| 53 |
-
descriptive_key = self.
|
| 54 |
zones[descriptive_key] = secondary_zone
|
| 55 |
|
| 56 |
logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
|
|
@@ -61,92 +70,9 @@ class SceneZoneIdentifier:
|
|
| 61 |
logger.error(traceback.format_exc())
|
| 62 |
return {}
|
| 63 |
|
| 64 |
-
def _generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
|
| 65 |
-
"""
|
| 66 |
-
基於區域數據生成描述性鍵名
|
| 67 |
-
|
| 68 |
-
Args:
|
| 69 |
-
zone_data: 區域數據字典
|
| 70 |
-
priority_level: 優先級別(primary/secondary)
|
| 71 |
-
|
| 72 |
-
Returns:
|
| 73 |
-
str: 描述性區域鍵名
|
| 74 |
-
"""
|
| 75 |
-
try:
|
| 76 |
-
objects = zone_data.get("objects", [])
|
| 77 |
-
region = zone_data.get("region", "")
|
| 78 |
-
description = zone_data.get("description", "")
|
| 79 |
-
|
| 80 |
-
# 基於物件內容確定功能類型
|
| 81 |
-
if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
|
| 82 |
-
base_name = "dining area"
|
| 83 |
-
elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
|
| 84 |
-
base_name = "seating area"
|
| 85 |
-
elif any("bed" in obj.lower() for obj in objects):
|
| 86 |
-
base_name = "sleeping area"
|
| 87 |
-
elif any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
|
| 88 |
-
base_name = "workspace area"
|
| 89 |
-
elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
|
| 90 |
-
base_name = "decorative area"
|
| 91 |
-
elif any("refrigerator" in obj.lower() or "microwave" in obj.lower() for obj in objects):
|
| 92 |
-
base_name = "kitchen area"
|
| 93 |
-
else:
|
| 94 |
-
# 基於描述內容推斷
|
| 95 |
-
if "dining" in description.lower():
|
| 96 |
-
base_name = "dining area"
|
| 97 |
-
elif "seating" in description.lower() or "relaxation" in description.lower():
|
| 98 |
-
base_name = "seating area"
|
| 99 |
-
elif "work" in description.lower():
|
| 100 |
-
base_name = "workspace area"
|
| 101 |
-
elif "decorative" in description.lower():
|
| 102 |
-
base_name = "decorative area"
|
| 103 |
-
else:
|
| 104 |
-
base_name = "functional area"
|
| 105 |
-
|
| 106 |
-
# 為次要區域添加位置標識以區分
|
| 107 |
-
if priority_level == "secondary" and region:
|
| 108 |
-
spatial_context = self._get_spatial_context_description(region)
|
| 109 |
-
if spatial_context:
|
| 110 |
-
return f"{spatial_context} {base_name}"
|
| 111 |
-
|
| 112 |
-
return base_name
|
| 113 |
-
|
| 114 |
-
except Exception as e:
|
| 115 |
-
logger.warning(f"Error generating descriptive zone key: {str(e)}")
|
| 116 |
-
return "activity area"
|
| 117 |
-
|
| 118 |
-
def _get_spatial_context_description(self, region: str) -> str:
|
| 119 |
-
"""
|
| 120 |
-
獲取空間上下文描述
|
| 121 |
-
|
| 122 |
-
Args:
|
| 123 |
-
region: 區域位置標識
|
| 124 |
-
|
| 125 |
-
Returns:
|
| 126 |
-
str: 空間上下文描述
|
| 127 |
-
"""
|
| 128 |
-
try:
|
| 129 |
-
spatial_mapping = {
|
| 130 |
-
"top_left": "upper left",
|
| 131 |
-
"top_center": "upper",
|
| 132 |
-
"top_right": "upper right",
|
| 133 |
-
"middle_left": "left side",
|
| 134 |
-
"middle_center": "central",
|
| 135 |
-
"middle_right": "right side",
|
| 136 |
-
"bottom_left": "lower left",
|
| 137 |
-
"bottom_center": "lower",
|
| 138 |
-
"bottom_right": "lower right"
|
| 139 |
-
}
|
| 140 |
-
|
| 141 |
-
return spatial_mapping.get(region, "")
|
| 142 |
-
|
| 143 |
-
except Exception as e:
|
| 144 |
-
logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
|
| 145 |
-
return ""
|
| 146 |
-
|
| 147 |
def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
|
| 148 |
"""
|
| 149 |
-
|
| 150 |
|
| 151 |
Args:
|
| 152 |
category_regions: 按類別和區域分組的物件字典
|
|
@@ -215,11 +141,11 @@ class SceneZoneIdentifier:
|
|
| 215 |
|
| 216 |
# 針對公園區域的特殊處理
|
| 217 |
if scene_type == "park_area":
|
| 218 |
-
zones.update(self.
|
| 219 |
|
| 220 |
# 針對停車場的特殊處理
|
| 221 |
if scene_type == "parking_lot":
|
| 222 |
-
zones.update(self.
|
| 223 |
|
| 224 |
logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
|
| 225 |
return zones
|
|
@@ -232,7 +158,7 @@ class SceneZoneIdentifier:
|
|
| 232 |
def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
|
| 233 |
"""
|
| 234 |
辨識城市十字路口的功能區域,無論是否有行人,只要偵測到紅綠燈就一定顯示 Traffic Control Area;
|
| 235 |
-
|
| 236 |
|
| 237 |
Args:
|
| 238 |
category_regions: 按類別和 region 分組的物件字典
|
|
@@ -251,7 +177,7 @@ class SceneZoneIdentifier:
|
|
| 251 |
traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
|
| 252 |
|
| 253 |
# 2. Step A: 無條件建立 Traffic Control Area
|
| 254 |
-
# 把每個 region 下的紅綠燈都先分群,生成對應 zone
|
| 255 |
signal_regions_all = {}
|
| 256 |
for t in traffic_light_objs:
|
| 257 |
region = t["region"]
|
|
@@ -285,8 +211,8 @@ class SceneZoneIdentifier:
|
|
| 285 |
|
| 286 |
# 3. Step B: 如果有行人,就建立 Crossing Zone,並移除已被打包的紅綠燈
|
| 287 |
if pedestrian_objs:
|
| 288 |
-
# 先呼叫
|
| 289 |
-
crossing_zones = self.
|
| 290 |
|
| 291 |
# 把 Crossing Zone 加到最終 zones,並同時記錄已使用掉的紅綠燈數量
|
| 292 |
for zone_key, zone_info in crossing_zones.items():
|
|
@@ -323,8 +249,8 @@ class SceneZoneIdentifier:
|
|
| 323 |
|
| 324 |
# 5. Step D: 分析車輛交通區域(Vehicle Zones)
|
| 325 |
if vehicle_objs:
|
| 326 |
-
traffic_zones = self.
|
| 327 |
-
#
|
| 328 |
for zone_key, zone_info in traffic_zones.items():
|
| 329 |
if zone_key in zones:
|
| 330 |
suffix = 1
|
|
@@ -396,15 +322,15 @@ class SceneZoneIdentifier:
|
|
| 396 |
# 識別車輛模式進行交通分析
|
| 397 |
vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
|
| 398 |
if vehicle_objs:
|
| 399 |
-
zones.update(self.
|
| 400 |
|
| 401 |
# 針對十字路口特定空中視角的處理
|
| 402 |
if "intersection" in scene_type:
|
| 403 |
-
zones.update(self.
|
| 404 |
|
| 405 |
# 針對廣場空中視角的處理
|
| 406 |
if "plaza" in scene_type:
|
| 407 |
-
zones.update(self.
|
| 408 |
|
| 409 |
logger.info(f"Identified {len(zones)} aerial view zones")
|
| 410 |
return zones
|
|
@@ -460,11 +386,11 @@ class SceneZoneIdentifier:
|
|
| 460 |
"description": f"Asian commercial storefront with pedestrian activity"
|
| 461 |
}
|
| 462 |
|
| 463 |
-
# 辨識行人通道
|
| 464 |
-
zones.update(self.
|
| 465 |
|
| 466 |
# 辨識攤販區域(小攤/商店 - 從情境推斷)
|
| 467 |
-
zones.update(self.
|
| 468 |
|
| 469 |
# 針對夜市的特殊處理
|
| 470 |
if scene_type == "asian_night_market":
|
|
@@ -521,13 +447,13 @@ class SceneZoneIdentifier:
|
|
| 521 |
}
|
| 522 |
|
| 523 |
# 識別裝飾區域,增強檢測
|
| 524 |
-
zones.update(self.
|
| 525 |
|
| 526 |
# 識別座位安排區域
|
| 527 |
-
zones.update(self.
|
| 528 |
|
| 529 |
# 識別服務區域(如果與餐飲區域不同)
|
| 530 |
-
zones.update(self.
|
| 531 |
|
| 532 |
logger.info(f"Identified {len(zones)} upscale dining zones")
|
| 533 |
return zones
|
|
@@ -576,10 +502,10 @@ class SceneZoneIdentifier:
|
|
| 576 |
}
|
| 577 |
|
| 578 |
# 側邊建築區域(從場景情境推斷)
|
| 579 |
-
zones.update(self.
|
| 580 |
|
| 581 |
# 行人區域
|
| 582 |
-
zones.update(self.
|
| 583 |
|
| 584 |
logger.info(f"Identified {len(zones)} financial district zones")
|
| 585 |
return zones
|
|
@@ -666,7 +592,7 @@ class SceneZoneIdentifier:
|
|
| 666 |
}
|
| 667 |
|
| 668 |
# 創建相關輔助功能區,如攝影區、紀念品販賣區
|
| 669 |
-
auxiliary_zones = self.
|
| 670 |
if auxiliary_zones:
|
| 671 |
landmark_zones.update(auxiliary_zones)
|
| 672 |
|
|
@@ -678,357 +604,10 @@ class SceneZoneIdentifier:
|
|
| 678 |
logger.error(traceback.format_exc())
|
| 679 |
return {}
|
| 680 |
|
| 681 |
-
|
| 682 |
-
def _identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
|
| 683 |
-
"""
|
| 684 |
-
識別主要功能區域,基於最強的物件關聯性組合
|
| 685 |
-
採用通用邏輯處理各種室內場景
|
| 686 |
-
|
| 687 |
-
Args:
|
| 688 |
-
detected_objects: 檢測到的物件列表
|
| 689 |
-
|
| 690 |
-
Returns:
|
| 691 |
-
主要功能區域字典或None
|
| 692 |
-
"""
|
| 693 |
-
try:
|
| 694 |
-
# 用餐區域檢測(桌椅組合)
|
| 695 |
-
dining_area = self._detect_functional_combination(
|
| 696 |
-
detected_objects,
|
| 697 |
-
primary_objects=[60], # dining table
|
| 698 |
-
supporting_objects=[56, 40, 41, 42, 43], # chair, wine glass, cup, fork, knife
|
| 699 |
-
min_supporting=2,
|
| 700 |
-
description_template="Dining area with table and seating arrangement"
|
| 701 |
-
)
|
| 702 |
-
if dining_area:
|
| 703 |
-
return dining_area
|
| 704 |
-
|
| 705 |
-
# 休息區域檢測(沙發電視組合或床)
|
| 706 |
-
seating_area = self._detect_functional_combination(
|
| 707 |
-
detected_objects,
|
| 708 |
-
primary_objects=[57, 59], # sofa, bed
|
| 709 |
-
supporting_objects=[62, 58, 56], # tv, potted plant, chair
|
| 710 |
-
min_supporting=1,
|
| 711 |
-
description_template="Seating and relaxation area"
|
| 712 |
-
)
|
| 713 |
-
if seating_area:
|
| 714 |
-
return seating_area
|
| 715 |
-
|
| 716 |
-
# 工作區域檢測(電子設備與家具組合)
|
| 717 |
-
work_area = self._detect_functional_combination(
|
| 718 |
-
detected_objects,
|
| 719 |
-
primary_objects=[63, 66], # laptop, keyboard
|
| 720 |
-
supporting_objects=[60, 56, 64], # dining table, chair, mouse
|
| 721 |
-
min_supporting=2,
|
| 722 |
-
description_template="Workspace area with electronics and furniture"
|
| 723 |
-
)
|
| 724 |
-
if work_area:
|
| 725 |
-
return work_area
|
| 726 |
-
|
| 727 |
-
return None
|
| 728 |
-
|
| 729 |
-
except Exception as e:
|
| 730 |
-
logger.error(f"Error identifying primary functional area: {str(e)}")
|
| 731 |
-
logger.error(traceback.format_exc())
|
| 732 |
-
return None
|
| 733 |
-
|
| 734 |
-
def _identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
|
| 735 |
-
"""
|
| 736 |
-
識別次要功能區域,避免與主要區域重疊
|
| 737 |
-
|
| 738 |
-
Args:
|
| 739 |
-
detected_objects: 檢測到的物件列表
|
| 740 |
-
existing_zones: 已存在的功能區域
|
| 741 |
-
|
| 742 |
-
Returns:
|
| 743 |
-
次要功能區域字典或None
|
| 744 |
-
"""
|
| 745 |
-
try:
|
| 746 |
-
# 獲取已使用的區域
|
| 747 |
-
used_regions = set(zone.get("region") for zone in existing_zones.values())
|
| 748 |
-
|
| 749 |
-
# 裝飾區域檢測(植物集中區域)
|
| 750 |
-
decorative_area = self._detect_functional_combination(
|
| 751 |
-
detected_objects,
|
| 752 |
-
primary_objects=[58], # potted plant
|
| 753 |
-
supporting_objects=[75], # vase
|
| 754 |
-
min_supporting=0,
|
| 755 |
-
min_primary=3, # 至少需要3個植物
|
| 756 |
-
description_template="Decorative area with plants and ornamental items",
|
| 757 |
-
exclude_regions=used_regions
|
| 758 |
-
)
|
| 759 |
-
if decorative_area:
|
| 760 |
-
return decorative_area
|
| 761 |
-
|
| 762 |
-
# 儲存區域檢測(廚房電器組合)
|
| 763 |
-
storage_area = self._detect_functional_combination(
|
| 764 |
-
detected_objects,
|
| 765 |
-
primary_objects=[72, 68, 69], # refrigerator, microwave, oven
|
| 766 |
-
supporting_objects=[71], # sink
|
| 767 |
-
min_supporting=0,
|
| 768 |
-
min_primary=2,
|
| 769 |
-
description_template="Kitchen appliance and storage area",
|
| 770 |
-
exclude_regions=used_regions
|
| 771 |
-
)
|
| 772 |
-
if storage_area:
|
| 773 |
-
return storage_area
|
| 774 |
-
|
| 775 |
-
return None
|
| 776 |
-
|
| 777 |
-
except Exception as e:
|
| 778 |
-
logger.error(f"Error identifying secondary functional area: {str(e)}")
|
| 779 |
-
logger.error(traceback.format_exc())
|
| 780 |
-
return None
|
| 781 |
-
|
| 782 |
-
def _detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
|
| 783 |
-
supporting_objects: List[int], min_supporting: int,
|
| 784 |
-
description_template: str, min_primary: int = 1,
|
| 785 |
-
exclude_regions: set = None) -> Dict:
|
| 786 |
-
"""
|
| 787 |
-
通用的功能組合檢測方法
|
| 788 |
-
基於主要物件和支持物件的組合判斷功能區域
|
| 789 |
-
|
| 790 |
-
Args:
|
| 791 |
-
detected_objects: 檢測到的物件列表
|
| 792 |
-
primary_objects: 主要物件的class_id列表
|
| 793 |
-
supporting_objects: 支持物件的class_id列表
|
| 794 |
-
min_supporting: 最少需要的支持物件數量
|
| 795 |
-
description_template: 描述模板
|
| 796 |
-
min_primary: 最少需要的主要物件數量
|
| 797 |
-
exclude_regions: 需要排除的區域集合
|
| 798 |
-
|
| 799 |
-
Returns:
|
| 800 |
-
功能區域資訊字典,如果不符合條件則返回None
|
| 801 |
-
"""
|
| 802 |
-
try:
|
| 803 |
-
if exclude_regions is None:
|
| 804 |
-
exclude_regions = set()
|
| 805 |
-
|
| 806 |
-
# 收集主要物件
|
| 807 |
-
primary_objs = [obj for obj in detected_objects
|
| 808 |
-
if obj.get("class_id") in primary_objects and obj.get("confidence", 0) >= 0.4]
|
| 809 |
-
|
| 810 |
-
# 收集支持物件
|
| 811 |
-
supporting_objs = [obj for obj in detected_objects
|
| 812 |
-
if obj.get("class_id") in supporting_objects and obj.get("confidence", 0) >= 0.4]
|
| 813 |
-
|
| 814 |
-
# 檢查是否滿足最少數量要求
|
| 815 |
-
if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
|
| 816 |
-
return None
|
| 817 |
-
|
| 818 |
-
# 按區域組織物件
|
| 819 |
-
region_combinations = {}
|
| 820 |
-
all_relevant_objs = primary_objs + supporting_objs
|
| 821 |
-
|
| 822 |
-
for obj in all_relevant_objs:
|
| 823 |
-
region = obj.get("region")
|
| 824 |
-
|
| 825 |
-
# 排除指定區域
|
| 826 |
-
if region in exclude_regions:
|
| 827 |
-
continue
|
| 828 |
-
|
| 829 |
-
if region not in region_combinations:
|
| 830 |
-
region_combinations[region] = {"primary": [], "supporting": [], "all": []}
|
| 831 |
-
|
| 832 |
-
region_combinations[region]["all"].append(obj)
|
| 833 |
-
|
| 834 |
-
if obj.get("class_id") in primary_objects:
|
| 835 |
-
region_combinations[region]["primary"].append(obj)
|
| 836 |
-
else:
|
| 837 |
-
region_combinations[region]["supporting"].append(obj)
|
| 838 |
-
|
| 839 |
-
# 找到最佳區域組合
|
| 840 |
-
best_region = None
|
| 841 |
-
best_score = 0
|
| 842 |
-
|
| 843 |
-
for region, objs in region_combinations.items():
|
| 844 |
-
# 計算該區域的評分
|
| 845 |
-
primary_count = len(objs["primary"])
|
| 846 |
-
supporting_count = len(objs["supporting"])
|
| 847 |
-
|
| 848 |
-
# 必須滿足最低要求
|
| 849 |
-
if primary_count < min_primary or supporting_count < min_supporting:
|
| 850 |
-
continue
|
| 851 |
-
|
| 852 |
-
# 計算組合評分(主要物件權重較高)
|
| 853 |
-
score = primary_count * 2 + supporting_count
|
| 854 |
-
|
| 855 |
-
if score > best_score:
|
| 856 |
-
best_score = score
|
| 857 |
-
best_region = region
|
| 858 |
-
|
| 859 |
-
if best_region is None:
|
| 860 |
-
return None
|
| 861 |
-
|
| 862 |
-
best_combination = region_combinations[best_region]
|
| 863 |
-
all_objects = [obj["class_name"] for obj in best_combination["all"]]
|
| 864 |
-
|
| 865 |
-
return {
|
| 866 |
-
"region": best_region,
|
| 867 |
-
"objects": all_objects,
|
| 868 |
-
"description": description_template
|
| 869 |
-
}
|
| 870 |
-
|
| 871 |
-
except Exception as e:
|
| 872 |
-
logger.error(f"Error detecting functional combination: {str(e)}")
|
| 873 |
-
logger.error(traceback.format_exc())
|
| 874 |
-
return None
|
| 875 |
-
|
| 876 |
-
def _analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
|
| 877 |
-
"""
|
| 878 |
-
Analyze pedestrian crossing patterns to identify crossing zones.
|
| 879 |
-
若同一 region 中同時有行人與紅綠燈,則將兩者都放入該區域的 objects。
|
| 880 |
-
|
| 881 |
-
Args:
|
| 882 |
-
pedestrians: 行人物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
|
| 883 |
-
traffic_lights: 紅綠燈物件列表(每個 obj 應包含 'class_id', 'region', 'confidence' 等)
|
| 884 |
-
|
| 885 |
-
Returns:
|
| 886 |
-
crossing_zones: 字典,key 為 zone 名稱,value 包含 'region', 'objects', 'description'
|
| 887 |
-
"""
|
| 888 |
-
try:
|
| 889 |
-
crossing_zones = {}
|
| 890 |
-
|
| 891 |
-
# 如果沒有任何行人,就不辨識任何 crossing zone
|
| 892 |
-
if not pedestrians:
|
| 893 |
-
return crossing_zones
|
| 894 |
-
|
| 895 |
-
# (1) 按照 region 分組行人
|
| 896 |
-
pedestrian_regions = {}
|
| 897 |
-
for p in pedestrians:
|
| 898 |
-
region = p["region"]
|
| 899 |
-
pedestrian_regions.setdefault(region, []).append(p)
|
| 900 |
-
|
| 901 |
-
# (2) 針對每個 region,看是否同時有紅綠燈
|
| 902 |
-
# 建立一個 mapping: region -> { "pedestrians": [...], "traffic_lights": [...] }
|
| 903 |
-
combined_regions = {}
|
| 904 |
-
for region, peds in pedestrian_regions.items():
|
| 905 |
-
# 取得該 region 下所有紅綠燈
|
| 906 |
-
tls_in_region = [t for t in traffic_lights if t["region"] == region]
|
| 907 |
-
combined_regions[region] = {
|
| 908 |
-
"pedestrians": peds,
|
| 909 |
-
"traffic_lights": tls_in_region
|
| 910 |
-
}
|
| 911 |
-
|
| 912 |
-
# (3) 按照行人數量排序,找出前兩個需要建立 crossing zone 的 region
|
| 913 |
-
sorted_regions = sorted(
|
| 914 |
-
combined_regions.items(),
|
| 915 |
-
key=lambda x: len(x[1]["pedestrians"]),
|
| 916 |
-
reverse=True
|
| 917 |
-
)
|
| 918 |
-
|
| 919 |
-
# (4) 將前兩個 region 建立 Crossing Zone,objects 同時包含行人與紅綠燈
|
| 920 |
-
for idx, (region, group) in enumerate(sorted_regions[:2]):
|
| 921 |
-
peds = group["pedestrians"]
|
| 922 |
-
tls = group["traffic_lights"]
|
| 923 |
-
has_nearby_signals = len(tls) > 0
|
| 924 |
-
|
| 925 |
-
# 生成 zone_name(基於 region 方向 + idx 決定主/次 crossing)
|
| 926 |
-
direction = self._get_directional_description(region)
|
| 927 |
-
if direction and direction != "central":
|
| 928 |
-
zone_name = f"{direction} crossing area"
|
| 929 |
-
else:
|
| 930 |
-
zone_name = "main crossing area" if idx == 0 else "secondary crossing area"
|
| 931 |
-
|
| 932 |
-
# 組合 description
|
| 933 |
-
description = f"Pedestrian crossing area with {len(peds)} "
|
| 934 |
-
description += "person" if len(peds) == 1 else "people"
|
| 935 |
-
if direction:
|
| 936 |
-
description += f" in {direction} direction"
|
| 937 |
-
if has_nearby_signals:
|
| 938 |
-
description += " near traffic signals"
|
| 939 |
-
|
| 940 |
-
# ======= 將行人 + 同區紅綠燈一併放入 objects =======
|
| 941 |
-
obj_list = ["pedestrian"] * len(peds)
|
| 942 |
-
if has_nearby_signals:
|
| 943 |
-
obj_list += ["traffic light"] * len(tls)
|
| 944 |
-
|
| 945 |
-
crossing_zones[zone_name] = {
|
| 946 |
-
"region": region,
|
| 947 |
-
"objects": obj_list,
|
| 948 |
-
"description": description
|
| 949 |
-
}
|
| 950 |
-
|
| 951 |
-
return crossing_zones
|
| 952 |
-
|
| 953 |
-
except Exception as e:
|
| 954 |
-
logger.error(f"Error in _analyze_crossing_patterns: {str(e)}")
|
| 955 |
-
logger.error(traceback.format_exc())
|
| 956 |
-
return {}
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
def _analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
|
| 960 |
-
"""
|
| 961 |
-
分析車輛分布以識別具有方向感知的交通區域
|
| 962 |
-
|
| 963 |
-
Args:
|
| 964 |
-
vehicles: 車輛物件列表
|
| 965 |
-
|
| 966 |
-
Returns:
|
| 967 |
-
識別出的交通區域字典
|
| 968 |
-
"""
|
| 969 |
-
try:
|
| 970 |
-
traffic_zones = {}
|
| 971 |
-
|
| 972 |
-
if not vehicles:
|
| 973 |
-
return traffic_zones
|
| 974 |
-
|
| 975 |
-
# 按區域分組車輛
|
| 976 |
-
vehicle_regions = {}
|
| 977 |
-
for v in vehicles:
|
| 978 |
-
region = v["region"]
|
| 979 |
-
if region not in vehicle_regions:
|
| 980 |
-
vehicle_regions[region] = []
|
| 981 |
-
vehicle_regions[region].append(v)
|
| 982 |
-
|
| 983 |
-
# 為有車輛的區域創建交通區域
|
| 984 |
-
main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))
|
| 985 |
-
|
| 986 |
-
if main_traffic_region[0] is not None:
|
| 987 |
-
region = main_traffic_region[0]
|
| 988 |
-
vehicles_in_region = main_traffic_region[1]
|
| 989 |
-
|
| 990 |
-
# 獲取車輛類型列表用於描述
|
| 991 |
-
vehicle_types = [v["class_name"] for v in vehicles_in_region]
|
| 992 |
-
unique_types = list(set(vehicle_types))
|
| 993 |
-
|
| 994 |
-
# 獲取方向描述
|
| 995 |
-
direction = self._get_directional_description(region)
|
| 996 |
-
|
| 997 |
-
# 創建描述性區域
|
| 998 |
-
traffic_zones["vehicle_zone"] = {
|
| 999 |
-
"region": region,
|
| 1000 |
-
"objects": vehicle_types,
|
| 1001 |
-
"description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
|
| 1002 |
-
(f" in {direction} area" if direction else "")
|
| 1003 |
-
}
|
| 1004 |
-
|
| 1005 |
-
# 如果車輛分布在多個區域,創建次要區域
|
| 1006 |
-
if len(vehicle_regions) > 1:
|
| 1007 |
-
# 獲取第二大車輛聚集區域
|
| 1008 |
-
sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
|
| 1009 |
-
if len(sorted_regions) > 1:
|
| 1010 |
-
second_region, second_vehicles = sorted_regions[1]
|
| 1011 |
-
direction = self._get_directional_description(second_region)
|
| 1012 |
-
vehicle_types = [v["class_name"] for v in second_vehicles]
|
| 1013 |
-
unique_types = list(set(vehicle_types))
|
| 1014 |
-
|
| 1015 |
-
traffic_zones["secondary_vehicle_zone"] = {
|
| 1016 |
-
"region": second_region,
|
| 1017 |
-
"objects": vehicle_types,
|
| 1018 |
-
"description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
|
| 1019 |
-
(f" in {direction} direction" if direction else "")
|
| 1020 |
-
}
|
| 1021 |
-
|
| 1022 |
-
return traffic_zones
|
| 1023 |
-
|
| 1024 |
-
except Exception as e:
|
| 1025 |
-
logger.error(f"Error analyzing traffic zones: {str(e)}")
|
| 1026 |
-
logger.error(traceback.format_exc())
|
| 1027 |
-
return {}
|
| 1028 |
-
|
| 1029 |
def _get_directional_description(self, region: str) -> str:
|
| 1030 |
"""
|
| 1031 |
將區域名稱轉換為方位描述(東西南北)
|
|
|
|
| 1032 |
|
| 1033 |
Args:
|
| 1034 |
region: 區域名稱
|
|
@@ -1061,668 +640,3 @@ class SceneZoneIdentifier:
|
|
| 1061 |
except Exception as e:
|
| 1062 |
logger.error(f"Error getting directional description for region '{region}': {str(e)}")
|
| 1063 |
return "central"
|
| 1064 |
-
|
| 1065 |
-
def _identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1066 |
-
"""
|
| 1067 |
-
識別公園的休閒活動區域
|
| 1068 |
-
|
| 1069 |
-
Args:
|
| 1070 |
-
detected_objects: 檢測到的物件列表
|
| 1071 |
-
|
| 1072 |
-
Returns:
|
| 1073 |
-
休閒區域字典
|
| 1074 |
-
"""
|
| 1075 |
-
try:
|
| 1076 |
-
zones = {}
|
| 1077 |
-
|
| 1078 |
-
# 尋找休閒物件(運動球、風箏等)
|
| 1079 |
-
rec_items = []
|
| 1080 |
-
rec_regions = {}
|
| 1081 |
-
|
| 1082 |
-
for obj in detected_objects:
|
| 1083 |
-
if obj["class_id"] in [32, 33, 34, 35, 38]: # sports ball, kite, baseball bat, glove, tennis racket
|
| 1084 |
-
region = obj["region"]
|
| 1085 |
-
if region not in rec_regions:
|
| 1086 |
-
rec_regions[region] = []
|
| 1087 |
-
rec_regions[region].append(obj)
|
| 1088 |
-
rec_items.append(obj["class_name"])
|
| 1089 |
-
|
| 1090 |
-
if rec_items:
|
| 1091 |
-
main_rec_region = max(rec_regions.items(),
|
| 1092 |
-
key=lambda x: len(x[1]),
|
| 1093 |
-
default=(None, []))
|
| 1094 |
-
|
| 1095 |
-
if main_rec_region[0] is not None:
|
| 1096 |
-
zones["recreational_zone"] = {
|
| 1097 |
-
"region": main_rec_region[0],
|
| 1098 |
-
"objects": list(set(rec_items)),
|
| 1099 |
-
"description": f"Recreational area with {', '.join(list(set(rec_items)))}"
|
| 1100 |
-
}
|
| 1101 |
-
|
| 1102 |
-
return zones
|
| 1103 |
-
|
| 1104 |
-
except Exception as e:
|
| 1105 |
-
logger.error(f"Error identifying park recreational zones: {str(e)}")
|
| 1106 |
-
logger.error(traceback.format_exc())
|
| 1107 |
-
return {}
|
| 1108 |
-
|
| 1109 |
-
def _identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1110 |
-
"""
|
| 1111 |
-
停車場的停車區域
|
| 1112 |
-
|
| 1113 |
-
Args:
|
| 1114 |
-
detected_objects: 檢測到的物件列表
|
| 1115 |
-
|
| 1116 |
-
Returns:
|
| 1117 |
-
停車區域字典
|
| 1118 |
-
"""
|
| 1119 |
-
try:
|
| 1120 |
-
zones = {}
|
| 1121 |
-
|
| 1122 |
-
# 尋找停放的汽車
|
| 1123 |
-
car_objs = [obj for obj in detected_objects if obj["class_id"] == 2] # cars
|
| 1124 |
-
|
| 1125 |
-
if len(car_objs) >= 3:
|
| 1126 |
-
# 檢查汽車是否按模式排列(簡化)
|
| 1127 |
-
car_positions = [obj["normalized_center"] for obj in car_objs]
|
| 1128 |
-
|
| 1129 |
-
# 通過分析垂直位置檢查行模式
|
| 1130 |
-
y_coords = [pos[1] for pos in car_positions]
|
| 1131 |
-
y_clusters = {}
|
| 1132 |
-
|
| 1133 |
-
# 簡化聚類 - 按相似y坐標分組汽車
|
| 1134 |
-
for i, y in enumerate(y_coords):
|
| 1135 |
-
assigned = False
|
| 1136 |
-
for cluster_y in y_clusters.keys():
|
| 1137 |
-
if abs(y - cluster_y) < 0.1: # 圖像高度的10%內
|
| 1138 |
-
y_clusters[cluster_y].append(i)
|
| 1139 |
-
assigned = True
|
| 1140 |
-
break
|
| 1141 |
-
|
| 1142 |
-
if not assigned:
|
| 1143 |
-
y_clusters[y] = [i]
|
| 1144 |
-
|
| 1145 |
-
# 如果有行模式
|
| 1146 |
-
if max(len(indices) for indices in y_clusters.values()) >= 2:
|
| 1147 |
-
zones["parking_row"] = {
|
| 1148 |
-
"region": "central",
|
| 1149 |
-
"objects": ["car"] * len(car_objs),
|
| 1150 |
-
"description": f"Organized parking area with vehicles arranged in rows"
|
| 1151 |
-
}
|
| 1152 |
-
else:
|
| 1153 |
-
zones["parking_area"] = {
|
| 1154 |
-
"region": "wide",
|
| 1155 |
-
"objects": ["car"] * len(car_objs),
|
| 1156 |
-
"description": f"Parking area with {len(car_objs)} vehicles"
|
| 1157 |
-
}
|
| 1158 |
-
|
| 1159 |
-
return zones
|
| 1160 |
-
|
| 1161 |
-
except Exception as e:
|
| 1162 |
-
logger.error(f"Error identifying parking zones: {str(e)}")
|
| 1163 |
-
logger.error(traceback.format_exc())
|
| 1164 |
-
return {}
|
| 1165 |
-
|
| 1166 |
-
def _analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
|
| 1167 |
-
"""
|
| 1168 |
-
分析空中視角的車輛交通模式
|
| 1169 |
-
|
| 1170 |
-
Args:
|
| 1171 |
-
vehicle_objs: 車輛物件列表
|
| 1172 |
-
|
| 1173 |
-
Returns:
|
| 1174 |
-
交通模式區域字典
|
| 1175 |
-
"""
|
| 1176 |
-
try:
|
| 1177 |
-
zones = {}
|
| 1178 |
-
|
| 1179 |
-
if not vehicle_objs:
|
| 1180 |
-
return zones
|
| 1181 |
-
|
| 1182 |
-
# 將位置轉換為數組進行模式分析
|
| 1183 |
-
positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
|
| 1184 |
-
|
| 1185 |
-
if len(positions) >= 2:
|
| 1186 |
-
# 計算分布指標
|
| 1187 |
-
x_coords = positions[:, 0]
|
| 1188 |
-
y_coords = positions[:, 1]
|
| 1189 |
-
|
| 1190 |
-
x_mean = np.mean(x_coords)
|
| 1191 |
-
y_mean = np.mean(y_coords)
|
| 1192 |
-
x_std = np.std(x_coords)
|
| 1193 |
-
y_std = np.std(y_coords)
|
| 1194 |
-
|
| 1195 |
-
# 判斷車輛是否組織成車道
|
| 1196 |
-
if x_std < y_std * 0.5:
|
| 1197 |
-
# 車輛垂直對齊 - 表示南北交通
|
| 1198 |
-
zones["vertical_traffic_flow"] = {
|
| 1199 |
-
"region": "central_vertical",
|
| 1200 |
-
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
| 1201 |
-
"description": "North-south traffic flow visible from aerial view"
|
| 1202 |
-
}
|
| 1203 |
-
elif y_std < x_std * 0.5:
|
| 1204 |
-
# 車輛水平對齊 - 表示東西交通
|
| 1205 |
-
zones["horizontal_traffic_flow"] = {
|
| 1206 |
-
"region": "central_horizontal",
|
| 1207 |
-
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
| 1208 |
-
"description": "East-west traffic flow visible from aerial view"
|
| 1209 |
-
}
|
| 1210 |
-
else:
|
| 1211 |
-
# 車輛多方向 - 表示十字路口
|
| 1212 |
-
zones["intersection_traffic"] = {
|
| 1213 |
-
"region": "central",
|
| 1214 |
-
"objects": [obj["class_name"] for obj in vehicle_objs[:5]],
|
| 1215 |
-
"description": "Multi-directional traffic at intersection visible from aerial view"
|
| 1216 |
-
}
|
| 1217 |
-
|
| 1218 |
-
return zones
|
| 1219 |
-
|
| 1220 |
-
except Exception as e:
|
| 1221 |
-
logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
|
| 1222 |
-
logger.error(traceback.format_exc())
|
| 1223 |
-
return {}
|
| 1224 |
-
|
| 1225 |
-
def _identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
|
| 1226 |
-
"""
|
| 1227 |
-
空中視角十字路口特徵
|
| 1228 |
-
|
| 1229 |
-
Args:
|
| 1230 |
-
detected_objects: 檢測到的物件列表
|
| 1231 |
-
|
| 1232 |
-
Returns:
|
| 1233 |
-
十字路口特徵區域字典
|
| 1234 |
-
"""
|
| 1235 |
-
try:
|
| 1236 |
-
zones = {}
|
| 1237 |
-
|
| 1238 |
-
# 檢查交通信號
|
| 1239 |
-
traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
|
| 1240 |
-
if traffic_light_objs:
|
| 1241 |
-
zones["traffic_control_pattern"] = {
|
| 1242 |
-
"region": "intersection",
|
| 1243 |
-
"objects": ["traffic light"] * len(traffic_light_objs),
|
| 1244 |
-
"description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
|
| 1245 |
-
}
|
| 1246 |
-
|
| 1247 |
-
# 人行道從空中視角的情境推斷
|
| 1248 |
-
zones["crossing_pattern"] = {
|
| 1249 |
-
"region": "central",
|
| 1250 |
-
"objects": ["inferred crosswalk"],
|
| 1251 |
-
"description": "Crossing pattern visible from aerial perspective"
|
| 1252 |
-
}
|
| 1253 |
-
|
| 1254 |
-
return zones
|
| 1255 |
-
|
| 1256 |
-
except Exception as e:
|
| 1257 |
-
logger.error(f"Error identifying aerial intersection features: {str(e)}")
|
| 1258 |
-
logger.error(traceback.format_exc())
|
| 1259 |
-
return {}
|
| 1260 |
-
|
| 1261 |
-
def _identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
|
| 1262 |
-
"""
|
| 1263 |
-
識別空中視角廣場特徵
|
| 1264 |
-
|
| 1265 |
-
Args:
|
| 1266 |
-
people_objs: 行人物件列表
|
| 1267 |
-
|
| 1268 |
-
Returns:
|
| 1269 |
-
廣場特徵區域字典
|
| 1270 |
-
"""
|
| 1271 |
-
try:
|
| 1272 |
-
zones = {}
|
| 1273 |
-
|
| 1274 |
-
if people_objs:
|
| 1275 |
-
# 檢查人群是否聚集在中央區域
|
| 1276 |
-
central_people = [obj for obj in people_objs
|
| 1277 |
-
if "middle" in obj["region"]]
|
| 1278 |
-
|
| 1279 |
-
if central_people:
|
| 1280 |
-
zones["central_gathering"] = {
|
| 1281 |
-
"region": "middle_center",
|
| 1282 |
-
"objects": ["person"] * len(central_people),
|
| 1283 |
-
"description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
|
| 1284 |
-
}
|
| 1285 |
-
|
| 1286 |
-
return zones
|
| 1287 |
-
|
| 1288 |
-
except Exception as e:
|
| 1289 |
-
logger.error(f"Error identifying aerial plaza features: {str(e)}")
|
| 1290 |
-
logger.error(traceback.format_exc())
|
| 1291 |
-
return {}
|
| 1292 |
-
|
| 1293 |
-
def _identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
|
| 1294 |
-
"""
|
| 1295 |
-
亞洲文化場景中的行人通道
|
| 1296 |
-
|
| 1297 |
-
Args:
|
| 1298 |
-
detected_objects: 檢測到的物件列表
|
| 1299 |
-
|
| 1300 |
-
Returns:
|
| 1301 |
-
行人通道區域字典
|
| 1302 |
-
"""
|
| 1303 |
-
try:
|
| 1304 |
-
zones = {}
|
| 1305 |
-
|
| 1306 |
-
pathway_items = []
|
| 1307 |
-
pathway_regions = {}
|
| 1308 |
-
|
| 1309 |
-
# 提取人群用於通道分析
|
| 1310 |
-
people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
|
| 1311 |
-
|
| 1312 |
-
# 分析人群是否形成線形(商業街的特徵)
|
| 1313 |
-
people_positions = [obj["normalized_center"] for obj in people_objs]
|
| 1314 |
-
|
| 1315 |
-
structured_path = False
|
| 1316 |
-
path_direction = "meandering"
|
| 1317 |
-
|
| 1318 |
-
if len(people_positions) >= 3:
|
| 1319 |
-
# 檢查人群是否沿相似y坐標排列(水平路徑)
|
| 1320 |
-
y_coords = [pos[1] for pos in people_positions]
|
| 1321 |
-
y_mean = sum(y_coords) / len(y_coords)
|
| 1322 |
-
y_variance = sum((y - y_mean)**2 for y in y_coords) / len(y_coords)
|
| 1323 |
-
|
| 1324 |
-
horizontal_path = y_variance < 0.05 # 低變異表示水平對齊
|
| 1325 |
-
|
| 1326 |
-
# 檢查人群是否沿相似x坐標排列(垂直路徑)
|
| 1327 |
-
x_coords = [pos[0] for pos in people_positions]
|
| 1328 |
-
x_mean = sum(x_coords) / len(x_coords)
|
| 1329 |
-
x_variance = sum((x - x_mean)**2 for x in x_coords) / len(x_coords)
|
| 1330 |
-
|
| 1331 |
-
vertical_path = x_variance < 0.05 # 低變異表示垂直對齊
|
| 1332 |
-
|
| 1333 |
-
structured_path = horizontal_path or vertical_path
|
| 1334 |
-
path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"
|
| 1335 |
-
|
| 1336 |
-
# 收集通道物件(人、自行車、摩托車在中間區域)
|
| 1337 |
-
for obj in detected_objects:
|
| 1338 |
-
if obj["class_id"] in [0, 1, 3]: # Person, bicycle, motorcycle
|
| 1339 |
-
y_pos = obj["normalized_center"][1]
|
| 1340 |
-
# 按垂直位置分組(圖像中間可能是通道)
|
| 1341 |
-
if 0.25 <= y_pos <= 0.75:
|
| 1342 |
-
region = obj["region"]
|
| 1343 |
-
if region not in pathway_regions:
|
| 1344 |
-
pathway_regions[region] = []
|
| 1345 |
-
pathway_regions[region].append(obj)
|
| 1346 |
-
pathway_items.append(obj["class_name"])
|
| 1347 |
-
|
| 1348 |
-
if pathway_items:
|
| 1349 |
-
path_desc = "Pedestrian walkway with people moving through the commercial area"
|
| 1350 |
-
if structured_path:
|
| 1351 |
-
path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"
|
| 1352 |
-
|
| 1353 |
-
zones["pedestrian_pathway"] = {
|
| 1354 |
-
"region": "middle_center", # 假設:通道通常在中間
|
| 1355 |
-
"objects": list(set(pathway_items)),
|
| 1356 |
-
"description": path_desc
|
| 1357 |
-
}
|
| 1358 |
-
|
| 1359 |
-
return zones
|
| 1360 |
-
|
| 1361 |
-
except Exception as e:
|
| 1362 |
-
logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
|
| 1363 |
-
logger.error(traceback.format_exc())
|
| 1364 |
-
return {}
|
| 1365 |
-
|
| 1366 |
-
def _identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1367 |
-
"""
|
| 1368 |
-
識別攤販區域
|
| 1369 |
-
|
| 1370 |
-
Args:
|
| 1371 |
-
detected_objects: 檢測到的物件列表
|
| 1372 |
-
|
| 1373 |
-
Returns:
|
| 1374 |
-
攤販區域字典
|
| 1375 |
-
"""
|
| 1376 |
-
try:
|
| 1377 |
-
zones = {}
|
| 1378 |
-
|
| 1379 |
-
# 識別攤販區域(小攤/商店 - 從情境推斷)
|
| 1380 |
-
has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects) # bags, bottles, cups
|
| 1381 |
-
has_people = any(obj["class_id"] == 0 for obj in detected_objects)
|
| 1382 |
-
|
| 1383 |
-
if has_small_objects and has_people:
|
| 1384 |
-
# 可能的攤販區域是人群和小物件聚集的地方
|
| 1385 |
-
small_obj_regions = {}
|
| 1386 |
-
|
| 1387 |
-
for obj in detected_objects:
|
| 1388 |
-
if obj["class_id"] in [24, 26, 39, 41, 67]: # bags, bottles, cups, phones
|
| 1389 |
-
region = obj["region"]
|
| 1390 |
-
if region not in small_obj_regions:
|
| 1391 |
-
small_obj_regions[region] = []
|
| 1392 |
-
small_obj_regions[region].append(obj)
|
| 1393 |
-
|
| 1394 |
-
if small_obj_regions:
|
| 1395 |
-
main_vendor_region = max(small_obj_regions.items(),
|
| 1396 |
-
key=lambda x: len(x[1]),
|
| 1397 |
-
default=(None, []))
|
| 1398 |
-
|
| 1399 |
-
if main_vendor_region[0] is not None:
|
| 1400 |
-
vendor_items = [obj["class_name"] for obj in main_vendor_region[1]]
|
| 1401 |
-
zones["vendor_zone"] = {
|
| 1402 |
-
"region": main_vendor_region[0],
|
| 1403 |
-
"objects": list(set(vendor_items)),
|
| 1404 |
-
"description": "Vendor or market stall area with small merchandise"
|
| 1405 |
-
}
|
| 1406 |
-
|
| 1407 |
-
return zones
|
| 1408 |
-
|
| 1409 |
-
except Exception as e:
|
| 1410 |
-
logger.error(f"Error identifying vendor zones: {str(e)}")
|
| 1411 |
-
logger.error(traceback.format_exc())
|
| 1412 |
-
return {}
|
| 1413 |
-
|
| 1414 |
-
def _identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1415 |
-
"""
|
| 1416 |
-
識別高級餐飲的裝飾區域
|
| 1417 |
-
|
| 1418 |
-
Args:
|
| 1419 |
-
detected_objects: 檢測到的物件列表
|
| 1420 |
-
|
| 1421 |
-
Returns:
|
| 1422 |
-
裝飾區域字典
|
| 1423 |
-
"""
|
| 1424 |
-
try:
|
| 1425 |
-
zones = {}
|
| 1426 |
-
|
| 1427 |
-
decor_items = []
|
| 1428 |
-
decor_regions = {}
|
| 1429 |
-
|
| 1430 |
-
# 尋找裝飾元素(花瓶、酒杯、未使用的餐具)
|
| 1431 |
-
for obj in detected_objects:
|
| 1432 |
-
if obj["class_id"] in [75, 40]: # Vase, wine glass
|
| 1433 |
-
region = obj["region"]
|
| 1434 |
-
if region not in decor_regions:
|
| 1435 |
-
decor_regions[region] = []
|
| 1436 |
-
decor_regions[region].append(obj)
|
| 1437 |
-
decor_items.append(obj["class_name"])
|
| 1438 |
-
|
| 1439 |
-
if decor_items:
|
| 1440 |
-
main_decor_region = max(decor_regions.items(),
|
| 1441 |
-
key=lambda x: len(x[1]),
|
| 1442 |
-
default=(None, []))
|
| 1443 |
-
|
| 1444 |
-
if main_decor_region[0] is not None:
|
| 1445 |
-
zones["decorative_zone"] = {
|
| 1446 |
-
"region": main_decor_region[0],
|
| 1447 |
-
"objects": list(set(decor_items)),
|
| 1448 |
-
"description": f"Decorative area with {', '.join(list(set(decor_items)))}"
|
| 1449 |
-
}
|
| 1450 |
-
|
| 1451 |
-
return zones
|
| 1452 |
-
|
| 1453 |
-
except Exception as e:
|
| 1454 |
-
logger.error(f"Error identifying upscale decorative zones: {str(e)}")
|
| 1455 |
-
logger.error(traceback.format_exc())
|
| 1456 |
-
return {}
|
| 1457 |
-
|
| 1458 |
-
def _identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1459 |
-
"""
|
| 1460 |
-
識別餐廳座位安排區域
|
| 1461 |
-
|
| 1462 |
-
Args:
|
| 1463 |
-
detected_objects: 檢測到的物件列表
|
| 1464 |
-
|
| 1465 |
-
Returns:
|
| 1466 |
-
座位區域字典
|
| 1467 |
-
"""
|
| 1468 |
-
try:
|
| 1469 |
-
zones = {}
|
| 1470 |
-
|
| 1471 |
-
# 識別座位安排區域
|
| 1472 |
-
chairs = [obj for obj in detected_objects if obj["class_id"] == 56] # chairs
|
| 1473 |
-
if len(chairs) >= 2:
|
| 1474 |
-
chair_regions = {}
|
| 1475 |
-
for obj in chairs:
|
| 1476 |
-
region = obj["region"]
|
| 1477 |
-
if region not in chair_regions:
|
| 1478 |
-
chair_regions[region] = []
|
| 1479 |
-
chair_regions[region].append(obj)
|
| 1480 |
-
|
| 1481 |
-
if chair_regions:
|
| 1482 |
-
main_seating_region = max(chair_regions.items(),
|
| 1483 |
-
key=lambda x: len(x[1]),
|
| 1484 |
-
default=(None, []))
|
| 1485 |
-
|
| 1486 |
-
if main_seating_region[0] is not None:
|
| 1487 |
-
zones["dining_seating_zone"] = {
|
| 1488 |
-
"region": main_seating_region[0],
|
| 1489 |
-
"objects": ["chair"] * len(main_seating_region[1]),
|
| 1490 |
-
"description": f"Formal dining seating arrangement with {len(main_seating_region[1])} chairs"
|
| 1491 |
-
}
|
| 1492 |
-
|
| 1493 |
-
return zones
|
| 1494 |
-
|
| 1495 |
-
except Exception as e:
|
| 1496 |
-
logger.error(f"Error identifying dining seating zones: {str(e)}")
|
| 1497 |
-
logger.error(traceback.format_exc())
|
| 1498 |
-
return {}
|
| 1499 |
-
|
| 1500 |
-
def _identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
|
| 1501 |
-
"""
|
| 1502 |
-
識別服務區域
|
| 1503 |
-
|
| 1504 |
-
Args:
|
| 1505 |
-
detected_objects: 檢測到的物件列表
|
| 1506 |
-
existing_zones: 已存在的功能區域
|
| 1507 |
-
|
| 1508 |
-
Returns:
|
| 1509 |
-
服務區域字典
|
| 1510 |
-
"""
|
| 1511 |
-
try:
|
| 1512 |
-
zones = {}
|
| 1513 |
-
|
| 1514 |
-
serving_items = []
|
| 1515 |
-
serving_regions = {}
|
| 1516 |
-
|
| 1517 |
-
# 服務區域可能有瓶子、碗、容器
|
| 1518 |
-
for obj in detected_objects:
|
| 1519 |
-
if obj["class_id"] in [39, 45]: # Bottle, bowl
|
| 1520 |
-
# 檢查是否在與主餐桌不同的區域
|
| 1521 |
-
if "formal_dining_zone" in existing_zones and obj["region"] != existing_zones["formal_dining_zone"]["region"]:
|
| 1522 |
-
region = obj["region"]
|
| 1523 |
-
if region not in serving_regions:
|
| 1524 |
-
serving_regions[region] = []
|
| 1525 |
-
serving_regions[region].append(obj)
|
| 1526 |
-
serving_items.append(obj["class_name"])
|
| 1527 |
-
|
| 1528 |
-
if serving_items:
|
| 1529 |
-
main_serving_region = max(serving_regions.items(),
|
| 1530 |
-
key=lambda x: len(x[1]),
|
| 1531 |
-
default=(None, []))
|
| 1532 |
-
|
| 1533 |
-
if main_serving_region[0] is not None:
|
| 1534 |
-
zones["serving_zone"] = {
|
| 1535 |
-
"region": main_serving_region[0],
|
| 1536 |
-
"objects": list(set(serving_items)),
|
| 1537 |
-
"description": f"Serving or sideboard area with {', '.join(list(set(serving_items)))}"
|
| 1538 |
-
}
|
| 1539 |
-
|
| 1540 |
-
return zones
|
| 1541 |
-
|
| 1542 |
-
except Exception as e:
|
| 1543 |
-
logger.error(f"Error identifying serving zones: {str(e)}")
|
| 1544 |
-
logger.error(traceback.format_exc())
|
| 1545 |
-
return {}
|
| 1546 |
-
|
| 1547 |
-
def _identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1548 |
-
"""
|
| 1549 |
-
識別建築區域(從場景情境推斷)
|
| 1550 |
-
|
| 1551 |
-
Args:
|
| 1552 |
-
detected_objects: 檢測到的物件列表
|
| 1553 |
-
|
| 1554 |
-
Returns:
|
| 1555 |
-
建築區域字典
|
| 1556 |
-
"""
|
| 1557 |
-
try:
|
| 1558 |
-
zones = {}
|
| 1559 |
-
|
| 1560 |
-
# 側邊建築區域(從場景情境推斷)
|
| 1561 |
-
# 檢查是否有實際可能包含建築物的區域
|
| 1562 |
-
left_side_regions = ["top_left", "middle_left", "bottom_left"]
|
| 1563 |
-
right_side_regions = ["top_right", "middle_right", "bottom_right"]
|
| 1564 |
-
|
| 1565 |
-
# 檢查左側
|
| 1566 |
-
left_building_evidence = True
|
| 1567 |
-
for region in left_side_regions:
|
| 1568 |
-
# 如果此區域有很多車輛或人群,不太可能是建築物
|
| 1569 |
-
vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
|
| 1570 |
-
for obj in detected_objects)
|
| 1571 |
-
people_in_region = any(obj["region"] == region and obj["class_id"] == 0
|
| 1572 |
-
for obj in detected_objects)
|
| 1573 |
-
|
| 1574 |
-
if vehicle_in_region or people_in_region:
|
| 1575 |
-
left_building_evidence = False
|
| 1576 |
-
break
|
| 1577 |
-
|
| 1578 |
-
# 檢查右側
|
| 1579 |
-
right_building_evidence = True
|
| 1580 |
-
for region in right_side_regions:
|
| 1581 |
-
# 如果此區域有很多車輛或人群,不太可能是建築物
|
| 1582 |
-
vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
|
| 1583 |
-
for obj in detected_objects)
|
| 1584 |
-
people_in_region = any(obj["region"] == region and obj["class_id"] == 0
|
| 1585 |
-
for obj in detected_objects)
|
| 1586 |
-
|
| 1587 |
-
if vehicle_in_region or people_in_region:
|
| 1588 |
-
right_building_evidence = False
|
| 1589 |
-
break
|
| 1590 |
-
|
| 1591 |
-
# 如果證據支持,添加建築區域
|
| 1592 |
-
if left_building_evidence:
|
| 1593 |
-
zones["building_zone_left"] = {
|
| 1594 |
-
"region": "middle_left",
|
| 1595 |
-
"objects": ["building"], # 推斷
|
| 1596 |
-
"description": "Tall buildings line the left side of the street"
|
| 1597 |
-
}
|
| 1598 |
-
|
| 1599 |
-
if right_building_evidence:
|
| 1600 |
-
zones["building_zone_right"] = {
|
| 1601 |
-
"region": "middle_right",
|
| 1602 |
-
"objects": ["building"], # 推斷
|
| 1603 |
-
"description": "Tall buildings line the right side of the street"
|
| 1604 |
-
}
|
| 1605 |
-
|
| 1606 |
-
return zones
|
| 1607 |
-
|
| 1608 |
-
except Exception as e:
|
| 1609 |
-
logger.error(f"Error identifying building zones: {str(e)}")
|
| 1610 |
-
logger.error(traceback.format_exc())
|
| 1611 |
-
return {}
|
| 1612 |
-
|
| 1613 |
-
def _identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
|
| 1614 |
-
"""
|
| 1615 |
-
識別金融區的行人區域
|
| 1616 |
-
|
| 1617 |
-
Args:
|
| 1618 |
-
detected_objects: 檢測到的物件列表
|
| 1619 |
-
|
| 1620 |
-
Returns:
|
| 1621 |
-
行人區域字典
|
| 1622 |
-
"""
|
| 1623 |
-
try:
|
| 1624 |
-
zones = {}
|
| 1625 |
-
|
| 1626 |
-
# 識別行人區域(如果有人群)
|
| 1627 |
-
people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
|
| 1628 |
-
if people_objs:
|
| 1629 |
-
people_regions = {}
|
| 1630 |
-
for obj in people_objs:
|
| 1631 |
-
region = obj["region"]
|
| 1632 |
-
if region not in people_regions:
|
| 1633 |
-
people_regions[region] = []
|
| 1634 |
-
people_regions[region].append(obj)
|
| 1635 |
-
|
| 1636 |
-
if people_regions:
|
| 1637 |
-
main_pedestrian_region = max(people_regions.items(),
|
| 1638 |
-
key=lambda x: len(x[1]),
|
| 1639 |
-
default=(None, []))
|
| 1640 |
-
|
| 1641 |
-
if main_pedestrian_region[0] is not None:
|
| 1642 |
-
zones["pedestrian_zone"] = {
|
| 1643 |
-
"region": main_pedestrian_region[0],
|
| 1644 |
-
"objects": ["person"] * len(main_pedestrian_region[1]),
|
| 1645 |
-
"description": f"Pedestrian area with {len(main_pedestrian_region[1])} people navigating the financial district"
|
| 1646 |
-
}
|
| 1647 |
-
|
| 1648 |
-
return zones
|
| 1649 |
-
|
| 1650 |
-
except Exception as e:
|
| 1651 |
-
logger.error(f"Error identifying financial pedestrian zones: {str(e)}")
|
| 1652 |
-
logger.error(traceback.format_exc())
|
| 1653 |
-
return {}
|
| 1654 |
-
|
| 1655 |
-
def _create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
|
| 1656 |
-
"""
|
| 1657 |
-
創建地標相關的輔助區域(攝影區、紀念品區等)
|
| 1658 |
-
|
| 1659 |
-
Args:
|
| 1660 |
-
landmark: 地標物件字典
|
| 1661 |
-
index: 地標索引
|
| 1662 |
-
|
| 1663 |
-
Returns:
|
| 1664 |
-
輔助區域字典
|
| 1665 |
-
"""
|
| 1666 |
-
try:
|
| 1667 |
-
auxiliary_zones = {}
|
| 1668 |
-
landmark_region = landmark.get("region", "middle_center")
|
| 1669 |
-
landmark_name = landmark.get("class_name", "Landmark")
|
| 1670 |
-
|
| 1671 |
-
# 創建攝影區
|
| 1672 |
-
# 根據地標位置調整攝影區位置(地標前方通常是攝影區)
|
| 1673 |
-
region_mapping = {
|
| 1674 |
-
"top_left": "bottom_right",
|
| 1675 |
-
"top_center": "bottom_center",
|
| 1676 |
-
"top_right": "bottom_left",
|
| 1677 |
-
"middle_left": "middle_right",
|
| 1678 |
-
"middle_center": "bottom_center",
|
| 1679 |
-
"middle_right": "middle_left",
|
| 1680 |
-
"bottom_left": "top_right",
|
| 1681 |
-
"bottom_center": "top_center",
|
| 1682 |
-
"bottom_right": "top_left"
|
| 1683 |
-
}
|
| 1684 |
-
|
| 1685 |
-
photo_region = region_mapping.get(landmark_region, landmark_region)
|
| 1686 |
-
|
| 1687 |
-
photo_key = f"{landmark_name.lower().replace(' ', '_')}_photography_spot"
|
| 1688 |
-
auxiliary_zones[photo_key] = {
|
| 1689 |
-
"name": f"{landmark_name} Photography Spot",
|
| 1690 |
-
"description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
|
| 1691 |
-
"objects": ["camera", "person", "cell phone"],
|
| 1692 |
-
"region": photo_region,
|
| 1693 |
-
"primary_function": "Tourist photography"
|
| 1694 |
-
}
|
| 1695 |
-
|
| 1696 |
-
# 如果是著名地標,可能有紀念品販售區
|
| 1697 |
-
if landmark.get("confidence", 0) > 0.7: # 高置信度地標更可能有紀念品區
|
| 1698 |
-
# 根據地標位置找到適合的紀念品區位置(通常在地標附近但不直接在地標上)
|
| 1699 |
-
adjacent_regions = {
|
| 1700 |
-
"top_left": ["top_center", "middle_left"],
|
| 1701 |
-
"top_center": ["top_left", "top_right"],
|
| 1702 |
-
"top_right": ["top_center", "middle_right"],
|
| 1703 |
-
"middle_left": ["top_left", "bottom_left"],
|
| 1704 |
-
"middle_center": ["middle_left", "middle_right"],
|
| 1705 |
-
"middle_right": ["top_right", "bottom_right"],
|
| 1706 |
-
"bottom_left": ["middle_left", "bottom_center"],
|
| 1707 |
-
"bottom_center": ["bottom_left", "bottom_right"],
|
| 1708 |
-
"bottom_right": ["bottom_center", "middle_right"]
|
| 1709 |
-
}
|
| 1710 |
-
|
| 1711 |
-
if landmark_region in adjacent_regions:
|
| 1712 |
-
souvenir_region = adjacent_regions[landmark_region][0] # 選擇第一個相鄰區域
|
| 1713 |
-
|
| 1714 |
-
souvenir_key = f"{landmark_name.lower().replace(' ', '_')}_souvenir_area"
|
| 1715 |
-
auxiliary_zones[souvenir_key] = {
|
| 1716 |
-
"name": f"{landmark_name} Souvenir Area",
|
| 1717 |
-
"description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
|
| 1718 |
-
"objects": ["person", "handbag", "backpack"],
|
| 1719 |
-
"region": souvenir_region,
|
| 1720 |
-
"primary_function": "Tourism commerce"
|
| 1721 |
-
}
|
| 1722 |
-
|
| 1723 |
-
return auxiliary_zones
|
| 1724 |
-
|
| 1725 |
-
except Exception as e:
|
| 1726 |
-
logger.error(f"Error creating landmark auxiliary zones: {str(e)}")
|
| 1727 |
-
logger.error(traceback.format_exc())
|
| 1728 |
-
return {}
|
|
|
|
| 3 |
import traceback
|
| 4 |
import numpy as np
|
| 5 |
from typing import Dict, List, Any, Optional
|
| 6 |
+
from functional_zone_detector import FunctionalZoneDetector
|
| 7 |
+
from pattern_analyzer import PatternAnalyzer
|
| 8 |
+
from specialized_scene_processor import SpecializedSceneProcessor
|
| 9 |
|
| 10 |
logger = logging.getLogger(__name__)
|
| 11 |
|
|
|
|
| 13 |
"""
|
| 14 |
負責不同場景類型的區域識別邏輯
|
| 15 |
專注於根據場景類型執行相應的功能區域識別策略
|
| 16 |
+
整合所有專門的區域辨識組件,主要須整合至SpatialAnalyzer
|
| 17 |
"""
|
| 18 |
|
| 19 |
def __init__(self):
|
| 20 |
"""初始化場景區域辨識器"""
|
| 21 |
try:
|
| 22 |
+
# 初始化各個專門組件
|
| 23 |
+
self.functional_detector = FunctionalZoneDetector()
|
| 24 |
+
self.pattern_analyzer = PatternAnalyzer()
|
| 25 |
+
self.scene_processor = SpecializedSceneProcessor()
|
| 26 |
+
|
| 27 |
logger.info("SceneZoneIdentifier initialized successfully")
|
| 28 |
|
| 29 |
except Exception as e:
|
|
|
|
| 48 |
zones = {}
|
| 49 |
|
| 50 |
# 主要功能區域(基於物件關聯性而非場景類型)
|
| 51 |
+
primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
|
| 52 |
if primary_zone:
|
| 53 |
# 基於區域內容生成描述性鍵名
|
| 54 |
+
descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
|
| 55 |
zones[descriptive_key] = primary_zone
|
| 56 |
|
| 57 |
# 只有明確證據且物件數量足夠時創建次要功能區域
|
| 58 |
if len(zones) >= 1 and len(detected_objects) >= 6:
|
| 59 |
+
secondary_zone = self.functional_detector.identify_secondary_functional_area(detected_objects, zones)
|
| 60 |
if secondary_zone:
|
| 61 |
# 基於區域內容生成描述性鍵名
|
| 62 |
+
descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(secondary_zone, "secondary")
|
| 63 |
zones[descriptive_key] = secondary_zone
|
| 64 |
|
| 65 |
logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
|
|
|
|
| 70 |
logger.error(traceback.format_exc())
|
| 71 |
return {}
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
|
| 74 |
"""
|
| 75 |
+
辨識一般戶外場景的功能區域
|
| 76 |
|
| 77 |
Args:
|
| 78 |
category_regions: 按類別和區域分組的物件字典
|
|
|
|
| 141 |
|
| 142 |
# 針對公園區域的特殊處理
|
| 143 |
if scene_type == "park_area":
|
| 144 |
+
zones.update(self.pattern_analyzer.identify_park_recreational_zones(detected_objects))
|
| 145 |
|
| 146 |
# 針對停車場的特殊處理
|
| 147 |
if scene_type == "parking_lot":
|
| 148 |
+
zones.update(self.pattern_analyzer.identify_parking_zones(detected_objects))
|
| 149 |
|
| 150 |
logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
|
| 151 |
return zones
|
|
|
|
| 158 |
def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
|
| 159 |
"""
|
| 160 |
辨識城市十字路口的功能區域,無論是否有行人,只要偵測到紅綠燈就一定顯示 Traffic Control Area;
|
| 161 |
+
如果有行人,則額外建立 Crossing Zone 並把行人 + 同 region 的紅綠燈歸在一起。
|
| 162 |
|
| 163 |
Args:
|
| 164 |
category_regions: 按類別和 region 分組的物件字典
|
|
|
|
| 177 |
traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
|
| 178 |
|
| 179 |
# 2. Step A: 無條件建立 Traffic Control Area
|
| 180 |
+
# 把每個 region 下的紅綠燈都先分群,生成對應 zone,確保"只要偵測到紅綠燈就一定顯示"
|
| 181 |
signal_regions_all = {}
|
| 182 |
for t in traffic_light_objs:
|
| 183 |
region = t["region"]
|
|
|
|
| 211 |
|
| 212 |
# 3. Step B: 如果有行人,就建立 Crossing Zone,並移除已被打包的紅綠燈
|
| 213 |
if pedestrian_objs:
|
| 214 |
+
# 先呼叫 analyze_crossing_patterns,讓它回傳「行人 + 同 region 的紅綠燈」區
|
| 215 |
+
crossing_zones = self.pattern_analyzer.analyze_crossing_patterns(pedestrian_objs, traffic_light_objs)
|
| 216 |
|
| 217 |
# 把 Crossing Zone 加到最終 zones,並同時記錄已使用掉的紅綠燈數量
|
| 218 |
for zone_key, zone_info in crossing_zones.items():
|
|
|
|
| 249 |
|
| 250 |
# 5. Step D: 分析車輛交通區域(Vehicle Zones)
|
| 251 |
if vehicle_objs:
|
| 252 |
+
traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
|
| 253 |
+
# analyze_traffic_zones 內部已用英文 debug,直接更新
|
| 254 |
for zone_key, zone_info in traffic_zones.items():
|
| 255 |
if zone_key in zones:
|
| 256 |
suffix = 1
|
|
|
|
| 322 |
# 識別車輛模式進行交通分析
|
| 323 |
vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
|
| 324 |
if vehicle_objs:
|
| 325 |
+
zones.update(self.pattern_analyzer.analyze_aerial_traffic_patterns(vehicle_objs))
|
| 326 |
|
| 327 |
# 針對十字路口特定空中視角的處理
|
| 328 |
if "intersection" in scene_type:
|
| 329 |
+
zones.update(self.scene_processor.identify_aerial_intersection_features(detected_objects))
|
| 330 |
|
| 331 |
# 針對廣場空中視角的處理
|
| 332 |
if "plaza" in scene_type:
|
| 333 |
+
zones.update(self.scene_processor.identify_aerial_plaza_features(people_objs))
|
| 334 |
|
| 335 |
logger.info(f"Identified {len(zones)} aerial view zones")
|
| 336 |
return zones
|
|
|
|
| 386 |
"description": f"Asian commercial storefront with pedestrian activity"
|
| 387 |
}
|
| 388 |
|
| 389 |
+
# 辨識行人通道
|
| 390 |
+
zones.update(self.scene_processor.identify_asian_pedestrian_pathway(detected_objects))
|
| 391 |
|
| 392 |
# 辨識攤販區域(小攤/商店 - 從情境推斷)
|
| 393 |
+
zones.update(self.scene_processor.identify_vendor_zones(detected_objects))
|
| 394 |
|
| 395 |
# 針對夜市的特殊處理
|
| 396 |
if scene_type == "asian_night_market":
|
|
|
|
| 447 |
}
|
| 448 |
|
| 449 |
# 識別裝飾區域,增強檢測
|
| 450 |
+
zones.update(self.scene_processor.identify_upscale_decorative_zones(detected_objects))
|
| 451 |
|
| 452 |
# 識別座位安排區域
|
| 453 |
+
zones.update(self.scene_processor.identify_dining_seating_zones(detected_objects))
|
| 454 |
|
| 455 |
# 識別服務區域(如果與餐飲區域不同)
|
| 456 |
+
zones.update(self.scene_processor.identify_serving_zones(detected_objects, zones))
|
| 457 |
|
| 458 |
logger.info(f"Identified {len(zones)} upscale dining zones")
|
| 459 |
return zones
|
|
|
|
| 502 |
}
|
| 503 |
|
| 504 |
# 側邊建築區域(從場景情境推斷)
|
| 505 |
+
zones.update(self.scene_processor.identify_building_zones(detected_objects))
|
| 506 |
|
| 507 |
# 行人區域
|
| 508 |
+
zones.update(self.scene_processor.identify_financial_pedestrian_zones(detected_objects))
|
| 509 |
|
| 510 |
logger.info(f"Identified {len(zones)} financial district zones")
|
| 511 |
return zones
|
|
|
|
| 592 |
}
|
| 593 |
|
| 594 |
# 創建相關輔助功能區,如攝影區、紀念品販賣區
|
| 595 |
+
auxiliary_zones = self.scene_processor.create_landmark_auxiliary_zones(landmark, 0)
|
| 596 |
if auxiliary_zones:
|
| 597 |
landmark_zones.update(auxiliary_zones)
|
| 598 |
|
|
|
|
| 604 |
logger.error(traceback.format_exc())
|
| 605 |
return {}
|
| 606 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 607 |
def _get_directional_description(self, region: str) -> str:
|
| 608 |
"""
|
| 609 |
將區域名稱轉換為方位描述(東西南北)
|
| 610 |
+
這是核心工具方法,供所有組件使用
|
| 611 |
|
| 612 |
Args:
|
| 613 |
region: 區域名稱
|
|
|
|
| 640 |
except Exception as e:
|
| 641 |
logger.error(f"Error getting directional description for region '{region}': {str(e)}")
|
| 642 |
return "central"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
spatial_location_handler.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import traceback
|
| 3 |
+
import numpy as np
|
| 4 |
+
from typing import Dict, List, Optional, Any, Tuple
|
| 5 |
+
|
| 6 |
+
class SpatialLocationHandler:
    """
    Spatial location handler.

    Generates spatial-position phrases for detected objects, classifies how a
    group of objects is arranged (linear, clustered, circular, ...), and can
    delegate phrase generation to an optional RegionAnalyzer-like object.
    """

    # Arrangement phrases keyed by object class, then by arrangement pattern.
    _ARRANGEMENT_TEMPLATES = {
        "chair": {
            "linear": "arranged in a row",
            "clustered": "grouped together for conversation",
            "circular": "arranged around the table",
            "scattered": "positioned throughout the space",
            "regular": "evenly spaced",
            "distributed": "thoughtfully positioned"
        },
        "dining table": {
            "linear": "aligned to create a unified dining space",
            "clustered": "grouped to form intimate dining areas",
            "scattered": "distributed to optimize space flow",
            "regular": "systematically positioned",
            "distributed": "strategically placed"
        },
        "car": {
            "linear": "parked in sequence",
            "clustered": "grouped in the parking area",
            "scattered": "distributed throughout the lot",
            "regular": "neatly parked",
            "distributed": "positioned across the area"
        },
        "person": {
            "linear": "moving in a line",
            "clustered": "gathered together",
            "circular": "forming a circle",
            "scattered": "spread across the area",
            "distributed": "positioned throughout the scene"
        }
    }

    # Arrangement phrases used for classes without a dedicated template.
    _GENERIC_ARRANGEMENT_TEMPLATES = {
        "linear": "arranged in a line",
        "clustered": "grouped together",
        "circular": "arranged in a circular pattern",
        "scattered": "distributed across the space",
        "regular": "evenly positioned",
        "distributed": "thoughtfully placed"
    }

    def __init__(self, region_analyzer: Optional[Any] = None):
        """
        Initialize the handler.

        Args:
            region_analyzer: Optional RegionAnalyzer instance stored on the
                handler and used by get_standardized_spatial_description.
        """
        self.logger = logging.getLogger(self.__class__.__name__)
        self.region_analyzer = region_analyzer

    def set_region_analyzer(self, region_analyzer: Any) -> None:
        """
        Store the RegionAnalyzer used for standardized spatial descriptions.

        Args:
            region_analyzer: RegionAnalyzer instance.
        """
        try:
            self.region_analyzer = region_analyzer
            self.logger.info("RegionAnalyzer instance set for SpatialLocationHandler")
        except Exception as e:
            self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")

    @staticmethod
    def _position_phrase(vertical: Optional[str], horizontal: Optional[str]) -> str:
        """
        Compose a position phrase from a vertical and a horizontal component.

        Args:
            vertical: "top", "bottom" or None (vertically centered).
            horizontal: "left", "right" or None (horizontally centered).

        Returns:
            str: A phrase such as "in the upper left area", "in the lower
            area", "on the right side" or "in the center".
        """
        vertical_word = {"top": "upper", "bottom": "lower"}.get(vertical)
        if vertical_word and horizontal:
            return f"in the {vertical_word} {horizontal} area"
        if vertical_word:
            return f"in the {vertical_word} area"
        if horizontal:
            return f"on the {horizontal} side"
        return "in the center"

    def generate_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
                                     image_height: Optional[int] = None,
                                     region_analyzer: Optional[Any] = None) -> str:
        """
        Generate a spatial-position phrase for a detected object.

        Resolution order:
          1. Missing/blank/"unknown" region: a generic phrase chosen from the
             object's class name.
          2. The supplied ``region_analyzer`` (if it exposes
             ``get_spatial_description_phrase``); a failing analyzer is
             logged and the built-in logic below is used instead.
          3. Keyword matching on the region name (top/bottom/left/right/
             center/middle).
          4. ``normalized_center`` of the object, only when both image
             dimensions were supplied.

        Args:
            obj: Detection dictionary (reads ``region``, ``class_name`` and
                ``normalized_center``).
            image_width: Optional image width.
            image_height: Optional image height.
            region_analyzer: Optional RegionAnalyzer instance used to produce
                a standardized description.

        Returns:
            str: The spatial phrase, or an empty string when nothing could be
            derived or an unexpected error occurred.
        """
        try:
            region = obj.get("region") or ""
            object_type = obj.get("class_name", "")

            # No usable region: fall back to a generic phrase by object type.
            if not region.strip() or region == "unknown":
                if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
                    return "positioned in the scene"
                elif object_type and "person" in object_type.lower():
                    return "present in the area"
                else:
                    return "located in the scene"

            # Prefer the analyzer's standardized wording when available.
            if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
                try:
                    if hasattr(region_analyzer, 'get_contextual_spatial_description'):
                        spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
                    else:
                        spatial_desc = region_analyzer.get_spatial_description_phrase(region)
                except Exception as e:
                    # Keep going: the built-in mapping below is the fallback.
                    self.logger.warning(f"Error generating spatial description: {str(e)}")
                    spatial_desc = ""

                if spatial_desc:
                    return spatial_desc

            # Built-in fallback: keyword matching on the region name. This
            # covers the exact names ("top left", "middle center", ...) as
            # well as looser names that merely contain a direction word.
            clean_region = region.replace('_', ' ').strip().lower()

            vertical = "top" if "top" in clean_region else "bottom" if "bottom" in clean_region else None
            horizontal = "left" if "left" in clean_region else "right" if "right" in clean_region else None

            if vertical or horizontal or "center" in clean_region or "middle" in clean_region:
                return self._position_phrase(vertical, horizontal)

            # Region name not recognized: derive the position from the
            # normalized center instead.
            norm_center = obj.get("normalized_center")
            if norm_center and image_width and image_height:
                x_norm, y_norm = norm_center
                horizontal = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else None
                vertical = "top" if y_norm < 0.4 else "bottom" if y_norm > 0.6 else None
                # Same phrase builder as above so both paths agree on wording.
                return self._position_phrase(vertical, horizontal)

            # Every strategy failed.
            return ""

        except Exception as e:
            self.logger.warning(f"Error generating spatial description: {str(e)}")
            return ""

    def get_standardized_spatial_description(self, obj: Dict) -> str:
        """
        Generate a standardized spatial description via the stored
        RegionAnalyzer.

        Args:
            obj: Detection dictionary (reads ``region`` and ``class_name``).

        Returns:
            str: The analyzer's description; an empty string when no analyzer
            (or no suitable analyzer method) is available; a generic phrase
            ("visible in the scene" / "present in the view") when the
            analyzer raises.
        """
        try:
            if hasattr(self, 'region_analyzer') and self.region_analyzer:
                region = obj.get("region", "")
                object_type = obj.get("class_name", "")

                if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
                    return self.region_analyzer.get_contextual_spatial_description(region, object_type)
                elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
                    return self.region_analyzer.get_spatial_description_phrase(region)

            return ""

        except Exception as e:
            self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
            object_type = obj.get("class_name", "")
            if object_type:
                return "visible in the scene"
            return "present in the view"

    def analyze_spatial_arrangement(self, class_name: str, scene_type: Optional[str],
                                    detected_objects: Optional[List[Dict]],
                                    count: int) -> Optional[str]:
        """
        Analyze how objects of one class are arranged and describe it.

        Args:
            class_name: Object class name.
            scene_type: Scene type.
            detected_objects: All detections of that class.
            count: Number of objects.

        Returns:
            Optional[str]: Arrangement phrase, or None when fewer than two
            usable positions exist or the analysis fails.
        """
        if not detected_objects or len(detected_objects) < 2:
            return None

        try:
            # Collect normalized centers; objects without one are treated as
            # sitting at the image center.
            positions = []
            for obj in detected_objects:
                center = obj.get("normalized_center", [0.5, 0.5])
                if isinstance(center, (list, tuple)) and len(center) >= 2:
                    positions.append(center)

            if len(positions) < 2:
                return None

            arrangement_pattern = self._analyze_arrangement_pattern(positions)

            return self._generate_arrangement_description(class_name, scene_type,
                                                          arrangement_pattern, count)

        except Exception as e:
            self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
            return None

    def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
        """
        Classify the arrangement of a set of normalized positions.

        Args:
            positions: List of normalized coordinates; only the first two
                components of each entry are used.

        Returns:
            str: One of "single", "circular", "clustered", "linear",
            "scattered", "regular" or "distributed".
        """
        if len(positions) < 2:
            return "single"

        # Use only (x, y) so entries carrying extra components cannot make
        # the array ragged.
        pos_array = np.array([[pos[0], pos[1]] for pos in positions], dtype=float)

        # Spread along each axis.
        x_variance = np.var(pos_array[:, 0])
        y_variance = np.var(pos_array[:, 1])

        # Pairwise distances between all distinct pairs (i < j).
        deltas = pos_array[:, None, :] - pos_array[None, :, :]
        pair_rows, pair_cols = np.triu_indices(len(pos_array), k=1)
        distances = np.sqrt((deltas ** 2).sum(axis=-1))[pair_rows, pair_cols]

        avg_distance = np.mean(distances) if distances.size else 0
        distance_variance = np.var(distances) if distances.size else 0

        if len(positions) >= 4 and self._is_circular_pattern(positions):
            return "circular"
        # "clustered" must be tested before "linear": a tight group always
        # has a small variance on both axes, so with the looser per-axis test
        # first this branch could never be reached.
        elif avg_distance < 0.3 and distance_variance < 0.02:  # close together, similar spacing
            return "clustered"
        elif x_variance < 0.05 or y_variance < 0.05:  # little spread along one axis
            return "linear"
        elif avg_distance > 0.6:  # far apart
            return "scattered"
        elif distance_variance < 0.03:  # consistent spacing
            return "regular"
        else:
            return "distributed"

    def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
        """
        Check whether positions form a circular / ring-like arrangement.

        The points count as circular when their distances to the centroid
        are similar (variance below 0.05) and the mean distance exceeds 0.2.

        Args:
            positions: List of coordinates; only the first two components of
                each entry are used.

        Returns:
            bool: True for a circular arrangement; False otherwise, including
            for fewer than four points or malformed coordinates.
        """
        if len(positions) < 4:
            return False

        try:
            pos_array = np.array([[pos[0], pos[1]] for pos in positions], dtype=float)

            # Distance of every point to the centroid.
            centroid = pos_array.mean(axis=0)
            distances_to_center = np.sqrt(((pos_array - centroid) ** 2).sum(axis=1))

            distance_variance = np.var(distances_to_center)
            # bool() so callers get a plain Python bool, not numpy.bool_.
            return bool(distance_variance < 0.05 and np.mean(distances_to_center) > 0.2)

        except (TypeError, ValueError, IndexError, KeyError):
            # Malformed coordinates: treat as "not circular".
            return False

    def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
                                          arrangement_pattern: str, count: int) -> Optional[str]:
        """
        Produce the arrangement phrase for an object class and pattern.

        Args:
            class_name: Object class name.
            scene_type: Scene type (currently not used for phrase selection).
            arrangement_pattern: Pattern name from _analyze_arrangement_pattern.
            count: Number of objects (currently not used for phrase selection).

        Returns:
            Optional[str]: The class-specific phrase when one exists, the
            generic phrase otherwise; "positioned in the scene" when the
            pattern has no entry in the selected template.
        """
        templates = self._ARRANGEMENT_TEMPLATES.get(class_name, self._GENERIC_ARRANGEMENT_TEMPLATES)
        return templates.get(arrangement_pattern, "positioned in the scene")
|
specialized_scene_processor.py
ADDED
|
@@ -0,0 +1,527 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import logging
|
| 3 |
+
import traceback
|
| 4 |
+
import numpy as np
|
| 5 |
+
from typing import Dict, List, Any, Optional
|
| 6 |
+
|
| 7 |
+
logger = logging.getLogger(__name__)
|
| 8 |
+
|
| 9 |
+
class SpecializedSceneProcessor:
    """
    Zone identification for specialised scene types and landmarks.

    Groups the scene-specific heuristics for aerial views, Asian cultural /
    commercial streets, upscale dining, financial districts and landmark
    surroundings.

    Detections are plain dicts; depending on the method, the keys
    "class_id", "class_name", "region" and "normalized_center" are read.
    Every public method returns a dict mapping a zone name to a zone
    description dict, and logs and swallows any exception, returning an
    empty dict instead of raising.
    """

    def __init__(self):
        """Initialise the processor; it keeps no per-instance state."""
        logger.info("SpecializedSceneProcessor initialized successfully")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _unique_in_order(items: List[str]) -> List[str]:
        """Drop duplicates while keeping first-seen order.

        Used instead of ``list(set(items))`` so that zone object lists and
        the descriptions built from them are identical from run to run.
        """
        return list(dict.fromkeys(items))

    @staticmethod
    def _group_by_region(objects: List[Dict]) -> Dict[str, List[Dict]]:
        """Bucket detections by their "region" value, in encounter order."""
        groups: Dict[str, List[Dict]] = {}
        for obj in objects:
            groups.setdefault(obj["region"], []).append(obj)
        return groups

    @staticmethod
    def _largest_group(groups: Dict[str, List[Dict]]):
        """Return ``(region, objects)`` for the fullest bucket.

        Ties go to the bucket that was created first; an empty mapping
        yields ``(None, [])``.
        """
        if not groups:
            return None, []
        return max(groups.items(), key=lambda item: len(item[1]))

    @staticmethod
    def _variance(values: List[float]) -> float:
        """Population variance of a non-empty list of numbers."""
        mean = sum(values) / len(values)
        return sum((value - mean) ** 2 for value in values) / len(values)

    @staticmethod
    def _side_is_clear(detected_objects: List[Dict], side_regions: List[str]) -> bool:
        """True when no person (0) or vehicle (1, 2, 3, 5, 7) sits in the given regions."""
        blocking_ids = (0, 1, 2, 3, 5, 7)
        return not any(
            obj["region"] in side_regions and obj["class_id"] in blocking_ids
            for obj in detected_objects
        )

    # ------------------------------------------------------------------
    # Aerial views
    # ------------------------------------------------------------------
    def identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
        """
        Describe intersection features seen from an aerial viewpoint.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict of intersection zones. "crossing_pattern" is always present
            (it is inferred, not detected); "traffic_control_pattern" is
            added when at least one traffic light (class_id 9) was detected.
        """
        try:
            zones = {}

            # Traffic signals
            signal_count = sum(1 for obj in detected_objects if obj["class_id"] == 9)
            if signal_count:
                zones["traffic_control_pattern"] = {
                    "region": "intersection",
                    "objects": ["traffic light"] * signal_count,
                    "description": f"Intersection traffic control with {signal_count} signals visible from above"
                }

            # Crosswalks are inferred from the aerial context rather than detected
            zones["crossing_pattern"] = {
                "region": "central",
                "objects": ["inferred crosswalk"],
                "description": "Crossing pattern visible from aerial perspective"
            }

            return zones

        except Exception as e:
            logger.error(f"Error identifying aerial intersection features: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
        """
        Describe plaza features seen from an aerial viewpoint.

        Args:
            people_objs: List of person detections.

        Returns:
            Dict with a "central_gathering" zone when at least one person is
            in a region whose name contains "middle"; otherwise empty.
        """
        try:
            zones = {}

            if people_objs:
                # Are people gathered in the central band of the image?
                central_count = sum(1 for obj in people_objs if "middle" in obj["region"])

                if central_count:
                    zones["central_gathering"] = {
                        "region": "middle_center",
                        "objects": ["person"] * central_count,
                        "description": f"Central plaza gathering area with {central_count} people viewed from above"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying aerial plaza features: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    # ------------------------------------------------------------------
    # Asian cultural / commercial street scenes
    # ------------------------------------------------------------------
    def identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the pedestrian pathway in an Asian cultural scene.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict with a "pedestrian_pathway" zone when a person, bicycle or
            motorcycle lies in the vertical middle band of the image
            (normalized y between 0.25 and 0.75); otherwise empty.
        """
        try:
            zones = {}

            # People positions drive the "is this a structured path" analysis
            people_positions = [obj["normalized_center"]
                                for obj in detected_objects if obj["class_id"] == 0]

            structured_path = False
            path_direction = "meandering"

            if len(people_positions) >= 3:
                # Low variance along one axis means people are lined up on the other
                horizontal_path = self._variance([pos[1] for pos in people_positions]) < 0.05
                vertical_path = self._variance([pos[0] for pos in people_positions]) < 0.05

                structured_path = horizontal_path or vertical_path
                if horizontal_path:
                    path_direction = "horizontal"
                elif vertical_path:
                    path_direction = "vertical"

            # Pathway users: person, bicycle, motorcycle in the middle band
            pathway_items = [
                obj["class_name"]
                for obj in detected_objects
                if obj["class_id"] in (0, 1, 3) and 0.25 <= obj["normalized_center"][1] <= 0.75
            ]

            if pathway_items:
                path_desc = "Pedestrian walkway with people moving through the commercial area"
                if structured_path:
                    path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"

                zones["pedestrian_pathway"] = {
                    "region": "middle_center",  # the pathway is assumed to run through the middle
                    "objects": self._unique_in_order(pathway_items),
                    "description": path_desc
                }

            return zones

        except Exception as e:
            logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify a vendor / market-stall zone inferred from context.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict with a "vendor_zone" located in the region holding the most
            small merchandise-like objects; empty when there are no people
            or no such objects.
        """
        try:
            zones = {}

            # NOTE(review): the gate ignores phones (67) although they are collected
            # below; kept as in the original - confirm whether that is intended.
            has_small_objects = any(obj["class_id"] in (24, 26, 39, 41)
                                    for obj in detected_objects)  # bags, bottles, cups
            has_people = any(obj["class_id"] == 0 for obj in detected_objects)

            if has_small_objects and has_people:
                # A likely stall is where small objects cluster
                merchandise = [obj for obj in detected_objects
                               if obj["class_id"] in (24, 26, 39, 41, 67)]  # bags, bottles, cups, phones
                region, members = self._largest_group(self._group_by_region(merchandise))

                if region is not None:
                    zones["vendor_zone"] = {
                        "region": region,
                        "objects": self._unique_in_order([obj["class_name"] for obj in members]),
                        "description": "Vendor or market stall area with small merchandise"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying vendor zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    # ------------------------------------------------------------------
    # Upscale dining
    # ------------------------------------------------------------------
    def identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the decorative zone of an upscale dining scene.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict with a "decorative_zone" placed in the region with the most
            vases / wine glasses. Its object list covers the decorative
            classes found anywhere in the image; empty when none are found.
        """
        try:
            zones = {}

            # Decorative elements: vase (75), wine glass (40)
            decor_objs = [obj for obj in detected_objects if obj["class_id"] in (75, 40)]
            region, _ = self._largest_group(self._group_by_region(decor_objs))

            if region is not None:
                decor_names = self._unique_in_order([obj["class_name"] for obj in decor_objs])
                zones["decorative_zone"] = {
                    "region": region,
                    "objects": decor_names,
                    "description": f"Decorative area with {', '.join(decor_names)}"
                }

            return zones

        except Exception as e:
            logger.error(f"Error identifying upscale decorative zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the seating arrangement zone of a restaurant scene.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict with a "dining_seating_zone" for the region holding the most
            chairs; empty when fewer than two chairs were detected.
        """
        try:
            zones = {}

            chairs = [obj for obj in detected_objects if obj["class_id"] == 56]  # chairs
            if len(chairs) >= 2:
                region, members = self._largest_group(self._group_by_region(chairs))

                if region is not None:
                    zones["dining_seating_zone"] = {
                        "region": region,
                        "objects": ["chair"] * len(members),
                        "description": f"Formal dining seating arrangement with {len(members)} chairs"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying dining seating zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
        """
        Identify a serving / sideboard zone.

        Args:
            detected_objects: List of detected object dicts.
            existing_zones: Functional zones identified so far. A serving zone
                is only produced when it contains "formal_dining_zone".

        Returns:
            Dict with a "serving_zone" built from bottles / bowls located
            outside the formal dining region; otherwise empty.
        """
        try:
            zones = {}

            # Without a main dining table there is nothing to be "beside"
            if "formal_dining_zone" not in existing_zones:
                return zones
            dining_region = existing_zones["formal_dining_zone"]["region"]

            # Bottle (39) or bowl (45) located away from the main dining table
            serving_objs = [obj for obj in detected_objects
                            if obj["class_id"] in (39, 45) and obj["region"] != dining_region]
            region, _ = self._largest_group(self._group_by_region(serving_objs))

            if region is not None:
                serving_names = self._unique_in_order([obj["class_name"] for obj in serving_objs])
                zones["serving_zone"] = {
                    "region": region,
                    "objects": serving_names,
                    "description": f"Serving or sideboard area with {', '.join(serving_names)}"
                }

            return zones

        except Exception as e:
            logger.error(f"Error identifying serving zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    # ------------------------------------------------------------------
    # Financial district
    # ------------------------------------------------------------------
    def identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Infer building zones along the sides of the image.

        Buildings are not detected directly: a side is assumed to be lined
        with buildings when none of its three regions contains a person or a
        vehicle.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict that may contain "building_zone_left" and/or
            "building_zone_right".
        """
        try:
            zones = {}

            sides = (
                ("left", ["top_left", "middle_left", "bottom_left"]),
                ("right", ["top_right", "middle_right", "bottom_right"]),
            )

            for side, side_regions in sides:
                # People or vehicles on a side make buildings there unlikely
                if self._side_is_clear(detected_objects, side_regions):
                    zones[f"building_zone_{side}"] = {
                        "region": f"middle_{side}",
                        "objects": ["building"],  # inferred
                        "description": f"Tall buildings line the {side} side of the street"
                    }

            return zones

        except Exception as e:
            logger.error(f"Error identifying building zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    def identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
        """
        Identify the pedestrian zone of a financial district scene.

        Args:
            detected_objects: List of detected object dicts.

        Returns:
            Dict with a "pedestrian_zone" for the region holding the most
            people; empty when no person was detected.
        """
        try:
            zones = {}

            people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
            region, members = self._largest_group(self._group_by_region(people_objs))

            if region is not None:
                zones["pedestrian_zone"] = {
                    "region": region,
                    "objects": ["person"] * len(members),
                    "description": f"Pedestrian area with {len(members)} people navigating the financial district"
                }

            return zones

        except Exception as e:
            logger.error(f"Error identifying financial pedestrian zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}

    # ------------------------------------------------------------------
    # Landmarks
    # ------------------------------------------------------------------
    def create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
        """
        Create auxiliary zones around a landmark (photo spot, souvenir area).

        Args:
            landmark: Landmark dict; "region", "class_name" and "confidence"
                are read, each with a default.
            index: Landmark index. Not used by this method; kept so the
                signature stays compatible with existing callers.

        Returns:
            Dict with a photography-spot zone and, for landmarks with
            confidence above 0.7, a souvenir-area zone.
        """
        try:
            auxiliary_zones = {}
            landmark_region = landmark.get("region", "middle_center")
            landmark_name = landmark.get("class_name", "Landmark")
            key_prefix = landmark_name.lower().replace(' ', '_')

            # The photo spot is placed facing the landmark, i.e. in the opposite region
            region_mapping = {
                "top_left": "bottom_right",
                "top_center": "bottom_center",
                "top_right": "bottom_left",
                "middle_left": "middle_right",
                "middle_center": "bottom_center",
                "middle_right": "middle_left",
                "bottom_left": "top_right",
                "bottom_center": "top_center",
                "bottom_right": "top_left"
            }
            photo_region = region_mapping.get(landmark_region, landmark_region)

            auxiliary_zones[f"{key_prefix}_photography_spot"] = {
                "name": f"{landmark_name} Photography Spot",
                "description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
                "objects": ["camera", "person", "cell phone"],
                "region": photo_region,
                "primary_function": "Tourist photography"
            }

            # High-confidence landmarks are treated as likely to have a souvenir area
            if landmark.get("confidence", 0) > 0.7:
                # Souvenir stalls sit next to the landmark, not on top of it
                adjacent_regions = {
                    "top_left": ["top_center", "middle_left"],
                    "top_center": ["top_left", "top_right"],
                    "top_right": ["top_center", "middle_right"],
                    "middle_left": ["top_left", "bottom_left"],
                    "middle_center": ["middle_left", "middle_right"],
                    "middle_right": ["top_right", "bottom_right"],
                    "bottom_left": ["middle_left", "bottom_center"],
                    "bottom_center": ["bottom_left", "bottom_right"],
                    "bottom_right": ["bottom_center", "middle_right"]
                }

                if landmark_region in adjacent_regions:
                    souvenir_region = adjacent_regions[landmark_region][0]  # first adjacent region

                    auxiliary_zones[f"{key_prefix}_souvenir_area"] = {
                        "name": f"{landmark_name} Souvenir Area",
                        "description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
                        "objects": ["person", "handbag", "backpack"],
                        "region": souvenir_region,
                        "primary_function": "Tourism commerce"
                    }

            return auxiliary_zones

        except Exception as e:
            logger.error(f"Error creating landmark auxiliary zones: {str(e)}")
            logger.error(traceback.format_exc())
            return {}
|
statistics_processor.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import Dict, List, Optional, Any
|
| 3 |
+
|
| 4 |
+
class StatisticsProcessor:
|
| 5 |
+
"""
|
| 6 |
+
統計分析處理器 - 負責複雜的物件統計分析和數據轉換
|
| 7 |
+
|
| 8 |
+
此類別專門處理物件統計信息的深度分析、Places365信息處理,
|
| 9 |
+
以及基於統計數據生成替換內容的複雜邏輯。
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
def __init__(self):
|
| 13 |
+
"""初始化統計分析處理器"""
|
| 14 |
+
self.logger = logging.getLogger(self.__class__.__name__)
|
| 15 |
+
self.logger.debug("StatisticsProcessor initialized successfully")
|
| 16 |
+
|
| 17 |
+
def generate_statistics_replacements(self, object_statistics: Optional[Dict]) -> Dict[str, str]:
    """
    Build template replacement strings from per-class object statistics.

    Args:
        object_statistics: Mapping of class name to a statistics dict whose
            "count" entry is read. The classes "potted plant", "chair",
            "person" and "dining table" are used; others are ignored.

    Returns:
        Dict[str, str]: Replacement content keyed by template placeholder.
        A class whose count is zero or negative contributes nothing, so the
        output never claims "several chairs" or "0 people" for objects that
        are not there. On an unexpected error the replacements gathered so
        far are returned and a warning is logged.
    """
    replacements: Dict[str, str] = {}

    if not object_statistics:
        return replacements

    number_words = {
        1: "one", 2: "two", 3: "three", 4: "four",
        5: "five", 6: "six", 7: "seven", 8: "eight",
        9: "nine", 10: "ten", 11: "eleven", 12: "twelve"
    }

    def count_of(class_name: str):
        """Return the recorded count for a class, or 0 when the class is absent."""
        if class_name not in object_statistics:
            return 0
        return object_statistics[class_name]["count"]

    try:
        # Plants
        count = count_of("potted plant")
        if count == 1:
            replacements["plant_elements"] = "a potted plant"
        elif 1 < count <= 3:
            replacements["plant_elements"] = f"{count} potted plants"
        elif count > 3:
            replacements["plant_elements"] = f"multiple potted plants ({count} total)"

        # Seating (chairs)
        count = count_of("chair")
        if count == 1:
            replacements["seating"] = "a chair"
            replacements["furniture"] = "a chair"
        elif count in number_words:
            word_count = number_words[count]
            replacements["seating"] = f"{word_count} chairs"
            replacements["furniture"] = f"{word_count} chairs"
        elif 0 < count <= 20:
            replacements["seating"] = "several chairs"
            replacements["furniture"] = "several chairs"
        elif count > 20:
            replacements["seating"] = f"numerous chairs ({count} total)"
            replacements["furniture"] = "numerous chairs"

        # NOTE(review): the original also carried a "chairs and other furniture"
        # branch for scenes mixing chairs with tables/couches/beds, but it was
        # guarded by '"furniture" not in replacements' and therefore could never
        # run once chairs had been handled above. It is omitted here to keep the
        # output unchanged; confirm whether the combined wording is wanted.

        # People
        count = count_of("person")
        if count == 1:
            replacements["people_and_vehicles"] = "a person"
            replacements["pedestrian_flow"] = "an individual walking"
        elif 1 < count <= 5:
            replacements["people_and_vehicles"] = f"{count} people"
            replacements["pedestrian_flow"] = f"{count} people walking"
        elif count > 5:
            replacements["people_and_vehicles"] = f"many people ({count} individuals)"
            replacements["pedestrian_flow"] = f"a crowd of {count} people"

        # Table setup
        count = count_of("dining table")
        if count == 1:
            replacements["table_setup"] = "a dining table"
            replacements["table_description"] = "a dining surface"
        elif count > 1:
            replacements["table_setup"] = f"{count} dining tables"
            replacements["table_description"] = f"{count} dining surfaces"

        self.logger.debug(f"Generated {len(replacements)} statistics-based replacements")

    except Exception as e:
        self.logger.warning(f"Error generating statistics replacements: {str(e)}")

    return replacements
|
| 122 |
+
|
| 123 |
+
def generate_places365_replacements(self, places365_info: Optional[Dict]) -> Dict[str, str]:
    """
    Build template replacement strings from Places365 scene information.

    Args:
        places365_info: Places365 classification dict. "confidence",
            "scene_label" and "attributes" are read, each optional.

    Returns:
        Dict[str, str]: Always contains "places365_context" and
        "places365_atmosphere". Both are empty strings when the info is
        missing, the confidence is not above 0.35, or an error occurs.
    """
    replacements = {"places365_context": "", "places365_atmosphere": ""}

    # A missing or None confidence counts as 0 instead of raising on comparison
    if not places365_info or (places365_info.get('confidence') or 0) <= 0.35:
        return replacements

    try:
        scene_label = (places365_info.get('scene_label') or '').replace('_', ' ')
        attributes = places365_info.get('attributes') or []

        # Scene context
        if scene_label:
            # Choose "a"/"an" from the spelling. This is a heuristic: words such
            # as "utility" start with a vowel letter but a consonant sound.
            lowered = scene_label.lower()
            consonant_sound_prefixes = ("uni", "use", "usu", "uti", "eu", "one")
            takes_an = lowered[0] in "aeiou" and not lowered.startswith(consonant_sound_prefixes)
            article = "an" if takes_an else "a"
            replacements["places365_context"] = f"characteristic of {article} {scene_label}"

        # Atmosphere; natural lighting wins when both attributes are present
        if 'natural_lighting' in attributes:
            replacements["places365_atmosphere"] = "with natural illumination"
        elif 'artificial_lighting' in attributes:
            replacements["places365_atmosphere"] = "under artificial lighting"

        self.logger.debug("Generated Places365-based replacements")

    except Exception as e:
        self.logger.warning(f"Error generating Places365 replacements: {str(e)}")
        replacements["places365_context"] = ""
        replacements["places365_atmosphere"] = ""

    return replacements
|
| 166 |
+
|
| 167 |
+
def analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
    """
    Summarise scene composition to help decide template complexity.

    Args:
        detected_objects: List of detected object dicts; "class_name" is
            read, defaulting to "unknown".

    Returns:
        Dict with "total_objects", "unique_categories",
        "category_distribution" (class name -> count) and
        "complexity_score" (0.3 per object plus 0.7 per distinct class,
        capped at 10). If the input cannot be processed, a warning is
        logged and the same keys are returned with zero / empty values.
    """
    try:
        total_objects = len(detected_objects)

        # Count detections per class
        object_categories: Dict[str, int] = {}
        for obj in detected_objects:
            class_name = obj.get("class_name", "unknown")
            object_categories[class_name] = object_categories.get(class_name, 0) + 1

        # Diversity = number of distinct classes
        unique_categories = len(object_categories)

        return {
            "total_objects": total_objects,
            "unique_categories": unique_categories,
            "category_distribution": object_categories,
            "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
        }

    except Exception as e:
        self.logger.warning(f"Error analyzing scene composition: {str(e)}")
        # Same keys as the success path, so callers can index without checking
        return {
            "total_objects": 0,
            "unique_categories": 0,
            "category_distribution": {},
            "complexity_score": 0
        }
|
| 199 |
+
|
| 200 |
+
def generate_zone_descriptions(self, zone_data: Dict[str, Any], section: Dict[str, Any]) -> List[str]:
    """
    Build one natural-language sentence per functional zone that has objects.

    Zones are visited from the most populated to the least populated. A
    zone's own ``"description"`` is used verbatim unless it is empty or
    contains one of the technical words "zone", "area" or "region"; in that
    case a sentence is generated from the zone name and its first three
    objects, followed by a count of the remaining ones.

    Args:
        zone_data: Mapping of zone name -> zone info dict; the ``"objects"``
            and ``"description"`` keys of each info dict are read.
        section: Section configuration from the template (not read here).

    Returns:
        List[str]: Zone sentences; empty when there is no zone data or when
        an error occurs (the error is logged).
    """
    try:
        descriptions = []
        if not zone_data:
            return descriptions

        def _object_count(entry):
            # entry is a (zone_name, zone_info) pair
            return len(entry[1].get("objects", []))

        # zone_data itself is the zone dictionary; rank zones by object count.
        ranked_zones = sorted(zone_data.items(), key=_object_count, reverse=True)

        for zone_name, zone_info in ranked_zones:
            description = zone_info.get("description", "")
            objects = zone_info.get("objects", [])

            # Zones without objects contribute nothing.
            if not objects:
                continue

            # Prefer an existing description as long as it reads naturally,
            # i.e. it does not mention any technical term.
            if description and all(tech not in description.lower() for tech in ['zone', 'area', 'region']):
                descriptions.append(description)
                continue

            # Otherwise generate a more natural sentence from the zone name.
            clean_zone_name = zone_name.replace('_', ' ').replace(' area', '').replace(' zone', '')
            object_list = ', '.join(objects[:3])

            is_crossing = 'crossing' in zone_name or 'pedestrian' in zone_name
            is_vehicle = 'vehicle' in zone_name or 'traffic' in zone_name

            if is_crossing:
                zone_desc = f"In the central crossing area, there are {object_list}."
            elif is_vehicle:
                zone_desc = f"The vehicle movement area includes {object_list}."
            elif 'control' in zone_name:
                zone_desc = f"Traffic control elements include {object_list}."
            else:
                zone_desc = f"The {clean_zone_name} contains {object_list}."

            # Only the first three objects are listed; mention the remainder.
            if len(objects) > 3:
                zone_desc += f" Along with {len(objects) - 3} additional elements."

            descriptions.append(zone_desc)

        return descriptions

    except Exception as e:
        self.logger.error(f"Error generating zone descriptions: {str(e)}")
        return []
|
| 254 |
+
|
| 255 |
+
def generate_object_summary(self, object_data: List[Dict], section: Dict[str, Any]) -> str:
    """
    Produce a one-sentence summary of the most important object classes.

    Objects are grouped by ``"class_name"``; each class gets an importance of
    ``0.6 * count + 0.4 * average confidence`` and the five most important
    classes are listed, e.g. "The scene features 2 buses and a person.".

    Args:
        object_data: Detection dicts. ``"class_name"`` (missing/empty ->
            ``"unknown"``) and ``"confidence"`` (missing/``None`` -> 0.5) are
            read from each one.
        section: Section configuration from the template (not read here).

    Returns:
        str: The summary sentence, or ``""`` when there are no objects or an
        error occurs (the error is logged).
    """
    # Nouns whose plural is not formed by a suffix rule, plus labels that are
    # already plural and must not receive another "s".
    irregular_plurals = {
        "person": "people",
        "knife": "knives",
        "mouse": "mice",
        "sheep": "sheep",
        "skis": "skis",
        "scissors": "scissors",
    }

    def _pluralize(noun: str) -> str:
        # Only the last word of a multi-word label is inflected
        # ("wine glass" -> "wine glasses").
        prefix, separator, last_word = noun.rpartition(" ")
        lowered = last_word.lower()
        if lowered in irregular_plurals:
            plural = irregular_plurals[lowered]
        elif lowered.endswith(("s", "x", "z", "ch", "sh")):
            plural = last_word + "es"
        elif len(lowered) > 1 and lowered.endswith("y") and lowered[-2] not in "aeiou":
            plural = last_word[:-1] + "ies"
        else:
            plural = last_word + "s"
        return f"{prefix}{separator}{plural}"

    try:
        if not object_data:
            return ""

        # Accumulate count and total confidence per class.
        object_stats = {}
        for obj in object_data:
            # An empty or None class name falls back to "unknown" instead of
            # raising later when its first character is inspected.
            class_name = str(obj.get("class_name") or "unknown")
            confidence = obj.get("confidence")
            if confidence is None:
                confidence = 0.5

            stats = object_stats.setdefault(class_name, {"count": 0, "total_confidence": 0})
            stats["count"] += 1
            stats["total_confidence"] += confidence

        # Rank classes by importance (combines frequency and confidence).
        sorted_objects = []
        for class_name, stats in object_stats.items():
            count = stats["count"]
            avg_confidence = stats["total_confidence"] / count
            importance = count * 0.6 + avg_confidence * 0.4
            sorted_objects.append((class_name, count, importance))

        sorted_objects.sort(key=lambda item: item[2], reverse=True)

        # Turn the top five classes into noun phrases.
        descriptions = []
        for class_name, count, _ in sorted_objects[:5]:
            clean_name = class_name.replace('_', ' ').strip() or "unknown"
            if count == 1:
                article = "an" if clean_name[0].lower() in 'aeiou' else "a"
                descriptions.append(f"{article} {clean_name}")
            else:
                descriptions.append(f"{count} {_pluralize(clean_name)}")

        if len(descriptions) == 1:
            return f"The scene features {descriptions[0]}."
        elif len(descriptions) == 2:
            return f"The scene features {descriptions[0]} and {descriptions[1]}."
        else:
            main_items = ", ".join(descriptions[:-1])
            return f"The scene features {main_items}, and {descriptions[-1]}."

    except Exception as e:
        self.logger.error(f"Error generating object summary: {str(e)}")
        return ""
|
| 313 |
+
|
| 314 |
+
def generate_conclusion(self, template: Dict[str, Any], zone_data: Dict[str, Any],
                        object_data: List[Dict]) -> str:
    """
    Build the closing sentence of a structured scene description.

    The wording depends on the template's ``"scene_type"`` ("indoor",
    "outdoor", anything else -> generic) and reports how many zones and
    objects were supplied.

    Args:
        template: Template configuration; only ``"scene_type"`` is read
            (defaults to ``"general"``).
        zone_data: Zone mapping; only its length is used.
        object_data: Detected objects; only its length is used.

    Returns:
        str: The conclusion sentence, or ``""`` if an error occurs (the
        error is logged).
    """
    try:
        scene_type = template.get("scene_type", "general")
        zones_count, objects_count = len(zone_data), len(object_data)

        # Guard-clause style: return as soon as the scene type is matched.
        if scene_type == "indoor":
            return f"This indoor environment demonstrates clear functional organization with {zones_count} distinct areas and {objects_count} identified objects."

        if scene_type == "outdoor":
            return f"This outdoor scene shows dynamic activity patterns across {zones_count} functional zones with {objects_count} detected elements."

        return f"The scene analysis reveals {zones_count} functional areas containing {objects_count} identifiable objects."

    except Exception as e:
        self.logger.error(f"Error generating conclusion: {str(e)}")
        return ""
|
template_manager.py
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
template_processor.py
ADDED
|
@@ -0,0 +1,429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import traceback
|
| 3 |
+
import re
|
| 4 |
+
from typing import Dict, List, Optional, Union, Any
|
| 5 |
+
|
| 6 |
+
class TemplateProcessor:
    """
    Template processor: filling support, post-processing and structured rendering.

    This class covers the last stage of template handling: cleaning template
    strings before and after they are filled, choosing a template for a scene,
    normalising placeholders and wording, and rendering structured
    (dict-based) templates into a final description.
    """

    def __init__(self):
        """Create the processor and its class-named logger."""
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.debug("TemplateProcessor initialized successfully")

    def preprocess_template(self, template: str) -> str:
        """
        Clean a raw template string before it is filled.

        Two placeholders separated only by a comma (``{a}, {b}``) are replaced
        by a single ``{combined_elements}`` placeholder, and leading commas
        and whitespace are removed.

        Args:
            template: Raw template string.

        Returns:
            str: The pre-processed template; the input is returned unchanged
            if an error occurs (the error is logged).
        """
        try:
            # Collapse patterns that are known to cause trouble when filled.
            template = re.sub(r'\{[^}]*\}\s*,\s*\{[^}]*\}', '{combined_elements}', template)

            # Make sure the template does not start with a comma.
            template = re.sub(r'^[,\s]*', '', template)

            return template.strip()

        except Exception as e:
            self.logger.warning(f"Error preprocessing template: {str(e)}")
            return template

    def postprocess_filled_template(self, filled_template: str) -> str:
        """
        Repair grammar artefacts left in a template after it has been filled.

        Fixes dangling prepositions ("In , " -> "In this scene, "), collapses
        runs of commas, removes leading commas/whitespace, capitalises the
        first character and guarantees terminal punctuation.

        Args:
            filled_template: Template string after placeholder substitution.

        Returns:
            str: The repaired text; the input is returned unchanged if an
            error occurs (the error is logged).
        """
        try:
            text = filled_template

            # Repair prepositions whose placeholder was filled with nothing.
            text = re.sub(r'\bIn\s*,\s*', 'In this scene, ', text)
            text = re.sub(r'\bAt\s*,\s*', 'At this location, ', text)
            text = re.sub(r'\bWithin\s*,\s*', 'Within this area, ', text)

            # Collapse any run of commas (not only pairs) into one comma.
            text = re.sub(r',(?:\s*,)+', ',', text)

            # Drop leading commas/whitespace, and trailing whitespace *before*
            # the punctuation check so "text " does not become "text .".
            text = re.sub(r'^[,\s]+', '', text).rstrip()

            # Capitalise the first character.
            if text and not text[0].isupper():
                text = text[0].upper() + text[1:]

            # Guarantee terminal punctuation.
            if text and not text.endswith(('.', '!', '?')):
                text += '.'

            return text

        except Exception as e:
            self.logger.warning(f"Error postprocessing filled template: {str(e)}")
            return filled_template

    def get_template_by_scene_type(self, scene_type: str, detected_objects: List[Dict],
                                   functional_zones: Dict, template_repository) -> str:
        """
        Choose a template for the scene type and normalise its format.

        The variant ("complex", "moderate" or "simple") is picked from the
        number of zones and detected objects. When the scene type has no
        entry in ``template_repository.templates`` a generic template is used.

        Args:
            scene_type: Scene type key.
            detected_objects: Detected object dicts.
            functional_zones: Functional zone mapping (may be empty/None).
            template_repository: Object exposing a ``templates`` mapping.

        Returns:
            str: The standardised template; a fixed fallback template if
            selection fails (the error is logged).
        """
        try:
            # Gather object statistics for the scene.
            object_stats = self._analyze_scene_composition(detected_objects)
            zone_count = len(functional_zones) if functional_zones else 0
            total_objects = object_stats.get("total_objects", 0)

            templates = template_repository.templates
            if scene_type in templates:
                scene_templates = templates[scene_type]

                # Pick the variant that matches the scene complexity.
                if zone_count >= 3 and total_objects >= 10:
                    template_key = "complex"
                elif zone_count >= 2 or total_objects >= 5:
                    template_key = "moderate"
                else:
                    template_key = "simple"

                if template_key in scene_templates:
                    raw_template = scene_templates[template_key]
                elif "default" in scene_templates:
                    raw_template = scene_templates["default"]
                else:
                    # Last resort: the first template registered for the scene.
                    raw_template = next(iter(scene_templates.values()))
            else:
                # No scene-specific template: fall back to a generic one.
                raw_template = self._get_generic_template(object_stats, zone_count)

            # Normalise placeholders and wording.
            return self._standardize_template_format(raw_template)

        except Exception as e:
            self.logger.error(f"Error selecting template for scene type '{scene_type}': {str(e)}")
            return self._get_fallback_template()

    def _analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
        """
        Summarise the scene composition used to decide template complexity.

        Args:
            detected_objects: Detection dicts; ``"class_name"`` is read from
                each (``"unknown"`` when missing). ``None``/empty yields the
                zeroed statistics.

        Returns:
            Dict: ``total_objects``, ``unique_categories``,
            ``category_distribution`` (class name -> count) and
            ``complexity_score`` (``0.3 * total + 0.7 * unique``, capped at
            10). The same keys are returned zeroed on failure.
        """
        if not detected_objects:
            return {
                "total_objects": 0,
                "unique_categories": 0,
                "category_distribution": {},
                "complexity_score": 0,
            }

        try:
            total_objects = len(detected_objects)

            # Count detections per class.
            object_categories = {}
            for obj in detected_objects:
                class_name = obj.get("class_name", "unknown")
                object_categories[class_name] = object_categories.get(class_name, 0) + 1

            # Scene diversity = number of distinct classes.
            unique_categories = len(object_categories)

            return {
                "total_objects": total_objects,
                "unique_categories": unique_categories,
                "category_distribution": object_categories,
                "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10),
            }

        except Exception as e:
            self.logger.warning(f"Error analyzing scene composition: {str(e)}")
            # Same key set as the success path.
            return {
                "total_objects": 0,
                "unique_categories": 0,
                "category_distribution": {},
                "complexity_score": 0,
            }

    def _get_generic_template(self, object_stats: Dict, zone_count: int) -> str:
        """
        Return a scene-independent template sized to the scene complexity.

        Args:
            object_stats: Statistics dict; ``"complexity_score"`` is read.
            zone_count: Number of functional zones.

        Returns:
            str: One of three generic templates (rich, medium, basic), or the
            fallback template if an error occurs.
        """
        try:
            complexity_score = object_stats.get("complexity_score", 0)

            if complexity_score >= 7 or zone_count >= 3:
                return "This scene presents a comprehensive view featuring {functional_area} with {primary_objects}. The spatial organization demonstrates {spatial_arrangement} across multiple {activity_areas}, creating a dynamic environment with diverse elements and clear functional zones."
            elif complexity_score >= 4 or zone_count >= 2:
                return "The scene displays {functional_area} containing {primary_objects}. The arrangement shows {spatial_organization} with distinct areas serving different purposes within the overall space."
            else:
                return "A {scene_description} featuring {primary_objects} arranged in {basic_layout} within the visible area."

        except Exception as e:
            self.logger.warning(f"Error getting generic template: {str(e)}")
            return self._get_fallback_template()

    def _get_fallback_template(self) -> str:
        """
        Return the placeholder-free template used when nothing else works.

        Returns:
            str: The fallback template string.
        """
        return "A scene featuring various elements and organized areas of activity within the visible space."

    def _standardize_template_format(self, template: str) -> str:
        """
        Normalise placeholders and technical wording in a template.

        Numbered placeholders such as ``{zone_1}`` are mapped to descriptive
        ones (``{functional_area}``), technical terms are replaced by natural
        phrases, and the result is passed through the readability clean-up.

        Args:
            template: Raw template string.

        Returns:
            str: The standardised template. An empty template yields the
            fallback template; on error the input (or the fallback template
            when the input is empty) is returned.
        """
        try:
            if not template:
                return self._get_fallback_template()

            standardized = template

            # Replace numbered, technical placeholders with descriptive ones.
            placeholder_mapping = {
                r'\{zone_\d+\}': '{functional_area}',
                r'\{object_group_\d+\}': '{primary_objects}',
                r'\{region_\d+\}': '{spatial_area}',
                r'\{category_\d+\}': '{object_category}',
                r'\{area_\d+\}': '{activity_area}',
                r'\{section_\d+\}': '{scene_section}'
            }

            for pattern, replacement in placeholder_mapping.items():
                standardized = re.sub(pattern, replacement, standardized)

            # Replace common technical terms with natural wording.
            term_replacements = {
                'functional_zones': 'areas of activity',
                'object_detection': 'visible elements',
                'category_regions': 'organized sections',
                'spatial_distribution': 'arrangement throughout the space',
                'viewpoint_analysis': 'perspective view'
            }

            for tech_term, natural_term in term_replacements.items():
                standardized = standardized.replace(tech_term, natural_term)

            # Final pass for natural-sounding syntax.
            return self._improve_template_readability(standardized)

        except Exception as e:
            self.logger.warning(f"Error standardizing template format: {str(e)}")
            return template if template else self._get_fallback_template()

    def _improve_template_readability(self, template: str) -> str:
        """
        Tidy whitespace, punctuation spacing and duplicated words in a template.

        Args:
            template: Template string.

        Returns:
            str: The tidied template; the input is returned unchanged if an
            error occurs (the error is logged).
        """
        try:
            # Collapse all whitespace (including newlines) to single spaces.
            improved = re.sub(r'\s+', ' ', template).strip()

            # Remove the space that precedes sentence punctuation.
            improved = improved.replace(' . ', '. ')
            improved = improved.replace(' , ', ', ')
            improved = improved.replace(' ; ', '; ')

            # Guarantee terminal punctuation.
            if improved and not improved.endswith(('.', '!', '?')):
                improved += '.'

            # Remove accidentally doubled function words. The first occurrence
            # is kept as written so a sentence-initial capital survives
            # ("The the scene" -> "The scene").
            improved = re.sub(
                r'\b(the|a|an|with|in|of|and)(?:\s+\1\b)+',
                r'\1',
                improved,
                flags=re.IGNORECASE,
            )

            return improved

        except Exception as e:
            self.logger.warning(f"Error improving template readability: {str(e)}")
            return template

    def process_structured_template(self, template: Dict[str, Any], scene_data: Dict[str, Any],
                                    statistics_processor) -> str:
        """
        Render a structured (dict-based) template into a scene description.

        Each entry of ``template["structure"]`` is handled by its ``"type"``:
        "opening" contributes its ``"content"`` verbatim, while
        "zone_analysis", "object_summary" and "conclusion" are delegated to
        the matching ``generate_*`` method of ``statistics_processor``.

        Args:
            template: Structured template dict with a ``"structure"`` list.
            scene_data: Scene analysis data; ``"functional_zones"`` (or
                ``"zones"``) and ``"detected_objects"`` are read.
            statistics_processor: Object providing
                ``generate_zone_descriptions``, ``generate_object_summary``
                and ``generate_conclusion``.

        Returns:
            str: The generated description, or a fallback description when the
            template has no structure or an error occurs.
        """
        try:
            # Pull the relevant parts out of scene_data.
            zone_data = scene_data.get("functional_zones", scene_data.get("zones", {}))
            object_data = scene_data.get("detected_objects", [])

            structure = template.get("structure", [])
            if not structure:
                self.logger.warning("Template has no structure defined")
                return self._generate_fallback_scene_description(scene_data)

            description_parts = []

            # Build the description section by section.
            for section in structure:
                section_type = section.get("type", "")
                content = section.get("content", "")

                if section_type == "opening":
                    description_parts.append(content)

                elif section_type == "zone_analysis":
                    zone_descriptions = statistics_processor.generate_zone_descriptions(zone_data, section)
                    if zone_descriptions:
                        description_parts.extend(zone_descriptions)

                elif section_type == "object_summary":
                    object_summary = statistics_processor.generate_object_summary(object_data, section)
                    if object_summary:
                        description_parts.append(object_summary)

                elif section_type == "conclusion":
                    conclusion = statistics_processor.generate_conclusion(template, zone_data, object_data)
                    if conclusion:
                        description_parts.append(conclusion)

            # Join the parts and normalise the final text.
            final_description = self._standardize_final_description(" ".join(description_parts))
            self.logger.info("Successfully applied structured template")
            return final_description

        except Exception as e:
            self.logger.error(f"Error processing structured template: {str(e)}")
            return self._generate_fallback_scene_description(scene_data)

    def _generate_fallback_scene_description(self, scene_data: Dict[str, Any]) -> str:
        """
        Build a minimal description from object and zone counts.

        Args:
            scene_data: Scene analysis data; ``"detected_objects"``,
                ``"functional_zones"``/``"zones"`` and ``"scene_type"`` are read.

        Returns:
            str: A count-based sentence, or a fixed generic sentence if an
            error occurs.
        """
        try:
            detected_objects = scene_data.get("detected_objects", [])
            zones = scene_data.get("functional_zones", scene_data.get("zones", {}))
            scene_type = scene_data.get("scene_type", "general")

            object_count = len(detected_objects)
            zone_count = len(zones)

            if zone_count > 0 and object_count > 0:
                return f"Scene analysis completed with {zone_count} functional areas containing {object_count} identified objects."
            elif object_count > 0:
                return f"Scene analysis identified {object_count} objects in this {scene_type.replace('_', ' ')} environment."
            else:
                return f"Scene analysis completed for this {scene_type.replace('_', ' ')} environment."

        except Exception as e:
            self.logger.warning(f"Error generating fallback description: {str(e)}")
            return "Scene analysis completed with detected objects and functional areas."

    def _standardize_final_description(self, description: str) -> str:
        """
        Normalise the final description text.

        Removes leftover technical identifiers (``zone_1``, ``_area`` ...) and
        collapses whitespace.

        Args:
            description: Raw description text.

        Returns:
            str: The normalised text; the input is returned unchanged if an
            error occurs (the error is logged).
        """
        try:
            # Strip any technical identifiers that leaked into the text.
            technical_patterns = [
                r'zone_\d+', r'area_\d+', r'region_\d+',
                r'_zone', r'_area', r'_region'
            ]

            cleaned = description
            for pattern in technical_patterns:
                cleaned = re.sub(pattern, '', cleaned, flags=re.IGNORECASE)

            # Collapse whitespace *after* the removals so that a deleted
            # identifier does not leave a double space behind.
            return " ".join(cleaned.split())

        except Exception as e:
            self.logger.error(f"Error standardizing final description: {str(e)}")
            return description

    def generate_fallback_description(self, scene_type: str, detected_objects: List[Dict]) -> str:
        """
        Build a last-resort description when template filling fails entirely.

        Args:
            scene_type: Scene type key; underscores are shown as spaces.
            detected_objects: Detected objects; only their count is used.

        Returns:
            str: A short sentence naming the scene type and element count, or
            a fixed generic sentence if an error occurs.
        """
        try:
            object_count = len(detected_objects)

            if object_count == 0:
                return f"A {scene_type.replace('_', ' ')} scene."
            elif object_count == 1:
                return f"A {scene_type.replace('_', ' ')} scene with one visible element."
            else:
                return f"A {scene_type.replace('_', ' ')} scene with {object_count} visible elements."

        except Exception as e:
            self.logger.warning(f"Error generating fallback description: {str(e)}")
            return "A scene with various elements."
|
template_repository.py
ADDED
|
@@ -0,0 +1,834 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import traceback
|
| 3 |
+
from typing import Dict, List, Optional, Any
|
| 4 |
+
|
| 5 |
+
from scene_detail_templates import SCENE_DETAIL_TEMPLATES
|
| 6 |
+
from object_template_fillers import OBJECT_TEMPLATE_FILLERS
|
| 7 |
+
from viewpoint_templates import VIEWPOINT_TEMPLATES
|
| 8 |
+
from cultural_templates import CULTURAL_TEMPLATES
|
| 9 |
+
from lighting_conditions import LIGHTING_CONDITIONS
|
| 10 |
+
from confidence_templates import CONFIDENCE_TEMPLATES
|
| 11 |
+
|
| 12 |
+
class TemplateRepository:
|
| 13 |
+
"""
|
| 14 |
+
模板資料的管理器 - 負責模板的載入、儲存、檢索和驗證
|
| 15 |
+
|
| 16 |
+
此類別專門處理模板資源的管理,包括從各種來源載入模板、
|
| 17 |
+
驗證模板完整性,以及提供統一的模板檢索介面。
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
def __init__(self, custom_templates_db: Optional[Dict] = None):
    """
    Set up the template repository.

    Loads the templates, builds the template registry, hands any custom
    templates to ``_merge_custom_templates`` and then runs
    ``_validate_templates``. If any of these steps raises, the error and
    traceback are logged and ``self.templates`` is replaced by the result of
    ``_initialize_fallback_templates``.

    Args:
        custom_templates_db: Optional custom template database; when truthy
            it is passed to ``_merge_custom_templates``.
    """
    self.logger = logging.getLogger(self.__class__.__name__)
    # Start from empty containers so both attributes exist even if loading fails.
    self.templates, self.template_registry = {}, {}

    try:
        # Step 1: load the template database.
        self.templates = self._load_templates()

        # Step 2: build the template registry.
        self.template_registry = self._initialize_template_registry()

        # Step 3: merge caller-supplied templates, if any.
        if custom_templates_db:
            self._merge_custom_templates(custom_templates_db)

        # Step 4: validate template integrity.
        self._validate_templates()

        self.logger.info(
            "TemplateRepository initialized successfully with %d template categories",
            len(self.templates),
        )

    except Exception as e:
        error_msg = f"Failed to initialize TemplateRepository: {str(e)}"
        self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
        # Fall back to the basic template set.
        self.templates = self._initialize_fallback_templates()
|
| 53 |
+
|
| 54 |
+
def _load_templates(self) -> Dict:
|
| 55 |
+
"""
|
| 56 |
+
載入所有描述模板
|
| 57 |
+
|
| 58 |
+
Returns:
|
| 59 |
+
Dict: 包含所有模板類別的字典
|
| 60 |
+
"""
|
| 61 |
+
try:
|
| 62 |
+
templates = {}
|
| 63 |
+
|
| 64 |
+
# 載入場景詳細描述模板
|
| 65 |
+
self.logger.debug("Loading scene detail templates")
|
| 66 |
+
try:
|
| 67 |
+
templates["scene_detail_templates"] = SCENE_DETAIL_TEMPLATES
|
| 68 |
+
except NameError:
|
| 69 |
+
self.logger.warning("SCENE_DETAIL_TEMPLATES not defined, using empty dict")
|
| 70 |
+
templates["scene_detail_templates"] = {}
|
| 71 |
+
|
| 72 |
+
# 載入物體模板填充器
|
| 73 |
+
self.logger.debug("Loading object template fillers")
|
| 74 |
+
try:
|
| 75 |
+
templates["object_template_fillers"] = OBJECT_TEMPLATE_FILLERS
|
| 76 |
+
except NameError:
|
| 77 |
+
self.logger.warning("OBJECT_TEMPLATE_FILLERS not defined, using empty dict")
|
| 78 |
+
templates["object_template_fillers"] = {}
|
| 79 |
+
|
| 80 |
+
# 載入視角模板
|
| 81 |
+
self.logger.debug("Loading viewpoint templates")
|
| 82 |
+
try:
|
| 83 |
+
templates["viewpoint_templates"] = VIEWPOINT_TEMPLATES
|
| 84 |
+
except NameError:
|
| 85 |
+
self.logger.warning("VIEWPOINT_TEMPLATES not defined, using empty dict")
|
| 86 |
+
templates["viewpoint_templates"] = {}
|
| 87 |
+
|
| 88 |
+
# 載入文化模板
|
| 89 |
+
self.logger.debug("Loading cultural templates")
|
| 90 |
+
try:
|
| 91 |
+
templates["cultural_templates"] = CULTURAL_TEMPLATES
|
| 92 |
+
except NameError:
|
| 93 |
+
self.logger.warning("CULTURAL_TEMPLATES not defined, using empty dict")
|
| 94 |
+
templates["cultural_templates"] = {}
|
| 95 |
+
|
| 96 |
+
# 從照明條件模組載入照明模板
|
| 97 |
+
self.logger.debug("Loading lighting templates")
|
| 98 |
+
try:
|
| 99 |
+
templates["lighting_templates"] = self._extract_lighting_templates()
|
| 100 |
+
except Exception as e:
|
| 101 |
+
self.logger.warning(f"Failed to extract lighting templates: {str(e)}")
|
| 102 |
+
templates["lighting_templates"] = {}
|
| 103 |
+
|
| 104 |
+
# 載入信心度模板
|
| 105 |
+
self.logger.debug("Loading confidence templates")
|
| 106 |
+
try:
|
| 107 |
+
templates["confidence_templates"] = CONFIDENCE_TEMPLATES
|
| 108 |
+
except NameError:
|
| 109 |
+
self.logger.warning("CONFIDENCE_TEMPLATES not defined, using empty dict")
|
| 110 |
+
templates["confidence_templates"] = {}
|
| 111 |
+
|
| 112 |
+
# 初始化默認模板(當成備份)
|
| 113 |
+
self._initialize_default_templates(templates)
|
| 114 |
+
|
| 115 |
+
self.logger.info("Successfully loaded %d template categories", len(templates))
|
| 116 |
+
return templates
|
| 117 |
+
|
| 118 |
+
except Exception as e:
|
| 119 |
+
error_msg = f"Unexpected error during template loading: {str(e)}"
|
| 120 |
+
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
| 121 |
+
# 返回基本模板
|
| 122 |
+
return self._initialize_fallback_templates()
|
| 123 |
+
|
| 124 |
+
def _initialize_template_registry(self) -> Dict[str, Dict[str, Any]]:
|
| 125 |
+
"""
|
| 126 |
+
初始化模板註冊表,包含各種場景類型的結構化模板
|
| 127 |
+
|
| 128 |
+
Returns:
|
| 129 |
+
Dict[str, Dict[str, Any]]: 模板註冊表字典
|
| 130 |
+
"""
|
| 131 |
+
try:
|
| 132 |
+
template_registry = {
|
| 133 |
+
"indoor_detailed": {
|
| 134 |
+
"scene_type": "indoor",
|
| 135 |
+
"complexity": "high",
|
| 136 |
+
"structure": [
|
| 137 |
+
{
|
| 138 |
+
"type": "opening",
|
| 139 |
+
"content": "This indoor scene presents a comprehensive view of a well-organized living space."
|
| 140 |
+
},
|
| 141 |
+
{
|
| 142 |
+
"type": "zone_analysis",
|
| 143 |
+
"priority": "functional_areas",
|
| 144 |
+
"detail_level": "detailed"
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"type": "object_summary",
|
| 148 |
+
"grouping": "by_category",
|
| 149 |
+
"include_counts": True
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"type": "conclusion",
|
| 153 |
+
"style": "analytical"
|
| 154 |
+
}
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
|
| 158 |
+
"indoor_moderate": {
|
| 159 |
+
"scene_type": "indoor",
|
| 160 |
+
"complexity": "medium",
|
| 161 |
+
"structure": [
|
| 162 |
+
{
|
| 163 |
+
"type": "opening",
|
| 164 |
+
"content": "The indoor environment displays organized functional areas."
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"type": "zone_analysis",
|
| 168 |
+
"priority": "main_areas",
|
| 169 |
+
"detail_level": "moderate"
|
| 170 |
+
},
|
| 171 |
+
{
|
| 172 |
+
"type": "object_summary",
|
| 173 |
+
"grouping": "by_function",
|
| 174 |
+
"include_counts": False
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"type": "conclusion",
|
| 178 |
+
"style": "descriptive"
|
| 179 |
+
}
|
| 180 |
+
]
|
| 181 |
+
},
|
| 182 |
+
|
| 183 |
+
"indoor_simple": {
|
| 184 |
+
"scene_type": "indoor",
|
| 185 |
+
"complexity": "low",
|
| 186 |
+
"structure": [
|
| 187 |
+
{
|
| 188 |
+
"type": "opening",
|
| 189 |
+
"content": "An indoor space with visible furniture and household items."
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"type": "zone_analysis",
|
| 193 |
+
"priority": "basic_areas",
|
| 194 |
+
"detail_level": "simple"
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"type": "object_summary",
|
| 198 |
+
"grouping": "general",
|
| 199 |
+
"include_counts": False
|
| 200 |
+
}
|
| 201 |
+
]
|
| 202 |
+
},
|
| 203 |
+
|
| 204 |
+
"outdoor_detailed": {
|
| 205 |
+
"scene_type": "outdoor",
|
| 206 |
+
"complexity": "high",
|
| 207 |
+
"structure": [
|
| 208 |
+
{
|
| 209 |
+
"type": "opening",
|
| 210 |
+
"content": "This outdoor scene captures a dynamic urban environment with multiple activity zones."
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"type": "zone_analysis",
|
| 214 |
+
"priority": "activity_areas",
|
| 215 |
+
"detail_level": "detailed"
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"type": "object_summary",
|
| 219 |
+
"grouping": "by_location",
|
| 220 |
+
"include_counts": True
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"type": "conclusion",
|
| 224 |
+
"style": "environmental"
|
| 225 |
+
}
|
| 226 |
+
]
|
| 227 |
+
},
|
| 228 |
+
|
| 229 |
+
"outdoor_moderate": {
|
| 230 |
+
"scene_type": "outdoor",
|
| 231 |
+
"complexity": "medium",
|
| 232 |
+
"structure": [
|
| 233 |
+
{
|
| 234 |
+
"type": "opening",
|
| 235 |
+
"content": "The outdoor scene shows organized public spaces and pedestrian areas."
|
| 236 |
+
},
|
| 237 |
+
{
|
| 238 |
+
"type": "zone_analysis",
|
| 239 |
+
"priority": "public_areas",
|
| 240 |
+
"detail_level": "moderate"
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"type": "object_summary",
|
| 244 |
+
"grouping": "by_type",
|
| 245 |
+
"include_counts": False
|
| 246 |
+
},
|
| 247 |
+
{
|
| 248 |
+
"type": "conclusion",
|
| 249 |
+
"style": "observational"
|
| 250 |
+
}
|
| 251 |
+
]
|
| 252 |
+
},
|
| 253 |
+
|
| 254 |
+
"outdoor_simple": {
|
| 255 |
+
"scene_type": "outdoor",
|
| 256 |
+
"complexity": "low",
|
| 257 |
+
"structure": [
|
| 258 |
+
{
|
| 259 |
+
"type": "opening",
|
| 260 |
+
"content": "An outdoor area with pedestrians and urban elements."
|
| 261 |
+
},
|
| 262 |
+
{
|
| 263 |
+
"type": "zone_analysis",
|
| 264 |
+
"priority": "basic_areas",
|
| 265 |
+
"detail_level": "simple"
|
| 266 |
+
},
|
| 267 |
+
{
|
| 268 |
+
"type": "object_summary",
|
| 269 |
+
"grouping": "general",
|
| 270 |
+
"include_counts": False
|
| 271 |
+
}
|
| 272 |
+
]
|
| 273 |
+
},
|
| 274 |
+
|
| 275 |
+
"commercial_detailed": {
|
| 276 |
+
"scene_type": "commercial",
|
| 277 |
+
"complexity": "high",
|
| 278 |
+
"structure": [
|
| 279 |
+
{
|
| 280 |
+
"type": "opening",
|
| 281 |
+
"content": "This commercial environment demonstrates organized retail and customer service areas."
|
| 282 |
+
},
|
| 283 |
+
{
|
| 284 |
+
"type": "zone_analysis",
|
| 285 |
+
"priority": "service_areas",
|
| 286 |
+
"detail_level": "detailed"
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"type": "object_summary",
|
| 290 |
+
"grouping": "by_function",
|
| 291 |
+
"include_counts": True
|
| 292 |
+
},
|
| 293 |
+
{
|
| 294 |
+
"type": "conclusion",
|
| 295 |
+
"style": "business"
|
| 296 |
+
}
|
| 297 |
+
]
|
| 298 |
+
},
|
| 299 |
+
|
| 300 |
+
"transportation_detailed": {
|
| 301 |
+
"scene_type": "transportation",
|
| 302 |
+
"complexity": "high",
|
| 303 |
+
"structure": [
|
| 304 |
+
{
|
| 305 |
+
"type": "opening",
|
| 306 |
+
"content": "This transportation hub features organized passenger facilities and transit infrastructure."
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"type": "zone_analysis",
|
| 310 |
+
"priority": "transit_areas",
|
| 311 |
+
"detail_level": "detailed"
|
| 312 |
+
},
|
| 313 |
+
{
|
| 314 |
+
"type": "object_summary",
|
| 315 |
+
"grouping": "by_transit_function",
|
| 316 |
+
"include_counts": True
|
| 317 |
+
},
|
| 318 |
+
{
|
| 319 |
+
"type": "conclusion",
|
| 320 |
+
"style": "infrastructure"
|
| 321 |
+
}
|
| 322 |
+
]
|
| 323 |
+
},
|
| 324 |
+
|
| 325 |
+
"default": {
|
| 326 |
+
"scene_type": "general",
|
| 327 |
+
"complexity": "medium",
|
| 328 |
+
"structure": [
|
| 329 |
+
{
|
| 330 |
+
"type": "opening",
|
| 331 |
+
"content": "The scene displays various elements organized across functional areas."
|
| 332 |
+
},
|
| 333 |
+
{
|
| 334 |
+
"type": "zone_analysis",
|
| 335 |
+
"priority": "general_areas",
|
| 336 |
+
"detail_level": "moderate"
|
| 337 |
+
},
|
| 338 |
+
{
|
| 339 |
+
"type": "object_summary",
|
| 340 |
+
"grouping": "general",
|
| 341 |
+
"include_counts": False
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"type": "conclusion",
|
| 345 |
+
"style": "general"
|
| 346 |
+
}
|
| 347 |
+
]
|
| 348 |
+
}
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
self.logger.debug(f"Initialized template registry with {len(template_registry)} templates")
|
| 352 |
+
return template_registry
|
| 353 |
+
|
| 354 |
+
except Exception as e:
|
| 355 |
+
error_msg = f"Error initializing template registry: {str(e)}"
|
| 356 |
+
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
| 357 |
+
# 返回最基本的註冊表
|
| 358 |
+
return {
|
| 359 |
+
"default": {
|
| 360 |
+
"scene_type": "general",
|
| 361 |
+
"complexity": "low",
|
| 362 |
+
"structure": [
|
| 363 |
+
{
|
| 364 |
+
"type": "opening",
|
| 365 |
+
"content": "Scene analysis completed with identified objects and areas."
|
| 366 |
+
}
|
| 367 |
+
]
|
| 368 |
+
}
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
def _extract_lighting_templates(self) -> Dict:
|
| 372 |
+
"""
|
| 373 |
+
從照明條件模組提取照明描述模板
|
| 374 |
+
|
| 375 |
+
Returns:
|
| 376 |
+
Dict: 照明模板字典
|
| 377 |
+
"""
|
| 378 |
+
try:
|
| 379 |
+
lighting_templates = {}
|
| 380 |
+
|
| 381 |
+
# 從 LIGHTING_CONDITIONS 提取時間描述
|
| 382 |
+
time_descriptions = LIGHTING_CONDITIONS.get("time_descriptions", {})
|
| 383 |
+
|
| 384 |
+
for time_key, time_data in time_descriptions.items():
|
| 385 |
+
if isinstance(time_data, dict) and "general" in time_data:
|
| 386 |
+
lighting_templates[time_key] = time_data["general"]
|
| 387 |
+
else:
|
| 388 |
+
# 如果數據結構不符合預期,使用備用描述
|
| 389 |
+
lighting_templates[time_key] = f"The scene is captured during {time_key.replace('_', ' ')}."
|
| 390 |
+
|
| 391 |
+
# 確保至少有基本的照明模板
|
| 392 |
+
if not lighting_templates:
|
| 393 |
+
self.logger.warning("No lighting templates found, using defaults")
|
| 394 |
+
lighting_templates = self._get_default_lighting_templates()
|
| 395 |
+
|
| 396 |
+
self.logger.debug("Extracted %d lighting templates", len(lighting_templates))
|
| 397 |
+
return lighting_templates
|
| 398 |
+
|
| 399 |
+
except Exception as e:
|
| 400 |
+
self.logger.warning(f"Error extracting lighting templates: {str(e)}, using defaults")
|
| 401 |
+
return self._get_default_lighting_templates()
|
| 402 |
+
|
| 403 |
+
def _get_default_lighting_templates(self) -> Dict:
|
| 404 |
+
"""獲取默認照明模板"""
|
| 405 |
+
return {
|
| 406 |
+
"day_clear": "The scene is captured during clear daylight conditions.",
|
| 407 |
+
"day_overcast": "The scene is captured during overcast daylight.",
|
| 408 |
+
"night": "The scene is captured at night with artificial lighting.",
|
| 409 |
+
"dawn": "The scene is captured during dawn with soft natural lighting.",
|
| 410 |
+
"dusk": "The scene is captured during dusk with diminishing natural light.",
|
| 411 |
+
"unknown": "The lighting conditions are not clearly identifiable."
|
| 412 |
+
}
|
| 413 |
+
|
| 414 |
+
def _initialize_default_templates(self, templates: Dict):
|
| 415 |
+
"""
|
| 416 |
+
初始化默認模板作為備份機制
|
| 417 |
+
|
| 418 |
+
Args:
|
| 419 |
+
templates: 要檢查和補充的模板字典
|
| 420 |
+
"""
|
| 421 |
+
try:
|
| 422 |
+
# 置信度模板備份
|
| 423 |
+
if "confidence_templates" not in templates or not templates["confidence_templates"]:
|
| 424 |
+
templates["confidence_templates"] = {
|
| 425 |
+
"high": "{description} {details}",
|
| 426 |
+
"medium": "This appears to be {description} {details}",
|
| 427 |
+
"low": "This might be {description}, but the confidence is low. {details}"
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
# 場景詳細模板備份
|
| 431 |
+
if "scene_detail_templates" not in templates or not templates["scene_detail_templates"]:
|
| 432 |
+
templates["scene_detail_templates"] = {
|
| 433 |
+
"default": ["A scene with various elements and objects."]
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
# 物體填充模板備份
|
| 437 |
+
if "object_template_fillers" not in templates or not templates["object_template_fillers"]:
|
| 438 |
+
templates["object_template_fillers"] = {
|
| 439 |
+
"default": ["various items", "different objects", "multiple elements"]
|
| 440 |
+
}
|
| 441 |
+
|
| 442 |
+
# 視角模板備份
|
| 443 |
+
if "viewpoint_templates" not in templates or not templates["viewpoint_templates"]:
|
| 444 |
+
templates["viewpoint_templates"] = {
|
| 445 |
+
"eye_level": {
|
| 446 |
+
"prefix": "From eye level, ",
|
| 447 |
+
"observation": "the scene is viewed straight ahead.",
|
| 448 |
+
"short_desc": "at eye level"
|
| 449 |
+
},
|
| 450 |
+
"aerial": {
|
| 451 |
+
"prefix": "From above, ",
|
| 452 |
+
"observation": "the scene is viewed from a bird's-eye perspective.",
|
| 453 |
+
"short_desc": "from above"
|
| 454 |
+
},
|
| 455 |
+
"low_angle": {
|
| 456 |
+
"prefix": "From a low angle, ",
|
| 457 |
+
"observation": "the scene is viewed from below looking upward.",
|
| 458 |
+
"short_desc": "from below"
|
| 459 |
+
},
|
| 460 |
+
"elevated": {
|
| 461 |
+
"prefix": "From an elevated position, ",
|
| 462 |
+
"observation": "the scene is viewed from a higher vantage point.",
|
| 463 |
+
"short_desc": "from an elevated position"
|
| 464 |
+
}
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
# 文化模板備份
|
| 468 |
+
if "cultural_templates" not in templates or not templates["cultural_templates"]:
|
| 469 |
+
templates["cultural_templates"] = {
|
| 470 |
+
"asian": {
|
| 471 |
+
"elements": ["traditional architectural elements", "cultural signage", "Asian design features"],
|
| 472 |
+
"description": "The scene displays distinctive Asian cultural characteristics with {elements}."
|
| 473 |
+
},
|
| 474 |
+
"european": {
|
| 475 |
+
"elements": ["classical architecture", "European design elements", "historic features"],
|
| 476 |
+
"description": "The scene exhibits European architectural and cultural elements including {elements}."
|
| 477 |
+
}
|
| 478 |
+
}
|
| 479 |
+
|
| 480 |
+
self.logger.debug("Default templates initialized as backup")
|
| 481 |
+
|
| 482 |
+
except Exception as e:
|
| 483 |
+
self.logger.error(f"Error initializing default templates: {str(e)}")
|
| 484 |
+
|
| 485 |
+
def _merge_custom_templates(self, custom_templates: Dict):
|
| 486 |
+
"""
|
| 487 |
+
合併自定義模板到現有模板庫
|
| 488 |
+
|
| 489 |
+
Args:
|
| 490 |
+
custom_templates: 自定義模板字典
|
| 491 |
+
"""
|
| 492 |
+
try:
|
| 493 |
+
for template_category, custom_content in custom_templates.items():
|
| 494 |
+
if template_category in self.templates:
|
| 495 |
+
if isinstance(self.templates[template_category], dict) and isinstance(custom_content, dict):
|
| 496 |
+
self.templates[template_category].update(custom_content)
|
| 497 |
+
self.logger.debug(f"Merged custom templates for category: {template_category}")
|
| 498 |
+
else:
|
| 499 |
+
self.templates[template_category] = custom_content
|
| 500 |
+
self.logger.debug(f"Replaced templates for category: {template_category}")
|
| 501 |
+
else:
|
| 502 |
+
self.templates[template_category] = custom_content
|
| 503 |
+
self.logger.debug(f"Added new template category: {template_category}")
|
| 504 |
+
|
| 505 |
+
self.logger.info("Successfully merged custom templates")
|
| 506 |
+
|
| 507 |
+
except Exception as e:
|
| 508 |
+
self.logger.warning(f"Error merging custom templates: {str(e)}")
|
| 509 |
+
|
| 510 |
+
def _validate_templates(self):
|
| 511 |
+
"""
|
| 512 |
+
驗證模板完整性和有效性
|
| 513 |
+
"""
|
| 514 |
+
try:
|
| 515 |
+
required_categories = [
|
| 516 |
+
"scene_detail_templates",
|
| 517 |
+
"object_template_fillers",
|
| 518 |
+
"viewpoint_templates",
|
| 519 |
+
"cultural_templates",
|
| 520 |
+
"lighting_templates",
|
| 521 |
+
"confidence_templates"
|
| 522 |
+
]
|
| 523 |
+
|
| 524 |
+
missing_categories = []
|
| 525 |
+
for category in required_categories:
|
| 526 |
+
if category not in self.templates:
|
| 527 |
+
missing_categories.append(category)
|
| 528 |
+
elif not self.templates[category]:
|
| 529 |
+
self.logger.warning(f"Template category '{category}' is empty")
|
| 530 |
+
|
| 531 |
+
if missing_categories:
|
| 532 |
+
error_msg = f"Missing required template categories: {missing_categories}"
|
| 533 |
+
self.logger.warning(error_msg)
|
| 534 |
+
# 為缺失的類別創建空模板
|
| 535 |
+
for category in missing_categories:
|
| 536 |
+
self.templates[category] = {}
|
| 537 |
+
|
| 538 |
+
# 驗證視角模板結構
|
| 539 |
+
self._validate_viewpoint_templates()
|
| 540 |
+
|
| 541 |
+
# 驗證文化模板結構
|
| 542 |
+
self._validate_cultural_templates()
|
| 543 |
+
|
| 544 |
+
self.logger.debug("Template validation completed successfully")
|
| 545 |
+
|
| 546 |
+
except Exception as e:
|
| 547 |
+
error_msg = f"Template validation failed: {str(e)}"
|
| 548 |
+
self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
|
| 549 |
+
|
| 550 |
+
def _validate_viewpoint_templates(self):
|
| 551 |
+
"""驗證視角模板結構"""
|
| 552 |
+
viewpoint_templates = self.templates.get("viewpoint_templates", {})
|
| 553 |
+
|
| 554 |
+
for viewpoint, template_data in viewpoint_templates.items():
|
| 555 |
+
if not isinstance(template_data, dict):
|
| 556 |
+
self.logger.warning(f"Invalid viewpoint template structure for '{viewpoint}'")
|
| 557 |
+
continue
|
| 558 |
+
|
| 559 |
+
required_keys = ["prefix", "observation"]
|
| 560 |
+
for key in required_keys:
|
| 561 |
+
if key not in template_data:
|
| 562 |
+
self.logger.warning(f"Missing '{key}' in viewpoint template '{viewpoint}'")
|
| 563 |
+
|
| 564 |
+
def _validate_cultural_templates(self):
|
| 565 |
+
"""驗證文化模板結構"""
|
| 566 |
+
cultural_templates = self.templates.get("cultural_templates", {})
|
| 567 |
+
|
| 568 |
+
for culture, template_data in cultural_templates.items():
|
| 569 |
+
if not isinstance(template_data, dict):
|
| 570 |
+
self.logger.warning(f"Invalid cultural template structure for '{culture}'")
|
| 571 |
+
continue
|
| 572 |
+
|
| 573 |
+
if "elements" not in template_data or "description" not in template_data:
|
| 574 |
+
self.logger.warning(f"Missing required keys in cultural template '{culture}'")
|
| 575 |
+
|
| 576 |
+
def _initialize_fallback_templates(self) -> Dict:
|
| 577 |
+
"""
|
| 578 |
+
初始化備用模板系統,當主要載入失敗時使用
|
| 579 |
+
|
| 580 |
+
Returns:
|
| 581 |
+
Dict: 最基本的模板字典
|
| 582 |
+
"""
|
| 583 |
+
return {
|
| 584 |
+
"scene_detail_templates": {"default": ["A scene with various elements."]},
|
| 585 |
+
"object_template_fillers": {"default": ["various items"]},
|
| 586 |
+
"viewpoint_templates": {
|
| 587 |
+
"eye_level": {
|
| 588 |
+
"prefix": "From eye level, ",
|
| 589 |
+
"observation": "the scene is viewed straight ahead.",
|
| 590 |
+
"short_desc": "at eye level"
|
| 591 |
+
}
|
| 592 |
+
},
|
| 593 |
+
"cultural_templates": {"default": {"elements": ["elements"], "description": "The scene displays cultural elements."}},
|
| 594 |
+
"lighting_templates": {"unknown": "The lighting conditions are not clearly identifiable."},
|
| 595 |
+
"confidence_templates": {"medium": "{description} {details}"}
|
| 596 |
+
}
|
| 597 |
+
|
| 598 |
+
def get_template(self, category: str, key: Optional[str] = None) -> Any:
|
| 599 |
+
"""
|
| 600 |
+
獲取指定類別的模板
|
| 601 |
+
|
| 602 |
+
Args:
|
| 603 |
+
category: 模板類別名稱
|
| 604 |
+
key: 可選的具體模板鍵值
|
| 605 |
+
|
| 606 |
+
Returns:
|
| 607 |
+
Any: 請求的模板內容,如果不存在則返回空字典或空字符串
|
| 608 |
+
"""
|
| 609 |
+
try:
|
| 610 |
+
if category not in self.templates:
|
| 611 |
+
self.logger.warning(f"Template category '{category}' not found")
|
| 612 |
+
return {} if key is None else ""
|
| 613 |
+
|
| 614 |
+
if key is None:
|
| 615 |
+
return self.templates[category]
|
| 616 |
+
|
| 617 |
+
category_templates = self.templates[category]
|
| 618 |
+
if not isinstance(category_templates, dict):
|
| 619 |
+
self.logger.warning(f"Template category '{category}' is not a dictionary")
|
| 620 |
+
return ""
|
| 621 |
+
|
| 622 |
+
if key not in category_templates:
|
| 623 |
+
self.logger.warning(f"Template key '{key}' not found in category '{category}'")
|
| 624 |
+
return ""
|
| 625 |
+
|
| 626 |
+
return category_templates[key]
|
| 627 |
+
|
| 628 |
+
except Exception as e:
|
| 629 |
+
error_msg = f"Error retrieving template {category}.{key}: {str(e)}"
|
| 630 |
+
self.logger.error(error_msg)
|
| 631 |
+
return {} if key is None else ""
|
| 632 |
+
|
| 633 |
+
def get_template_categories(self) -> List[str]:
|
| 634 |
+
"""
|
| 635 |
+
獲取所有可用的模板類別名稱
|
| 636 |
+
|
| 637 |
+
Returns:
|
| 638 |
+
List[str]: 模板類別名稱列表
|
| 639 |
+
"""
|
| 640 |
+
return list(self.templates.keys())
|
| 641 |
+
|
| 642 |
+
def template_exists(self, category: str, key: Optional[str] = None) -> bool:
|
| 643 |
+
"""
|
| 644 |
+
檢查模板是否存在
|
| 645 |
+
|
| 646 |
+
Args:
|
| 647 |
+
category: 模板類別
|
| 648 |
+
key: 可選的模板鍵值
|
| 649 |
+
|
| 650 |
+
Returns:
|
| 651 |
+
bool: 模板是否存在
|
| 652 |
+
"""
|
| 653 |
+
try:
|
| 654 |
+
if category not in self.templates:
|
| 655 |
+
return False
|
| 656 |
+
|
| 657 |
+
if key is None:
|
| 658 |
+
return True
|
| 659 |
+
|
| 660 |
+
category_templates = self.templates[category]
|
| 661 |
+
if isinstance(category_templates, dict):
|
| 662 |
+
return key in category_templates
|
| 663 |
+
|
| 664 |
+
return False
|
| 665 |
+
|
| 666 |
+
except Exception as e:
|
| 667 |
+
self.logger.warning(f"Error checking template existence for {category}.{key}: {str(e)}")
|
| 668 |
+
return False
|
| 669 |
+
|
| 670 |
+
def get_confidence_template(self, confidence_level: str) -> str:
|
| 671 |
+
"""
|
| 672 |
+
獲取指定信心度級別的模板
|
| 673 |
+
|
| 674 |
+
Args:
|
| 675 |
+
confidence_level: 信心度級別 ('high', 'medium', 'low')
|
| 676 |
+
|
| 677 |
+
Returns:
|
| 678 |
+
str: 信心度模板字符串
|
| 679 |
+
"""
|
| 680 |
+
try:
|
| 681 |
+
confidence_templates = self.templates.get("confidence_templates", {})
|
| 682 |
+
|
| 683 |
+
if confidence_level in confidence_templates:
|
| 684 |
+
return confidence_templates[confidence_level]
|
| 685 |
+
|
| 686 |
+
# 備用模板
|
| 687 |
+
fallback_templates = {
|
| 688 |
+
"high": "{description} {details}",
|
| 689 |
+
"medium": "This appears to be {description} {details}",
|
| 690 |
+
"low": "This might be {description}, but the confidence is low. {details}"
|
| 691 |
+
}
|
| 692 |
+
|
| 693 |
+
return fallback_templates.get(confidence_level, "{description} {details}")
|
| 694 |
+
|
| 695 |
+
except Exception as e:
|
| 696 |
+
self.logger.warning(f"Error getting confidence template for '{confidence_level}': {str(e)}")
|
| 697 |
+
return "{description} {details}"
|
| 698 |
+
|
| 699 |
+
def get_lighting_template(self, lighting_type: str) -> str:
|
| 700 |
+
"""
|
| 701 |
+
獲取指定照明類型的模板
|
| 702 |
+
|
| 703 |
+
Args:
|
| 704 |
+
lighting_type: 照明類型
|
| 705 |
+
|
| 706 |
+
Returns:
|
| 707 |
+
str: 照明描述模板
|
| 708 |
+
"""
|
| 709 |
+
try:
|
| 710 |
+
lighting_templates = self.templates.get("lighting_templates", {})
|
| 711 |
+
|
| 712 |
+
if lighting_type in lighting_templates:
|
| 713 |
+
return lighting_templates[lighting_type]
|
| 714 |
+
|
| 715 |
+
# 備用模板
|
| 716 |
+
return f"The scene is captured with {lighting_type.replace('_', ' ')} lighting conditions."
|
| 717 |
+
|
| 718 |
+
except Exception as e:
|
| 719 |
+
self.logger.warning(f"Error getting lighting template for '{lighting_type}': {str(e)}")
|
| 720 |
+
return "The lighting conditions are not clearly identifiable."
|
| 721 |
+
|
| 722 |
+
def get_viewpoint_template(self, viewpoint: str) -> Dict[str, str]:
|
| 723 |
+
"""
|
| 724 |
+
獲取指定視角的模板
|
| 725 |
+
|
| 726 |
+
Args:
|
| 727 |
+
viewpoint: 視角類型
|
| 728 |
+
|
| 729 |
+
Returns:
|
| 730 |
+
Dict[str, str]: 包含prefix、observation等鍵的視角模板字典
|
| 731 |
+
"""
|
| 732 |
+
try:
|
| 733 |
+
viewpoint_templates = self.templates.get("viewpoint_templates", {})
|
| 734 |
+
|
| 735 |
+
if viewpoint in viewpoint_templates:
|
| 736 |
+
return viewpoint_templates[viewpoint]
|
| 737 |
+
|
| 738 |
+
# 備用模板
|
| 739 |
+
fallback_templates = {
|
| 740 |
+
"eye_level": {
|
| 741 |
+
"prefix": "From eye level, ",
|
| 742 |
+
"observation": "the scene is viewed straight ahead.",
|
| 743 |
+
"short_desc": "at eye level"
|
| 744 |
+
},
|
| 745 |
+
"aerial": {
|
| 746 |
+
"prefix": "From above, ",
|
| 747 |
+
"observation": "the scene is viewed from a bird's-eye perspective.",
|
| 748 |
+
"short_desc": "from above"
|
| 749 |
+
},
|
| 750 |
+
"low_angle": {
|
| 751 |
+
"prefix": "From a low angle, ",
|
| 752 |
+
"observation": "the scene is viewed from below looking upward.",
|
| 753 |
+
"short_desc": "from below"
|
| 754 |
+
},
|
| 755 |
+
"elevated": {
|
| 756 |
+
"prefix": "From an elevated position, ",
|
| 757 |
+
"observation": "the scene is viewed from a higher vantage point.",
|
| 758 |
+
"short_desc": "from an elevated position"
|
| 759 |
+
}
|
| 760 |
+
}
|
| 761 |
+
|
| 762 |
+
return fallback_templates.get(viewpoint, fallback_templates["eye_level"])
|
| 763 |
+
|
| 764 |
+
except Exception as e:
|
| 765 |
+
self.logger.warning(f"Error getting viewpoint template for '{viewpoint}': {str(e)}")
|
| 766 |
+
return {
|
| 767 |
+
"prefix": "",
|
| 768 |
+
"observation": "the scene is viewed normally.",
|
| 769 |
+
"short_desc": "normally"
|
| 770 |
+
}
|
| 771 |
+
|
| 772 |
+
def get_cultural_template(self, cultural_context: str) -> Dict[str, Any]:
|
| 773 |
+
"""
|
| 774 |
+
獲取指定文化語境的模板
|
| 775 |
+
|
| 776 |
+
Args:
|
| 777 |
+
cultural_context: 文化語境
|
| 778 |
+
|
| 779 |
+
Returns:
|
| 780 |
+
Dict[str, Any]: 文化模板字典
|
| 781 |
+
"""
|
| 782 |
+
try:
|
| 783 |
+
cultural_templates = self.templates.get("cultural_templates", {})
|
| 784 |
+
|
| 785 |
+
if cultural_context in cultural_templates:
|
| 786 |
+
return cultural_templates[cultural_context]
|
| 787 |
+
|
| 788 |
+
# 備用模板
|
| 789 |
+
return {
|
| 790 |
+
"elements": ["cultural elements"],
|
| 791 |
+
"description": f"The scene displays {cultural_context} cultural characteristics."
|
| 792 |
+
}
|
| 793 |
+
|
| 794 |
+
except Exception as e:
|
| 795 |
+
self.logger.warning(f"Error getting cultural template for '{cultural_context}': {str(e)}")
|
| 796 |
+
return {
|
| 797 |
+
"elements": ["various elements"],
|
| 798 |
+
"description": "The scene displays cultural characteristics."
|
| 799 |
+
}
|
| 800 |
+
|
| 801 |
+
def get_scene_detail_templates(self, scene_type: str, viewpoint: Optional[str] = None) -> List[str]:
|
| 802 |
+
"""
|
| 803 |
+
獲取場景詳細描述模板
|
| 804 |
+
|
| 805 |
+
Args:
|
| 806 |
+
scene_type: 場景類型
|
| 807 |
+
viewpoint: 可選的視角類型
|
| 808 |
+
|
| 809 |
+
Returns:
|
| 810 |
+
List[str]: 場景描述模板列表
|
| 811 |
+
"""
|
| 812 |
+
try:
|
| 813 |
+
scene_templates = self.templates.get("scene_detail_templates", {})
|
| 814 |
+
|
| 815 |
+
# 首先嘗試獲取特定視角的模板
|
| 816 |
+
if viewpoint:
|
| 817 |
+
viewpoint_key = f"{scene_type}_{viewpoint}"
|
| 818 |
+
if viewpoint_key in scene_templates:
|
| 819 |
+
return scene_templates[viewpoint_key]
|
| 820 |
+
|
| 821 |
+
# 然後嘗試獲取場景類型的通用模板
|
| 822 |
+
if scene_type in scene_templates:
|
| 823 |
+
return scene_templates[scene_type]
|
| 824 |
+
|
| 825 |
+
# 最後使用默認模板
|
| 826 |
+
if "default" in scene_templates:
|
| 827 |
+
return scene_templates["default"]
|
| 828 |
+
|
| 829 |
+
# 備用模板
|
| 830 |
+
return ["A scene with various elements and objects."]
|
| 831 |
+
|
| 832 |
+
except Exception as e:
|
| 833 |
+
self.logger.warning(f"Error getting scene detail templates for '{scene_type}': {str(e)}")
|
| 834 |
+
return ["A scene with various elements and objects."]
|
text_optimizer.py
ADDED
|
@@ -0,0 +1,616 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import logging
|
| 3 |
+
from typing import Dict, List, Optional, Any, Tuple
|
| 4 |
+
|
| 5 |
+
class TextOptimizer:
    """Format, clean up and de-duplicate English scene-description text.

    The class turns lists of detected objects into readable phrases
    ("3 chairs, 2 people, and an apple"), collapses repeated items inside
    already generated descriptions, handles pluralisation and removes
    redundant or overly technical wording.
    """

    # Phrase returned whenever there is nothing meaningful to list.
    _NO_OBJECTS_TEXT = "no specific objects clearly identified"

    # Plurals that cannot be derived by suffix rules. The "-f/-fe -> -ves"
    # nouns are listed explicitly because a blanket suffix rule is wrong for
    # most words ("giraffe", "cliff", "roof", ...).
    _IRREGULAR_PLURALS: Dict[str, str] = {
        'person': 'people',
        'child': 'children',
        'foot': 'feet',
        'tooth': 'teeth',
        'mouse': 'mice',
        'man': 'men',
        'woman': 'women',
        'knife': 'knives',
        'wife': 'wives',
        'life': 'lives',
        'leaf': 'leaves',
        'shelf': 'shelves',
        'wolf': 'wolves',
        'half': 'halves',
        'calf': 'calves',
        'loaf': 'loaves',
        'thief': 'thieves',
        'scarf': 'scarves',
        'elf': 'elves',
    }

    # Nouns whose plural equals the given form (or that are already plural).
    _INVARIANT_NOUNS = frozenset({'sheep', 'deer', 'skis', 'scissors', 'aircraft'})

    # Counts that are spelled out in multi-object descriptions.
    _NUMBER_WORDS: Dict[int, str] = {
        2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
        7: "seven", 8: "eight", 9: "nine", 10: "ten",
        11: "eleven", 12: "twelve",
    }

    # Class names that are renamed to a more natural wording.
    _CLASS_NAME_ALIASES: Dict[str, str] = {
        'tv monitor': 'television',
        'cell phone': 'mobile phone',
        'hair drier': 'hair dryer',
    }

    # Matches an object list introduced by "with", up to the next period or
    # the end of the text.
    _WITH_LIST_PATTERN = re.compile(r'\bwith ([^.]+?)(?=\.|$)')

    # Separators between list items: ",", ", and" or a bare " and ".
    _LIST_SEPARATOR_PATTERN = re.compile(r'\s*,\s*(?:and\s+)?|\s+and\s+')

    # (pattern, replacement) pairs applied by remove_repetitive_descriptors.
    _DESCRIPTOR_CLEANUP_PATTERNS = (
        # Drop the word "identical" after a count or on its own.
        (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
        (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
        (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
        # Tone down overly technical "comprehensive ..." wording.
        (r'\bcomprehensive arrangement of\b', 'arrangement of'),
        (r'\bcomprehensive view featuring\b', 'scene featuring'),
        (r'\bcomprehensive display of\b', 'display of'),
        # "identical" directly followed by punctuation / end of text.
        (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
        (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
    )

    # (pattern, replacement) pairs applied by
    # _remove_redundant_spatial_qualifiers. The trailing \b keeps words such
    # as "framework" or "kitchenette" from being cut in half.
    _SPATIAL_QUALIFIER_PATTERNS = (
        # Furniture whose room is implied.
        (r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)\b', r'\1'),
        # Kitchen appliances.
        (r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen\b', r'\1'),
        # Bathroom fixtures.
        (r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)\b', r'\1'),
        # Generic filler: "in the scene", "in the image", ...
        (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)\b', r'\1'),
    )

    _DINING_SCENES = ("dining_area", "restaurant", "upscale_dining", "cafe")

    # Scene groups and the descriptor each object class receives in them.
    # The first group containing the scene type wins, even if it has no
    # entry for the class.
    _SCENE_DESCRIPTORS = (
        (_DINING_SCENES, {
            "chair": "wooden",
            "dining table": "wooden",
            "couch": "upholstered",
            "vase": "decorative",
        }),
        (("office_workspace", "meeting_room", "conference_room"), {
            "chair": "office",
            "dining table": "conference",
            "laptop": "modern",
            "book": "reference",
        }),
        (("living_room",), {
            "couch": "comfortable",
            "chair": "accent",
            "tv": "large",
            "vase": "decorative",
        }),
        (("city_street", "park_area", "parking_lot"), {
            "car": "parked",
            "person": "walking",
            "bicycle": "stationed",
        }),
    )

    # Descriptors used when the scene type matches no group above.
    _GENERIC_DESCRIPTORS: Dict[str, str] = {
        "chair": "comfortable",
        "dining table": "sturdy",
        "car": "parked",
        "person": "present",
    }

    def __init__(self):
        """Create the optimizer and its class-named logger."""
        self.logger = logging.getLogger(self.__class__.__name__)

    def format_object_list_for_description(self,
                                           objects: List[Dict],
                                           use_indefinite_article_for_one: bool = False,
                                           count_threshold_for_generalization: int = -1,
                                           max_types_to_list: int = 5) -> str:
        """Render detected objects as a readable, counted English list.

        Args:
            objects: Detection dicts; each should carry a 'class_name'.
            use_indefinite_article_for_one: Use "a/an" for single objects
                instead of "one".
            count_threshold_for_generalization: Counts above this value are
                written as "several"/"many"; -1 keeps exact counts.
            max_types_to_list: Maximum number of distinct object types listed.

        Returns:
            str: The formatted list, a "nothing identified" phrase when no
            usable object exists, or "various objects" on unexpected errors.
        """
        try:
            if not objects:
                return self._NO_OBJECTS_TEXT

            counts: Dict[str, int] = {}
            for obj in objects:
                name = obj.get("class_name", "unknown object")
                if not name or name == "unknown object":
                    continue
                counts[name] = counts.get(name, 0) + 1

            if not counts:
                return self._NO_OBJECTS_TEXT

            # Most frequent first, ties broken alphabetically.
            ranked = sorted(counts.items(), key=lambda pair: (-pair[1], pair[0]))
            ranked = ranked[:max_types_to_list]

            threshold = count_threshold_for_generalization
            descriptions = []
            for name, count in ranked:
                if count == 1:
                    if use_indefinite_article_for_one:
                        descriptions.append(f"{self._indefinite_article(name)} {name}")
                    else:
                        descriptions.append(f"one {name}")
                    continue

                plural_name = self._get_plural_form(name)
                if threshold != -1 and count > threshold:
                    quantifier = "several" if count <= threshold + 3 else "many"
                else:
                    quantifier = str(count)
                descriptions.append(f"{quantifier} {plural_name}")

            if not descriptions:
                return self._NO_OBJECTS_TEXT

            return self._format_item_list(descriptions)

        except Exception as e:
            self.logger.warning("Error formatting object list: %s", e)
            return "various objects"

    def optimize_object_description(self, description: str) -> str:
        """Remove redundant repetition from a generated description.

        Redundant spatial qualifiers are stripped first. Then every
        "with X, Y, and Z" list that names the same object more than once is
        collapsed into counted items ("with a chair, a chair, and a table"
        becomes "with 2 chairs and a table"). Lists without repetition are
        left exactly as written.

        Args:
            description: Scene description that may contain repetition.

        Returns:
            str: The cleaned description; on failure the text as it stood
            when the error occurred.
        """
        try:
            description = self._remove_redundant_spatial_qualifiers(description)
            # Rewrite each list where it occurs instead of str.replace(), so
            # identical text elsewhere in the description is never touched.
            return self._WITH_LIST_PATTERN.sub(self._rewrite_object_list, description)

        except Exception as e:
            self.logger.warning("Error optimizing object description: %s", e)
            return description

    def _rewrite_object_list(self, match: "re.Match") -> str:
        """Return the replacement text for one matched "with ..." list."""
        raw_items = self._parse_object_list_items(match.group(1))
        item_counts = self._count_object_items(raw_items)

        # Nothing repeated: keep the author's wording (articles, casing).
        if not any(count > 1 for count in item_counts.values()):
            return match.group(0)

        # Remember how each item was first written so that unique items keep
        # their article in the rewritten list.
        original_forms: Dict[str, str] = {}
        for raw in raw_items:
            original_forms.setdefault(self._normalize_item_for_counting(raw), raw.strip())

        new_items = self._generate_optimized_item_list(item_counts, original_forms)
        return f"with {self._format_item_list(new_items)}"

    def remove_repetitive_descriptors(self, description: str) -> str:
        """Strip repetitive or unsuitable wording such as "identical".

        Args:
            description: Original description text.

        Returns:
            str: The cleaned text with whitespace collapsed, or the original
            text if processing fails.
        """
        try:
            processed = description
            for pattern, replacement in self._DESCRIPTOR_CLEANUP_PATTERNS:
                processed = re.sub(pattern, replacement, processed, flags=re.IGNORECASE)

            # Collapse whitespace left behind by the removals.
            processed = re.sub(r'\s+', ' ', processed).strip()

            self.logger.debug("Cleaned description: removed repetitive descriptors")
            return processed

        except Exception as e:
            self.logger.warning("Error removing repetitive descriptors: %s", e)
            return description

    def format_object_count_description(self, class_name: str, count: int,
                                        scene_type: Optional[str] = None,
                                        detected_objects: Optional[List[Dict]] = None,
                                        avg_confidence: float = 0.0) -> str:
        """Describe `count` objects of one class, with scene-aware descriptors.

        Args:
            class_name: Normalised class name.
            count: Number of objects.
            scene_type: Scene type used to choose descriptors.
            detected_objects: Detections of this class, used for position
                based wording of single objects.
            avg_confidence: Mean detection confidence; descriptors are only
                added from 0.5 upwards.

        Returns:
            str: The description; "" for a non-positive count or an unusable
            class name; a plain fallback if formatting fails.
        """
        try:
            if not isinstance(class_name, str) or not class_name:
                return ""
            if count <= 0:
                return ""

            if count == 1:
                return self._format_single_object_description(
                    class_name, scene_type, detected_objects, avg_confidence)

            plural_form = self._get_plural_form(class_name)
            return self._format_multiple_objects_description(
                class_name, count, plural_form, scene_type, detected_objects, avg_confidence)

        except Exception as e:
            self.logger.warning("Error formatting object count for '%s': %s", class_name, e)
            # The fallback itself must never raise, e.g. when count is None.
            is_plural = isinstance(count, (int, float)) and count > 1
            return f"{count} {class_name}s" if is_plural else class_name

    def normalize_object_class_name(self, class_name: str) -> str:
        """Convert a raw detector class name into natural wording.

        Args:
            class_name: Raw class name.

        Returns:
            str: Lower-cased name without technical prefixes/suffixes, with
            underscores and hyphens turned into spaces; "object" for empty
            or non-string input.
        """
        if not class_name or not isinstance(class_name, str):
            return "object"

        # Strip technical prefixes and suffixes.
        normalized = re.sub(r'^(class_|id_|type_)', '', class_name.lower())
        normalized = re.sub(r'(_class|_id|_type)$', '', normalized)

        # Underscores/hyphens become spaces; runs of whitespace collapse.
        normalized = ' '.join(normalized.replace('_', ' ').replace('-', ' ').split())

        return self._CLASS_NAME_ALIASES.get(normalized, normalized)

    def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
        """Drop location phrases that add no information.

        Args:
            description: Text that may contain phrases such as
                "bed in the bedroom" or "people in the scene".

        Returns:
            str: The text with those qualifiers removed.
        """
        for pattern, replacement in self._SPATIAL_QUALIFIER_PATTERNS:
            description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
        return description

    def _parse_object_list_items(self, obj_list: str) -> List[str]:
        """Split an object list into its items.

        Handles "A, B, and C", "A, B and C", "A and B" and "A, B".

        Args:
            obj_list: The list text.

        Returns:
            List[str]: Stripped, non-empty items in their original order.
        """
        pieces = self._LIST_SEPARATOR_PATTERN.split(obj_list)
        return [piece.strip() for piece in pieces if piece.strip()]

    def _count_object_items(self, all_items: List[str]) -> Dict[str, int]:
        """Count items after normalisation (article removed, lower-cased).

        Args:
            all_items: Items as returned by _parse_object_list_items.

        Returns:
            Dict[str, int]: Normalised item -> occurrences, in first-seen
            order.
        """
        item_counts: Dict[str, int] = {}
        for item in all_items:
            item = item.strip()
            if not item or item in ("and", "with"):
                continue
            key = self._normalize_item_for_counting(item)
            item_counts[key] = item_counts.get(key, 0) + 1
        return item_counts

    def _generate_optimized_item_list(self, item_counts: Dict[str, int],
                                      original_forms: Optional[Dict[str, str]] = None) -> List[str]:
        """Turn item counts back into list entries.

        Args:
            item_counts: Normalised item -> occurrences.
            original_forms: Optional normalised item -> original wording;
                used for items that occur once so they keep their article.

        Returns:
            List[str]: "N plurals" for repeated items, the original (or
            normalised) wording for unique ones.
        """
        forms = original_forms or {}
        new_items = []
        for item, count in item_counts.items():
            if count > 1:
                new_items.append(f"{count} {self._make_plural(item)}")
            else:
                new_items.append(forms.get(item, item))
        return new_items

    def _format_item_list(self, new_items: List[str]) -> str:
        """Join items with commas and an Oxford comma before "and".

        Args:
            new_items: Items to join.

        Returns:
            str: "" for no items, "A", "A and B" or "A, B, and C".
        """
        if not new_items:
            return ""
        if len(new_items) == 1:
            return new_items[0]
        if len(new_items) == 2:
            return f"{new_items[0]} and {new_items[1]}"
        return ", ".join(new_items[:-1]) + f", and {new_items[-1]}"

    def _normalize_item_for_counting(self, item: str) -> str:
        """Lower-case an item and remove a leading article.

        Args:
            item: Raw list item.

        Returns:
            str: The normalised item.
        """
        item = re.sub(r'^(a|an|the)\s+', '', item.lower())
        return item.strip()

    def _make_plural(self, item: str) -> str:
        """Return the plural of a singular noun phrase.

        Delegates to _get_plural_form so both pluralisation entry points
        agree (irregular and invariant nouns included).

        Args:
            item: Singular noun phrase.

        Returns:
            str: Plural noun phrase.
        """
        return self._get_plural_form(item)

    def _get_plural_form(self, word: str) -> str:
        """Return the plural of `word`; only its last token is inflected.

        Args:
            word: Singular noun or noun phrase, e.g. "wine glass".

        Returns:
            str: Plural form; `word` followed by "s" if anything goes wrong.
        """
        try:
            head, separator, last = word.rpartition(' ')
            lowered = last.lower()

            if lowered in self._INVARIANT_NOUNS:
                return word
            if lowered in self._IRREGULAR_PLURALS:
                return f"{head}{separator}{self._IRREGULAR_PLURALS[lowered]}"

            if lowered.endswith(('s', 'sh', 'ch', 'x', 'z')):
                return word + 'es'
            # Consonant + y -> ies ("cherry"), but vowel + y keeps y ("key").
            if len(lowered) > 1 and lowered.endswith('y') and lowered[-2] not in 'aeiou':
                return word[:-1] + 'ies'
            return word + 's'

        except Exception as e:
            self.logger.warning("Error getting plural form for '%s': %s", word, e)
            return f"{word}s"

    @staticmethod
    def _indefinite_article(phrase: str) -> str:
        """Return "an" if `phrase` starts with a vowel letter, else "a"."""
        return "an" if phrase and phrase[0].lower() in 'aeiou' else "a"

    def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
                                          detected_objects: Optional[List[Dict]],
                                          avg_confidence: float) -> str:
        """Describe a single object, e.g. "an office chair".

        Args:
            class_name: Object class name.
            scene_type: Scene type.
            detected_objects: Detections of this class.
            avg_confidence: Mean detection confidence.

        Returns:
            str: Article, optional descriptors and the class name.
        """
        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
        feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)

        # Both sources can yield the same word (e.g. "decorative"); say it once.
        words = []
        for descriptor in (material_descriptor, feature_descriptor):
            if descriptor and descriptor not in words:
                words.append(descriptor)
        words.append(class_name)

        phrase = ' '.join(words)
        # The article must agree with the first word actually spoken.
        return f"{self._indefinite_article(phrase)} {phrase}"

    def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
                                             scene_type: Optional[str], detected_objects: Optional[List[Dict]],
                                             avg_confidence: float) -> str:
        """Describe several objects, e.g. "three wooden chairs".

        Args:
            class_name: Object class name.
            count: Number of objects.
            plural_form: Plural of the class name.
            scene_type: Scene type.
            detected_objects: Detections of this class (currently unused).
            avg_confidence: Mean detection confidence.

        Returns:
            str: Quantity word, optional descriptor and plural noun.
        """
        if count in self._NUMBER_WORDS:
            count_expression = self._NUMBER_WORDS[count]
        elif count <= 20:
            count_expression = "several"
        else:
            count_expression = "numerous"

        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)

        # Join only the parts that exist so a missing descriptor does not
        # leave a double space behind.
        parts = (count_expression, material_descriptor, plural_form)
        return ' '.join(part for part in parts if part)

    def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
                                 avg_confidence: float) -> Optional[str]:
        """Pick a material/function descriptor from scene and confidence.

        Args:
            class_name: Object class name.
            scene_type: Scene type.
            avg_confidence: Detection confidence.

        Returns:
            Optional[str]: The descriptor, or None when confidence is below
            0.5 or no descriptor is defined for the class.
        """
        # Only describe materials for sufficiently confident detections.
        if avg_confidence < 0.5:
            return None

        if scene_type:
            for scene_group, descriptors in self._SCENE_DESCRIPTORS:
                if scene_type not in scene_group:
                    continue
                # Dining chairs are called "wooden" only when very confident.
                if (scene_group is self._DINING_SCENES and class_name == "chair"
                        and not avg_confidence > 0.7):
                    return None
                return descriptors.get(class_name)

        return self._GENERIC_DESCRIPTORS.get(class_name)

    def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
                                   detected_objects: Optional[List[Dict]]) -> Optional[str]:
        """Pick a position- or scene-based descriptor for a lone object.

        Args:
            class_name: Object class name.
            scene_type: Scene type.
            detected_objects: Detections; must contain exactly one entry.

        Returns:
            Optional[str]: The descriptor, or None if none applies.
        """
        if not detected_objects or len(detected_objects) != 1:
            return None

        # A missing or None region is treated as "no position known".
        region = str(detected_objects[0].get("region") or "").lower()

        # Position-based wording.
        if "center" in region:
            if class_name == "dining table":
                return "central"
            if class_name == "chair":
                return "centrally placed"
        elif "corner" in region or "left" in region or "right" in region:
            return "positioned"

        # Scene-based wording.
        if scene_type and scene_type in ("dining_area", "restaurant"):
            if class_name == "chair":
                return "dining"
            if class_name == "vase":
                return "decorative"

        return None
|