Spaces:

DawnC
/

VisionScout

Running on Zero

App Files Files Community

DawnC committed on Jun 12

Commit

12d9ea9

verified ·

1 Parent(s): 060f7fa

Upload 14 files

Browse files

Files changed (14) hide show

content_generator.py +569 -0
functional_zone_detector.py +298 -0
object_description_generator.py +76 -1191
object_group_processor.py +397 -0
pattern_analyzer.py +371 -0
prominence_calculator.py +147 -0
scene_zone_identifier.py +35 -1121
spatial_location_handler.py +346 -0
specialized_scene_processor.py +527 -0
statistics_processor.py +343 -0
template_manager.py +0 -0
template_processor.py +429 -0
template_repository.py +834 -0
text_optimizer.py +616 -0

content_generator.py ADDED Viewed

	@@ -0,0 +1,569 @@

+import logging
+import random
+import re
+from typing import Dict, List, Optional, Union, Any
+class ContentGenerator:
+    """
+    內容生成器 - 負責基礎內容生成和佔位符替換邏輯
+    此類別專門處理模板中的動態內容生成，包括物件摘要、
+    場景特定內容生成，以及提供默認的替換字典。
+    """
+    def __init__(self):
+        """初始化內容生成器"""
+        self.logger = logging.getLogger(self.__class__.__name__)
+        # 預載入默認替換內容
+        self.default_replacements = self._generate_default_replacements()
+        self.logger.debug("ContentGenerator initialized successfully")
+    def _generate_default_replacements(self) -> Dict[str, str]:
+        """
+        生成默認的模板替換內容
+        Returns:
+            Dict[str, str]: 默認替換內容字典
+        """
+        return {
+            # 場景介紹相關
+            "scene_introduction": "this scene",
+            "location_prefix": "this location",
+            "setting_description": "this setting",
+            "area_description": "this area",
+            "environment_description": "this environment",
+            "spatial_introduction": "this space",
+            # 室內相關
+            "furniture": "various furniture pieces",
+            "seating": "comfortable seating",
+            "electronics": "entertainment devices",
+            "bed_type": "a bed",
+            "bed_location": "room",
+            "bed_description": "sleeping arrangements",
+            "extras": "personal items",
+            "table_setup": "a dining table and chairs",
+            "table_description": "a dining surface",
+            "dining_items": "dining furniture and tableware",
+            "appliances": "kitchen appliances",
+            "kitchen_items": "cooking utensils and dishware",
+            "cooking_equipment": "cooking equipment",
+            "office_equipment": "work-related furniture and devices",
+            "desk_setup": "a desk and chair",
+            "computer_equipment": "electronic devices",
+            # 室外/城市相關
+            "traffic_description": "vehicles and pedestrians",
+            "people_and_vehicles": "people and various vehicles",
+            "street_elements": "urban infrastructure",
+            "park_features": "benches and greenery",
+            "outdoor_elements": "natural features",
+            "park_description": "outdoor amenities",
+            "store_elements": "merchandise displays",
+            "shopping_activity": "customers browse and shop",
+            "store_items": "products for sale",
+            # 高級餐廳相關
+            "design_elements": "elegant decor",
+            "lighting": "stylish lighting fixtures",
+            # 亞洲商業街相關
+            "storefront_features": "compact shops",
+            "pedestrian_flow": "people walking",
+            "asian_elements": "distinctive cultural elements",
+            "cultural_elements": "traditional design features",
+            "signage": "colorful signs",
+            "street_activities": "busy urban activity",
+            # 金融區相關
+            "buildings": "tall buildings",
+            "traffic_elements": "vehicles",
+            "skyscrapers": "high-rise buildings",
+            "road_features": "wide streets",
+            "architectural_elements": "modern architecture",
+            "city_landmarks": "prominent structures",
+            # 十字路口相關
+            "crossing_pattern": "clearly marked pedestrian crossings",
+            "pedestrian_behavior": "careful pedestrian movement",
+            "pedestrian_density": "multiple groups of pedestrians",
+            "traffic_pattern": "well-regulated traffic flow",
+            "pedestrian_flow": "steady pedestrian movement",
+            "traffic_description": "active urban traffic",
+            "people_and_vehicles": "pedestrians and vehicles",
+            "street_elements": "urban infrastructure elements",
+            # 交通相關
+            "transit_vehicles": "public transportation vehicles",
+            "passenger_activity": "commuter movement",
+            "transportation_modes": "various transit options",
+            "passenger_needs": "waiting areas",
+            "transit_infrastructure": "transit facilities",
+            "passenger_movement": "commuter flow",
+            # 購物區相關
+            "retail_elements": "shops and displays",
+            "store_types": "various retail establishments",
+            "walkway_features": "pedestrian pathways",
+            "commercial_signage": "store signs",
+            "consumer_behavior": "shopping activities",
+            # 空中視角相關
+            "commercial_layout": "organized retail areas",
+            "pedestrian_pattern": "people movement patterns",
+            "gathering_features": "public gathering spaces",
+            "movement_pattern": "crowd flow patterns",
+            "urban_elements": "city infrastructure",
+            "public_activity": "social interaction",
+            # 文化特定元素
+            "stall_elements": "vendor booths",
+            "lighting_features": "decorative lights",
+            "food_elements": "food offerings",
+            "vendor_stalls": "market stalls",
+            "nighttime_activity": "evening commerce",
+            "cultural_lighting": "traditional lighting",
+            "night_market_sounds": "lively market sounds",
+            "evening_crowd_behavior": "nighttime social activity",
+            "architectural_elements": "cultural buildings",
+            "religious_structures": "sacred buildings",
+            "decorative_features": "ornamental designs",
+            "cultural_practices": "traditional activities",
+            "temple_architecture": "religious structures",
+            "sensory_elements": "atmospheric elements",
+            "visitor_activities": "cultural experiences",
+            "ritual_activities": "ceremonial practices",
+            "cultural_symbols": "meaningful symbols",
+            "architectural_style": "historical buildings",
+            "historic_elements": "traditional architecture",
+            "urban_design": "city planning elements",
+            "social_behaviors": "public interactions",
+            "european_features": "European architectural details",
+            "tourist_activities": "visitor activities",
+            "local_customs": "regional practices",
+            # 時間特定元素
+            "lighting_effects": "artificial lighting",
+            "shadow_patterns": "light and shadow",
+            "urban_features": "city elements",
+            "illuminated_elements": "lit structures",
+            "evening_activities": "nighttime activities",
+            "light_sources": "lighting points",
+            "lit_areas": "illuminated spaces",
+            "shadowed_zones": "darker areas",
+            "illuminated_signage": "bright signs",
+            "colorful_lighting": "multicolored lights",
+            "neon_elements": "neon signs",
+            "night_crowd_behavior": "evening social patterns",
+            "light_displays": "lighting installations",
+            "building_features": "architectural elements",
+            "nightlife_activities": "evening entertainment",
+            "lighting_modifier": "bright",
+            # 混合環境元素
+            "transitional_elements": "connecting features",
+            "indoor_features": "interior elements",
+            "outdoor_setting": "exterior spaces",
+            "interior_amenities": "inside comforts",
+            "exterior_features": "outside elements",
+            "inside_elements": "interior design",
+            "outside_spaces": "outdoor areas",
+            "dual_environment_benefits": "combined settings",
+            "passenger_activities": "waiting behaviors",
+            "transportation_types": "transit vehicles",
+            "sheltered_elements": "covered areas",
+            "exposed_areas": "open sections",
+            "waiting_behaviors": "passenger activities",
+            "indoor_facilities": "inside services",
+            "platform_features": "transit platform elements",
+            "transit_routines": "transportation procedures",
+            # 專門場所元素
+            "seating_arrangement": "spectator seating",
+            "playing_surface": "athletic field",
+            "sporting_activities": "sports events",
+            "spectator_facilities": "viewer accommodations",
+            "competition_space": "sports arena",
+            "sports_events": "athletic competitions",
+            "viewing_areas": "audience sections",
+            "field_elements": "field markings and equipment",
+            "game_activities": "competitive play",
+            "construction_equipment": "building machinery",
+            "building_materials": "construction supplies",
+            "construction_activities": "building work",
+            "work_elements": "construction tools",
+            "structural_components": "building structures",
+            "site_equipment": "construction gear",
+            "raw_materials": "building supplies",
+            "construction_process": "building phases",
+            "medical_elements": "healthcare equipment",
+            "clinical_activities": "medical procedures",
+            "facility_design": "healthcare layout",
+            "healthcare_features": "medical facilities",
+            "patient_interactions": "care activities",
+            "equipment_types": "medical devices",
+            "care_procedures": "health services",
+            "treatment_spaces": "clinical areas",
+            "educational_furniture": "learning furniture",
+            "learning_activities": "educational practices",
+            "instructional_design": "teaching layout",
+            "classroom_elements": "school equipment",
+            "teaching_methods": "educational approaches",
+            "student_engagement": "learning participation",
+            "learning_spaces": "educational areas",
+            "educational_tools": "teaching resources",
+            "knowledge_transfer": "learning exchanges"
+        }
+    def generate_objects_summary(self, detected_objects: List[Dict]) -> str:
+        """
+        基於檢測物件生成自然語言摘要，按重要性排序
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            str: 物件摘要描述
+        """
+        try:
+            # detected_objects 裡有幾個 traffic light)
+            tl_count = len([obj for obj in detected_objects if obj.get("class_name","") == "traffic light"])
+            # print(f"[DEBUG] _generate_objects_summary 傳入的 detected_objects 中 traffic light: {tl_count} 個")
+            for obj in detected_objects:
+                if obj.get("class_name","") == "traffic light":
+                    print(f"    - conf={obj.get('confidence',0):.4f}, bbox={obj.get('bbox')}, region={obj.get('region')}")
+            if not detected_objects:
+                return "various elements"
+            # 計算物件統計
+            object_counts = {}
+            total_confidence = 0
+            for obj in detected_objects:
+                class_name = obj.get("class_name", "unknown")
+                confidence = obj.get("confidence", 0.5)
+                if class_name not in object_counts:
+                    object_counts[class_name] = {"count": 0, "total_confidence": 0}
+                object_counts[class_name]["count"] += 1
+                object_counts[class_name]["total_confidence"] += confidence
+                total_confidence += confidence
+            # 計算平均置信度並排序
+            sorted_objects = []
+            for class_name, stats in object_counts.items():
+                avg_confidence = stats["total_confidence"] / stats["count"]
+                count = stats["count"]
+                # 重要性評分：結合數量和置信度
+                importance_score = (count * 0.6) + (avg_confidence * 0.4)
+                sorted_objects.append((class_name, count, importance_score))
+            # 按重要性排序，取前5個最重要的物件
+            sorted_objects.sort(key=lambda x: x[2], reverse=True)
+            top_objects = sorted_objects[:5]
+            # 生成自然語言描述
+            descriptions = []
+            for class_name, count, _ in top_objects:
+                clean_name = class_name.replace('_', ' ')
+                if count == 1:
+                    article = "an" if clean_name[0].lower() in 'aeiou' else "a"
+                    descriptions.append(f"{article} {clean_name}")
+                else:
+                    descriptions.append(f"{count} {clean_name}s")
+            # 組合描述
+            if len(descriptions) == 1:
+                return descriptions[0]
+            elif len(descriptions) == 2:
+                return f"{descriptions[0]} and {descriptions[1]}"
+            else:
+                return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
+        except Exception as e:
+            self.logger.warning(f"Error generating objects summary: {str(e)}")
+            return "various elements"
+    def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
+                                   all_replacements: Dict, detected_objects: List[Dict],
+                                   scene_type: str) -> str:
+        """
+        獲取特定佔位符的替換內容，確保永遠不返回空值
+        Args:
+            placeholder: 佔位符名稱
+            fillers: 模板填充器字典
+            all_replacements: 所有替換內容字典
+            detected_objects: 檢測到的物體列表
+            scene_type: 場景類型
+        Returns:
+            str: 替換內容
+        """
+        try:
+            # 優先處理動態內容生成的佔位符
+            dynamic_placeholders = [
+                'primary_objects', 'detected_objects_summary', 'main_objects',
+                'functional_area', 'functional_zones_description', 'scene_elements'
+            ]
+            if placeholder in dynamic_placeholders:
+                dynamic_content = self.generate_objects_summary(detected_objects)
+                if dynamic_content and dynamic_content.strip():
+                    return dynamic_content.strip()
+            # 檢查預定義替換內容
+            if placeholder in all_replacements:
+                replacement = all_replacements[placeholder]
+                if replacement and replacement.strip():
+                    return replacement.strip()
+            # 檢查物體模板填充器
+            if placeholder in fillers:
+                options = fillers[placeholder]
+                if options and isinstance(options, list):
+                    valid_options = [opt.strip() for opt in options if opt and str(opt).strip()]
+                    if valid_options:
+                        num_items = min(len(valid_options), random.randint(1, 3))
+                        selected_items = random.sample(valid_options, num_items)
+                        if len(selected_items) == 1:
+                            return selected_items[0]
+                        elif len(selected_items) == 2:
+                            return f"{selected_items[0]} and {selected_items[1]}"
+                        else:
+                            return ", ".join(selected_items[:-1]) + f", and {selected_items[-1]}"
+            # 基於檢測對象生成動態內容
+            scene_specific_replacement = self.generate_scene_specific_content(
+                placeholder, detected_objects, scene_type
+            )
+            if scene_specific_replacement and scene_specific_replacement.strip():
+                return scene_specific_replacement.strip()
+            # 通用備用字典
+            fallback_replacements = {
+                # 交通和城市相關
+                "crossing_pattern": "pedestrian crosswalks",
+                "pedestrian_behavior": "people moving carefully",
+                "traffic_pattern": "vehicle movement",
+                "urban_elements": "city infrastructure",
+                "street_elements": "urban features",
+                "intersection_features": "traffic management systems",
+                "pedestrian_density": "groups of people",
+                "pedestrian_flow": "pedestrian movement",
+                "traffic_description": "vehicle traffic",
+                "people_and_vehicles": "pedestrians and cars",
+                # 場景設置相關
+                "scene_setting": "this urban environment",
+                "location_context": "the area",
+                "spatial_context": "the scene",
+                "environmental_context": "this location",
+                # 常見的家具和設備
+                "furniture": "various furniture pieces",
+                "seating": "seating arrangements",
+                "electronics": "electronic devices",
+                "appliances": "household appliances",
+                # 活動和行為
+                "activities": "various activities",
+                "interactions": "people interacting",
+                "movement": "movement patterns",
+                # 照明和氛圍
+                "lighting_conditions": "ambient lighting",
+                "atmosphere": "the overall atmosphere",
+                "ambiance": "environmental ambiance",
+                # 空間描述
+                "spatial_arrangement": "spatial organization",
+                "layout": "the layout",
+                "composition": "visual composition",
+                # 物體和元素
+                "objects": "various objects",
+                "elements": "scene elements",
+                "features": "notable features",
+                "details": "observable details"
+            }
+            if placeholder in fallback_replacements:
+                return fallback_replacements[placeholder]
+            # 基於場景類型的智能默認值
+            scene_based_defaults = self.get_scene_based_default(placeholder, scene_type)
+            if scene_based_defaults:
+                return scene_based_defaults
+            # 最終備用：將下劃線轉換為有意義的短語
+            cleaned_placeholder = placeholder.replace('_', ' ')
+            # 對常見模式提供更好的默認值
+            if placeholder.endswith('_pattern'):
+                return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
+            elif placeholder.endswith('_behavior'):
+                return f"{cleaned_placeholder.replace(' behavior', '')} activity"
+            elif placeholder.endswith('_description'):
+                return f"{cleaned_placeholder.replace(' description', '')} elements"
+            elif placeholder.endswith('_elements'):
+                return cleaned_placeholder
+            elif placeholder.endswith('_features'):
+                return cleaned_placeholder
+            else:
+                return cleaned_placeholder if cleaned_placeholder != placeholder else "various elements"
+        except Exception as e:
+            self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
+            # 確保即使在異常情況下也返回有意義的內容
+            return placeholder.replace('_', ' ') if placeholder else "scene elements"
+    def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
+        """
+        基於場景類型提供智能默認值
+        Args:
+            placeholder: 佔位符名稱
+            scene_type: 場景類型
+        Returns:
+            Optional[str]: 場景特定的默認值或None
+        """
+        try:
+            # 針對不同場景類型的特定默認值
+            scene_defaults = {
+                "urban_intersection": {
+                    "crossing_pattern": "marked crosswalks",
+                    "pedestrian_behavior": "pedestrians crossing carefully",
+                    "traffic_pattern": "controlled traffic flow"
+                },
+                "city_street": {
+                    "traffic_description": "urban vehicle traffic",
+                    "street_elements": "city infrastructure",
+                    "people_and_vehicles": "pedestrians and vehicles"
+                },
+                "living_room": {
+                    "furniture": "comfortable living room furniture",
+                    "seating": "sofas and chairs",
+                    "electronics": "entertainment equipment"
+                },
+                "kitchen": {
+                    "appliances": "kitchen appliances",
+                    "cooking_equipment": "cooking tools and equipment"
+                },
+                "office_workspace": {
+                    "office_equipment": "work furniture and devices",
+                    "desk_setup": "desk and office chair"
+                }
+            }
+            if scene_type in scene_defaults and placeholder in scene_defaults[scene_type]:
+                return scene_defaults[scene_type][placeholder]
+            return None
+        except Exception as e:
+            self.logger.warning(f"Error getting scene-based default for '{placeholder}' in '{scene_type}': {str(e)}")
+            return None
+    def generate_scene_specific_content(self, placeholder: str, detected_objects: List[Dict],
+                                       scene_type: str) -> Optional[str]:
+        """
+        基於場景特定邏輯生成佔位符內容
+        Args:
+            placeholder: 佔位符名稱
+            detected_objects: 檢測到的物體列表
+            scene_type: 場景類型
+        Returns:
+            Optional[str]: 生成的內容或None
+        """
+        try:
+            if placeholder == "furniture":
+                # 提取家具物品
+                furniture_ids = [56, 57, 58, 59, 60, 61]  # 家具類別ID
+                furniture_objects = [obj for obj in detected_objects if obj.get("class_id") in furniture_ids]
+                if furniture_objects:
+                    furniture_names = [obj.get("class_name", "furniture") for obj in furniture_objects[:3]]
+                    unique_names = list(set(furniture_names))
+                    return ", ".join(unique_names) if len(unique_names) > 1 else unique_names[0]
+                return "various furniture items"
+            elif placeholder == "electronics":
+                # 提取電子設備
+                electronics_ids = [62, 63, 64, 65, 66, 67, 68, 69, 70]  # 電子設備類別ID
+                electronics_objects = [obj for obj in detected_objects if obj.get("class_id") in electronics_ids]
+                if electronics_objects:
+                    electronics_names = [obj.get("class_name", "electronic device") for obj in electronics_objects[:3]]
+                    unique_names = list(set(electronics_names))
+                    return ", ".join(unique_names) if len(unique_names) > 1 else unique_names[0]
+                return "electronic devices"
+            elif placeholder == "people_count":
+                # 計算人數
+                people_count = len([obj for obj in detected_objects if obj.get("class_id") == 0])
+                if people_count == 0:
+                    return "no people"
+                elif people_count == 1:
+                    return "one person"
+                elif people_count < 5:
+                    return f"{people_count} people"
+                else:
+                    return "several people"
+            elif placeholder == "seating":
+                # 提取座位物品
+                seating_ids = [56, 57]  # chair, sofa
+                seating_objects = [obj for obj in detected_objects if obj.get("class_id") in seating_ids]
+                if seating_objects:
+                    seating_names = [obj.get("class_name", "seating") for obj in seating_objects[:2]]
+                    unique_names = list(set(seating_names))
+                    return ", ".join(unique_names) if len(unique_names) > 1 else unique_names[0]
+                return "seating arrangements"
+            # 如果沒有匹配的特定邏輯，返回None
+            return None
+        except Exception as e:
+            self.logger.warning(f"Error generating scene-specific content for '{placeholder}': {str(e)}")
+            return None
+    def get_emergency_replacement(self, placeholder: str) -> str:
+        """
+        獲取緊急替換值，確保不會產生語法錯誤
+        Args:
+            placeholder: 佔位符名稱
+        Returns:
+            str: 安全的替換值
+        """
+        emergency_replacements = {
+            "crossing_pattern": "pedestrian walkways",
+            "pedestrian_behavior": "people moving through the area",
+            "traffic_pattern": "vehicle movement",
+            "scene_setting": "this location",
+            "urban_elements": "city features",
+            "street_elements": "urban components"
+        }
+        if placeholder in emergency_replacements:
+            return emergency_replacements[placeholder]
+        # 基於佔位符名稱生成合理的替換
+        cleaned = placeholder.replace('_', ' ')
+        if len(cleaned.split()) > 1:
+            return cleaned
+        else:
+            return f"various {cleaned}"

functional_zone_detector.py ADDED Viewed

	@@ -0,0 +1,298 @@

+import logging
+import traceback
+from typing import Dict, List, Any, Optional
+logger = logging.getLogger(__name__)
+class FunctionalZoneDetector:
+    """
+    負責基於物件關聯性的功能區域識別
+    處理物件組合分析和描述性區域命名
+    """
+    def __init__(self):
+        """初始化功能區域檢測器"""
+        try:
+            logger.info("FunctionalZoneDetector initialized successfully")
+        except Exception as e:
+            logger.error(f"Failed to initialize FunctionalZoneDetector: {str(e)}")
+            logger.error(traceback.format_exc())
+            raise
+    def identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別主要功能區域，基於最強的物件關聯性組合
+        採用通用邏輯處理各種室內場景
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            主要功能區域字典或None
+        """
+        try:
+            # 用餐區域檢測（桌椅組合）
+            dining_area = self.detect_functional_combination(
+                detected_objects,
+                primary_objects=[60],  # dining table
+                supporting_objects=[56, 40, 41, 42, 43],  # chair, wine glass, cup, fork, knife
+                min_supporting=2,
+                description_template="Dining area with table and seating arrangement"
+            )
+            if dining_area:
+                return dining_area
+            # 休息區域檢測（沙發電視組合或床）
+            seating_area = self.detect_functional_combination(
+                detected_objects,
+                primary_objects=[57, 59],  # sofa, bed
+                supporting_objects=[62, 58, 56],  # tv, potted plant, chair
+                min_supporting=1,
+                description_template="Seating and relaxation area"
+            )
+            if seating_area:
+                return seating_area
+            # 工作區域檢測（電子設備與家具組合）
+            work_area = self.detect_functional_combination(
+                detected_objects,
+                primary_objects=[63, 66],  # laptop, keyboard
+                supporting_objects=[60, 56, 64],  # dining table, chair, mouse
+                min_supporting=2,
+                description_template="Workspace area with electronics and furniture"
+            )
+            if work_area:
+                return work_area
+            return None
+        except Exception as e:
+            logger.error(f"Error identifying primary functional area: {str(e)}")
+            logger.error(traceback.format_exc())
+            return None
+    def identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
+        """
+        識別次要功能區域，避免與主要區域重疊
+        Args:
+            detected_objects: 檢測到的物件列表
+            existing_zones: 已存在的功能區域
+        Returns:
+            次要功能區域字典或None
+        """
+        try:
+            # 獲取已使用的區域
+            used_regions = set(zone.get("region") for zone in existing_zones.values())
+            # 裝飾區域檢測（植物集中區域）
+            decorative_area = self.detect_functional_combination(
+                detected_objects,
+                primary_objects=[58],  # potted plant
+                supporting_objects=[75],  # vase
+                min_supporting=0,
+                min_primary=3,  # 至少需要3個植物
+                description_template="Decorative area with plants and ornamental items",
+                exclude_regions=used_regions
+            )
+            if decorative_area:
+                return decorative_area
+            # 儲存區域檢測（廚房電器組合）
+            storage_area = self.detect_functional_combination(
+                detected_objects,
+                primary_objects=[72, 68, 69],  # refrigerator, microwave, oven
+                supporting_objects=[71],  # sink
+                min_supporting=0,
+                min_primary=2,
+                description_template="Kitchen appliance and storage area",
+                exclude_regions=used_regions
+            )
+            if storage_area:
+                return storage_area
+            return None
+        except Exception as e:
+            logger.error(f"Error identifying secondary functional area: {str(e)}")
+            logger.error(traceback.format_exc())
+            return None
+    def detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
+                                    supporting_objects: List[int], min_supporting: int,
+                                    description_template: str, min_primary: int = 1,
+                                    exclude_regions: set = None) -> Dict:
+        """
+        通用的功能組合檢測方法
+        基於主要物件和支持物件的組合判斷��能區域
+        Args:
+            detected_objects: 檢測到的物件列表
+            primary_objects: 主要物件的class_id列表
+            supporting_objects: 支持物件的class_id列表
+            min_supporting: 最少需要的支持物件數量
+            description_template: 描述模板
+            min_primary: 最少需要的主要物件數量
+            exclude_regions: 需要排除的區域集合
+        Returns:
+            功能區域資訊字典，如果不符合條件則返回None
+        """
+        try:
+            if exclude_regions is None:
+                exclude_regions = set()
+            # 收集主要物件
+            primary_objs = [obj for obj in detected_objects
+                        if obj.get("class_id") in primary_objects and obj.get("confidence", 0) >= 0.4]
+            # 收集支持物件
+            supporting_objs = [obj for obj in detected_objects
+                            if obj.get("class_id") in supporting_objects and obj.get("confidence", 0) >= 0.4]
+            # 檢查是否滿足最少數量要求
+            if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
+                return None
+            # 按區域組織物件
+            region_combinations = {}
+            all_relevant_objs = primary_objs + supporting_objs
+            for obj in all_relevant_objs:
+                region = obj.get("region")
+                # 排除指定區域
+                if region in exclude_regions:
+                    continue
+                if region not in region_combinations:
+                    region_combinations[region] = {"primary": [], "supporting": [], "all": []}
+                region_combinations[region]["all"].append(obj)
+                if obj.get("class_id") in primary_objects:
+                    region_combinations[region]["primary"].append(obj)
+                else:
+                    region_combinations[region]["supporting"].append(obj)
+            # 找到最佳區域組合
+            best_region = None
+            best_score = 0
+            for region, objs in region_combinations.items():
+                # 計算該區域的評分
+                primary_count = len(objs["primary"])
+                supporting_count = len(objs["supporting"])
+                # 必須滿足最低要求
+                if primary_count < min_primary or supporting_count < min_supporting:
+                    continue
+                # 計算組合評分（主要物件權重較高）
+                score = primary_count * 2 + supporting_count
+                if score > best_score:
+                    best_score = score
+                    best_region = region
+            if best_region is None:
+                return None
+            best_combination = region_combinations[best_region]
+            all_objects = [obj["class_name"] for obj in best_combination["all"]]
+            return {
+                "region": best_region,
+                "objects": all_objects,
+                "description": description_template
+            }
+        except Exception as e:
+            logger.error(f"Error detecting functional combination: {str(e)}")
+            logger.error(traceback.format_exc())
+            return None
+    def generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
+        """
+        基於區域與物品名產生一個比較有描述性的區域
+        Args:
+            zone_data: 區域數據字典
+            priority_level: 優先級別（primary/secondary）
+        Returns:
+            str: 描述性區域鍵名
+        """
+        try:
+            objects = zone_data.get("objects", [])
+            region = zone_data.get("region", "")
+            description = zone_data.get("description", "")
+            # 基於物件內容確定功能類型
+            if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
+                base_name = "dining area"
+            elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
+                base_name = "seating area"
+            elif any("bed" in obj.lower() for obj in objects):
+                base_name = "sleeping area"
+            elif any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
+                base_name = "workspace area"
+            elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
+                base_name = "decorative area"
+            elif any("refrigerator" in obj.lower() or "microwave" in obj.lower() for obj in objects):
+                base_name = "kitchen area"
+            else:
+                # 基於描述內容推斷
+                if "dining" in description.lower():
+                    base_name = "dining area"
+                elif "seating" in description.lower() or "relaxation" in description.lower():
+                    base_name = "seating area"
+                elif "work" in description.lower():
+                    base_name = "workspace area"
+                elif "decorative" in description.lower():
+                    base_name = "decorative area"
+                else:
+                    base_name = "functional area"
+            # 為次要區域添加位置標識以區分
+            if priority_level == "secondary" and region:
+                spatial_context = self.get_spatial_context_description(region)
+                if spatial_context:
+                    return f"{spatial_context} {base_name}"
+            return base_name
+        except Exception as e:
+            logger.warning(f"Error generating descriptive zone key: {str(e)}")
+            return "activity area"
+    def get_spatial_context_description(self, region: str) -> str:
+        """
+        獲取空間上下文描述
+        Args:
+            region: 區域位置標識
+        Returns:
+            str: 空間上下文描述
+        """
+        try:
+            spatial_mapping = {
+                "top_left": "upper left",
+                "top_center": "upper",
+                "top_right": "upper right",
+                "middle_left": "left side",
+                "middle_center": "central",
+                "middle_right": "right side",
+                "bottom_left": "lower left",
+                "bottom_center": "lower",
+                "bottom_right": "lower right"
+            }
+            return spatial_mapping.get(region, "")
+        except Exception as e:
+            logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
+            return ""

object_description_generator.py CHANGED Viewed

@@ -4,6 +4,11 @@ import traceback
 from typing import Dict, List, Tuple, Optional, Any
 import numpy as np
 class ObjectDescriptionError(Exception):
     """物件描述生成過程中的自定義異常"""
     pass
@@ -12,9 +17,12 @@ class ObjectDescriptionError(Exception):
 class ObjectDescriptionGenerator:
     """
     物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
     該類別處理物件相關的所有描述生成邏輯，包括重要物件的辨識、
     空間位置描述、物件列表格式化以及描述文本的優化。
     """
     def __init__(self,
@@ -31,6 +39,7 @@ class ObjectDescriptionGenerator:
             max_categories_to_return: 返回的物件類別最大數量
             max_total_objects: 返回的物件總數上限
             confidence_threshold_for_description: 用於描述的置信度閾值
         """
         self.logger = logging.getLogger(self.__class__.__name__)
@@ -40,6 +49,23 @@ class ObjectDescriptionGenerator:
         self.confidence_threshold_for_description = confidence_threshold_for_description
         self.region_analyzer = region_analyzer
         self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
                         "max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
                         min_prominence_score, max_categories_to_return,
@@ -59,49 +85,11 @@ class ObjectDescriptionGenerator:
         Returns:
             List[Dict]: 按重要性排序的物件列表
         """
-        try:
-            if not detected_objects:
-                return []
-            prominent_objects = []
-            for obj in detected_objects:
-                # 計算重要性評分
-                prominence_score = self._calculate_prominence_score(obj)
-                # 只保留超過閾值的物件
-                if prominence_score >= min_prominence_score:
-                    obj_copy = obj.copy()
-                    obj_copy['prominence_score'] = prominence_score
-                    prominent_objects.append(obj_copy)
-            # 按重要性評分排序（從高到低）
-            prominent_objects.sort(key=lambda x: x.get('prominence_score', 0), reverse=True)
-            # 如果指定了最大類別數量限制，進行過濾
-            if max_categories_to_return is not None and max_categories_to_return > 0:
-                categories_seen = set()
-                filtered_objects = []
-                for obj in prominent_objects:
-                    class_name = obj.get("class_name", "unknown")
-                    # 如果是新類別且未達到限制
-                    if class_name not in categories_seen:
-                        if len(categories_seen) < max_categories_to_return:
-                            categories_seen.add(class_name)
-                            filtered_objects.append(obj)
-                    else:
-                        # 已見過的類別，直接添加
-                        filtered_objects.append(obj)
-                return filtered_objects
-            return prominent_objects
-        except Exception as e:
-            self.logger.error(f"Error calculating prominent objects: {str(e)}")
-            return []
     def set_region_analyzer(self, region_analyzer: Any) -> None:
         """
@@ -112,107 +100,11 @@ class ObjectDescriptionGenerator:
         """
         try:
             self.region_analyzer = region_analyzer
             self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
         except Exception as e:
             self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
-    def _get_standardized_spatial_description(self, obj: Dict) -> str:
-        """
-        使用RegionAnalyzer生成標準化空間描述的內部方法
-        Args:
-            obj: 物件字典
-        Returns:
-            str: 標準化空間描述，失敗時返回空字串
-        """
-        try:
-            if hasattr(self, 'region_analyzer') and self.region_analyzer:
-                region = obj.get("region", "")
-                object_type = obj.get("class_name", "")
-                if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
-                    return self.region_analyzer.get_contextual_spatial_description(region, object_type)
-                elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
-                    return self.region_analyzer.get_spatial_description_phrase(region)
-            return ""
-        except Exception as e:
-            self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
-            if object_type:
-                return f"visible in the scene"
-            return "present in the view"
-    def _calculate_prominence_score(self, obj: Dict) -> float:
-        """
-        計算物件的重要性評分
-        Args:
-            obj: 物件字典，包含檢測信息
-        Returns:
-            float: 重要性評分 (0.0-1.0)
-        """
-        try:
-            # 基礎置信度評分 (權重: 40%)
-            confidence = obj.get("confidence", 0.5)
-            confidence_score = confidence * 0.4
-            # 大小評分 (權重: 30%)
-            normalized_area = obj.get("normalized_area", 0.1)
-            # 使用對數縮放避免過大物件主導評分
-            size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3
-            # 位置評分 (權重: 20%)
-            # 中心區域的物件通常更重要
-            center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
-            distance_from_center = np.sqrt((center_x - 0.5)**2 + (center_y - 0.5)**2)
-            position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2
-            # 類別重要性評分 (權重: 10%)
-            class_importance = self._get_class_importance(obj.get("class_name", "unknown"))
-            class_score = class_importance * 0.1
-            total_score = confidence_score + size_score + position_score + class_score
-            # 確保評分在有效範圍內
-            return max(0.0, min(1.0, total_score))
-        except Exception as e:
-            self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
-            return 0.5  # 返回中等評分作為備用
-    def _get_class_importance(self, class_name: str) -> float:
-        """
-        根據物件類別返回重要性係數
-        Args:
-            class_name: 物件類別名稱
-        Returns:
-            float: 類別重要性係數 (0.0-1.0)
-        """
-        # 高重要性物件（人、車輛、建築）
-        high_importance = ["person", "car", "truck", "bus", "motorcycle", "bicycle", "building"]
-        # 中等重要性物件（家具、電器）
-        medium_importance = ["chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed"]
-        # 低重要性物件（小物品、配件）
-        low_importance = ["handbag", "backpack", "umbrella", "cell phone", "remote", "mouse"]
-        class_name_lower = class_name.lower()
-        if any(item in class_name_lower for item in high_importance):
-            return 1.0
-        elif any(item in class_name_lower for item in medium_importance):
-            return 0.7
-        elif any(item in class_name_lower for item in low_importance):
-            return 0.4
-        else:
-            return 0.6  # 預設中等重要性
     def format_object_list_for_description(self,
                                           objects: List[Dict],
                                           use_indefinite_article_for_one: bool = False,
@@ -230,65 +122,12 @@ class ObjectDescriptionGenerator:
         Returns:
             str: 格式化的物件描述字符串
         """
-        try:
-            if not objects:
-                return "no specific objects clearly identified"
-            counts: Dict[str, int] = {}
-            for obj in objects:
-                name = obj.get("class_name", "unknown object")
-                if name == "unknown object" or not name:
-                    continue
-                counts[name] = counts.get(name, 0) + 1
-            if not counts:
-                return "no specific objects clearly identified"
-            descriptions = []
-            # 按計數降序然後按名稱升序排序，限制物件類型數量
-            sorted_counts = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:max_types_to_list]
-            for name, count in sorted_counts:
-                if count == 1:
-                    if use_indefinite_article_for_one:
-                        if name[0].lower() in 'aeiou':
-                            descriptions.append(f"an {name}")
-                        else:
-                            descriptions.append(f"a {name}")
-                    else:
-                        descriptions.append(f"one {name}")
-                else:
-                    # 處理複數形式
-                    plural_name = name
-                    if name.endswith("y") and not name.lower().endswith(("ay", "ey", "iy", "oy", "uy")):
-                        plural_name = name[:-1] + "ies"
-                    elif name.endswith(("s", "sh", "ch", "x", "z")):
-                        plural_name = name + "es"
-                    elif not name.endswith("s"):
-                        plural_name = name + "s"
-                    if count_threshold_for_generalization != -1 and count > count_threshold_for_generalization:
-                        if count <= count_threshold_for_generalization + 3:
-                            descriptions.append(f"several {plural_name}")
-                        else:
-                            descriptions.append(f"many {plural_name}")
-                    else:
-                        descriptions.append(f"{count} {plural_name}")
-            if not descriptions:
-                return "no specific objects clearly identified"
-            if len(descriptions) == 1:
-                return descriptions[0]
-            elif len(descriptions) == 2:
-                return f"{descriptions[0]} and {descriptions[1]}"
-            else:
-                # 使用牛津逗號格式
-                return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
-        except Exception as e:
-            self.logger.warning(f"Error formatting object list: {str(e)}")
-            return "various objects"
     def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
                            image_height: Optional[int] = None,
@@ -305,95 +144,16 @@ class ObjectDescriptionGenerator:
         Returns:
             str: 空間描述字符串，空值region時返回空字串
         """
-        try:
-            region = obj.get("region") or ""
-            # 處理空值或無效region，直接返回空字串避免不完整描述
-            if not region.strip() or region == "unknown":
-                # 根據物件類型提供合適的預設位置描述
-                if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
-                    return "positioned in the scene"
-                elif object_type and "person" in object_type.lower():
-                    return "present in the area"
-                else:
-                    return "located in the scene"
-            # 如果提供了RegionAnalyzer實例，使用其標準化方法
-            if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
-                object_type = obj.get("class_name", "")
-                if hasattr(region_analyzer, 'get_contextual_spatial_description'):
-                    spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
-                else:
-                    spatial_desc = region_analyzer.get_spatial_description_phrase(region)
-                if spatial_desc:
-                    return spatial_desc
-            # 備用邏輯：使用改進的內建映射
-            clean_region = region.replace('_', ' ').strip().lower()
-            region_map = {
-                "top left": "in the upper left area",
-                "top center": "in the upper area",
-                "top right": "in the upper right area",
-                "middle left": "on the left side",
-                "middle center": "in the center",
-                "center": "in the center",
-                "middle right": "on the right side",
-                "bottom left": "in the lower left area",
-                "bottom center": "in the lower area",
-                "bottom right": "in the lower right area"
-            }
-            # 直接映射匹配
-            if clean_region in region_map:
-                return region_map[clean_region]
-            # 模糊匹配處理
-            if "top" in clean_region and "left" in clean_region:
-                return "in the upper left area"
-            elif "top" in clean_region and "right" in clean_region:
-                return "in the upper right area"
-            elif "bottom" in clean_region and "left" in clean_region:
-                return "in the lower left area"
-            elif "bottom" in clean_region and "right" in clean_region:
-                return "in the lower right area"
-            elif "top" in clean_region:
-                return "in the upper area"
-            elif "bottom" in clean_region:
-                return "in the lower area"
-            elif "left" in clean_region:
-                return "on the left side"
-            elif "right" in clean_region:
-                return "on the right side"
-            elif "center" in clean_region or "middle" in clean_region:
-                return "in the center"
-            # 如果region無法識別，使用normalized_center作為最後備用
-            norm_center = obj.get("normalized_center")
-            if norm_center and image_width and image_height:
-                x_norm, y_norm = norm_center
-                h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
-                v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"
-                if h_pos == "center" and v_pos == "center":
-                    return "in the center"
-                return f"in the {v_pos} {h_pos} area"
-            # 如果所有方法都失敗，返回空字串
-            return ""
-        except Exception as e:
-            self.logger.warning(f"Error generating spatial description: {str(e)}")
-            return ""
     def optimize_object_description(self, description: str) -> str:
         """
-        優化物件描述文本，消除冗餘重複並改善表達流暢度
-        這個函數是後處理階段的關鍵組件，負責清理和精簡自然語言生成系統
-        產出的描述文字。它專門處理常見的重複問題，如相同物件的重複
-        列舉和冗餘的空間描述，讓最終的描述更簡潔自然。
         Args:
             description: 原始的場景描述文本，可能包含重複或冗餘的表達
@@ -401,164 +161,7 @@ class ObjectDescriptionGenerator:
         Returns:
             str: 經過優化清理的描述文本，如果處理失敗則返回原始文本
         """
-        try:
-            import re
-            # 1. 處理多餘的空間限定表達
-            # 使用通用模式來識別和移除不必要的空間描述
-            # 例如："bed in the room" -> "bed"，因為床本身就表示是室內環境
-            description = self._remove_redundant_spatial_qualifiers(description)
-            # 2. 辨識並處理物件列表的重複問題
-            # 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
-            # 使用正則表達式捕獲 "with" 關鍵字後的物件序列
-            # 注意：正則表達式需要修正以避免貪婪匹配的問題
-            object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
-            # 遍歷每個找到的物件列表進行重複檢測和優化
-            for obj_list in object_lists:
-                # 3. 解析單個物件列表中的項目
-                # 使用更精確的正則表達式來分割物件項目
-                # 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
-                # 需要特別注意處理最後一個 "and" 的情況
-                # 先處理逗號格式 "A, B, and C"
-                if ", and " in obj_list:
-                    # 分割 ", and " 前後的部分
-                    before_last_and = obj_list.rsplit(", and ", 1)[0]
-                    last_item = obj_list.rsplit(", and ", 1)[1]
-                    # 處理前面的項目（用逗號分割）
-                    front_items = [item.strip() for item in before_last_and.split(",")]
-                    # 添加最後一個項目
-                    all_items = front_items + [last_item.strip()]
-                elif " and " in obj_list:
-                    # 處理簡單的 "A and B" 格式
-                    all_items = [item.strip() for item in obj_list.split(" and ")]
-                else:
-                    # 處理純逗號分隔的列表
-                    all_items = [item.strip() for item in obj_list.split(",")]
-                # 4. 統計物件出現頻率
-                # 建立字典來記錄每個物件的出現次數
-                item_counts = {}
-                for item in all_items:
-                    # 清理項目文字並過濾無效內容
-                    item = item.strip()
-                    # 過濾掉連接詞和空白項目
-                    if item and item not in ["and", "with", ""]:
-                        # 移除可能的冠詞前綴以便正確計數
-                        # 例如 "a car" 和 "car" 應該被視為同一項目
-                        clean_item = self._normalize_item_for_counting(item)
-                        if clean_item not in item_counts:
-                            item_counts[clean_item] = 0
-                        item_counts[clean_item] += 1
-                # 5. 生成優化後的物件列表
-                if item_counts:
-                    new_items = []
-                    for item, count in item_counts.items():
-                        if count > 1:
-                            # 對於重複項目，使用數字加複數形式
-                            plural_item = self._make_plural(item)
-                            new_items.append(f"{count} {plural_item}")
-                        else:
-                            # 單個項目保持原樣
-                            new_items.append(item)
-                    # 6. 重新格式化物件列表
-                    # 使用標準的英文列表連接格式
-                    if len(new_items) == 1:
-                        new_list = new_items[0]
-                    elif len(new_items) == 2:
-                        new_list = f"{new_items[0]} and {new_items[1]}"
-                    else:
-                        # 使用逗號格式確保清晰度
-                        new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
-                    # 7. 在原文中替換優化後的列表
-                    # 將原始的多餘列表替換為優化後的簡潔版本
-                    description = description.replace(obj_list, new_list)
-            return description
-        except Exception as e:
-            self.logger.warning(f"Error optimizing object description: {str(e)}")
-            return description
-    def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
-        """
-        移除描述中冗餘的空間限定詞
-        這個方法使用模式匹配來識別和移除不必要的空間描述，例如
-        "bed in the room" 中的 "in the room" 部分通常是多餘的，因為
-        床這個物件本身就是室內環境。
-        Args:
-            description: 包含可能多餘空間描述的文本
-        Returns:
-            str: 移除多餘空間限定詞後的文本
-        """
-        import re
-        # 定義常見的多餘空間表達模式
-        # 這些模式捕獲「物件 + 不必要的空間限定」的情況
-        redundant_patterns = [
-            # 室內物件的多餘房間描述
-            (r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
-            # 廚房物件的多餘描述
-            (r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
-            # 浴室物件的多餘描述
-            (r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
-            # 一般性的多餘表達：「在場景中」、「在圖片中」等
-            (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
-        ]
-        for pattern, replacement in redundant_patterns:
-            description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
-        return description
-    def _normalize_item_for_counting(self, item: str) -> str:
-        """
-        正規化物件項目以便準確計數
-        移除冠詞和其他可能影響計數準確性的前綴詞彙，
-        確保 "a car" 和 "car" 被視為同一物件類型。
-        Args:
-            item: 原始物件項目字串
-        Returns:
-            str: 正規化後的物件項目
-        """
-        # 移除常見的英文冠詞
-        item = re.sub(r'^(a|an|the)\s+', '', item.lower())
-        return item.strip()
-    def _make_plural(self, item: str) -> str:
-        """
-        將單數名詞轉換為複數形式
-        Args:
-            item: 單數形式的名詞
-        Returns:
-            str: 複數形式的名詞
-        """
-        # 重用已經實現的複數化邏輯
-        if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
-            return item[:-1] + "ies"
-        elif item.endswith(("s", "sh", "ch", "x", "z")):
-            return item + "es"
-        elif not item.endswith("s"):
-            return item + "s"
-        else:
-            return item
     def generate_dynamic_everyday_description(self,
                                             detected_objects: List[Dict],
@@ -586,6 +189,7 @@ class ObjectDescriptionGenerator:
         try:
             description_segments = []
             image_width, image_height = image_dimensions if image_dimensions else (None, None)
             self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
                             f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
@@ -625,8 +229,6 @@ class ObjectDescriptionGenerator:
                 else:
                     description_segments.append("Within this setting, no specific objects were clearly identified.")
             else:
-                objects_by_class: Dict[str, List[Dict]] = {}
                 # 使用置信度過濾
                 confident_objects = [obj for obj in detected_objects
                                    if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
@@ -642,172 +244,29 @@ class ObjectDescriptionGenerator:
                     else:
                         description_segments.append(no_confident_obj_msg.lower().capitalize())
                 else:
-                    if object_statistics:
-                        # 使用預計算的統計信息，採用動態的信心度
-                        for class_name, stats in object_statistics.items():
-                            count = stats.get("count", 0)
-                            avg_confidence = stats.get("avg_confidence", 0)
-                            # 動態調整置信度閾值
-                            dynamic_threshold = self.confidence_threshold_for_description
-                            if class_name in ["potted plant", "vase", "clock", "book"]:
-                                dynamic_threshold = max(0.15, self.confidence_threshold_for_description * 0.6)
-                            elif count >= 3:
-                                dynamic_threshold = max(0.2, self.confidence_threshold_for_description * 0.8)
-                            if count > 0 and avg_confidence >= dynamic_threshold:
-                                matching_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
-                                if not matching_objects:
-                                    matching_objects = [obj for obj in detected_objects
-                                                      if obj.get("class_name") == class_name and obj.get("confidence", 0) >= dynamic_threshold]
-                                if matching_objects:
-                                    actual_count = min(stats["count"], len(matching_objects))
-                                    objects_by_class[class_name] = matching_objects[:actual_count]
-                    else:
-                        # 備用邏輯，同樣使用動態閾值
-                        for obj in confident_objects:
-                            name = obj.get("class_name", "unknown object")
-                            if name == "unknown object" or not name:
-                                continue
-                            if name not in objects_by_class:
-                                objects_by_class[name] = []
-                            objects_by_class[name].append(obj)
-                            print(f"DEBUG: Before spatial deduplication:")
-                            for class_name in ["car", "traffic light", "person", "handbag"]:
-                                if class_name in objects_by_class:
-                                    print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
                     if not objects_by_class:
                         description_segments.append("No common objects were confidently identified for detailed description.")
                     else:
-                        # 物件組排序函數
-                        def sort_key_object_groups(item_tuple: Tuple[str, List[Dict]]):
-                            class_name_key, obj_group_list = item_tuple
-                            priority = 3
-                            count = len(obj_group_list)
-                            # 確保類別名稱已標準化
-                            normalized_class_name = self._normalize_object_class_name(class_name_key)
-                            # 動態優先級
-                            if normalized_class_name == "person":
-                                priority = 0
-                            elif normalized_class_name in ["dining table", "chair", "sofa", "bed"]:
-                                priority = 1
-                            elif normalized_class_name in ["car", "bus", "truck", "traffic light"]:
-                                priority = 2
-                            elif count >= 3:
-                                priority = max(1, priority - 1)
-                            elif normalized_class_name in ["potted plant", "vase", "clock", "book"] and count >= 2:
-                                priority = 2
-                            avg_area = sum(o.get("normalized_area", 0.0) for o in obj_group_list) / len(obj_group_list) if obj_group_list else 0
-                            quantity_bonus = min(count / 5.0, 1.0)
-                            return (priority, -len(obj_group_list), -avg_area, -quantity_bonus)
-                        # remove duplicate
-                        deduplicated_objects_by_class = {}
-                        processed_positions = []
-                        for class_name, group_of_objects in objects_by_class.items():
-                            unique_objects = []
-                            for obj in group_of_objects:
-                                obj_position = obj.get("normalized_center", [0.5, 0.5])
-                                is_duplicate = False
-                                for processed_pos in processed_positions:
-                                    position_distance = abs(obj_position[0] - processed_pos[0]) + abs(obj_position[1] - processed_pos[1])
-                                    if position_distance < 0.15:
-                                        is_duplicate = True
-                                        break
-                                if not is_duplicate:
-                                    unique_objects.append(obj)
-                                    processed_positions.append(obj_position)
-                            if unique_objects:
-                                deduplicated_objects_by_class[class_name] = unique_objects
-                        objects_by_class = deduplicated_objects_by_class
-                        print(f"DEBUG: After spatial deduplication:")
-                        for class_name in ["car", "traffic light", "person", "handbag"]:
-                            if class_name in objects_by_class:
-                                print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects after dedup")
-                        sorted_object_groups = sorted(objects_by_class.items(), key=sort_key_object_groups)
-                        object_clauses = []
-                        for class_name, group_of_objects in sorted_object_groups:
-                            count = len(group_of_objects)
-                            if class_name in ["car", "traffic light", "person", "handbag"]:
-                                print(f"DEBUG: Final count for {class_name}: {count}")
-                            if count == 0:
-                                continue
-                            # 標準化class name
-                            normalized_class_name = self._normalize_object_class_name(class_name)
-                            # 使用統計信息確保準確的數量描述
-                            if object_statistics and class_name in object_statistics:
-                                actual_count = object_statistics[class_name]["count"]
-                                formatted_name_with_exact_count = self._format_object_count_description(
-                                    normalized_class_name,
-                                    actual_count,
-                                    scene_type=scene_type
-                                )
-                            else:
-                                formatted_name_with_exact_count = self._format_object_count_description(
-                                    normalized_class_name,
-                                    count,
-                                    scene_type=scene_type
-                                )
-                            if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
-                                continue
-                            # 確定群組的集體位置
-                            location_description_suffix = ""
-                            if count == 1:
-                                spatial_desc = self.get_spatial_description(group_of_objects[0], image_width, image_height, self.region_analyzer)
-                                if spatial_desc:
-                                    location_description_suffix = f"is {spatial_desc}"
-                                else:
-                                    distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
-                                    valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
-                                    if not valid_regions:
-                                        location_description_suffix = "is positioned in the scene"
-                                    elif len(valid_regions) == 1:
-                                        spatial_desc = self.get_spatial_description_phrase(valid_regions[0])
-                                        location_description_suffix = f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
-                                    elif len(valid_regions) == 2:
-                                        clean_region1 = valid_regions[0].replace('_', ' ')
-                                        clean_region2 = valid_regions[1].replace('_', ' ')
-                                        location_description_suffix = f"is mainly across the {clean_region1} and {clean_region2} areas"
-                                    else:
-                                        location_description_suffix = "is distributed in various parts of the scene"
-                            else:
-                                distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
-                                valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
-                                if not valid_regions:
-                                    location_description_suffix = "are visible in the scene"
-                                elif len(valid_regions) == 1:
-                                    clean_region = valid_regions[0].replace('_', ' ')
-                                    location_description_suffix = f"are primarily in the {clean_region} area"
-                                elif len(valid_regions) == 2:
-                                    clean_region1 = valid_regions[0].replace('_', ' ')
-                                    clean_region2 = valid_regions[1].replace('_', ' ')
-                                    location_description_suffix = f"are mainly across the {clean_region1} and {clean_region2} areas"
-                                else:
-                                    location_description_suffix = "are distributed in various parts of the scene"
-                            # 首字母大寫
-                            formatted_name_capitalized = formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
-                            object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")
                         if object_clauses:
                             if not description_segments:
@@ -845,7 +304,7 @@ class ObjectDescriptionGenerator:
                 raw_description += "."
             # 移除重複性和不適當的描述詞彙
-            raw_description = self._remove_repetitive_descriptors(raw_description)
             if not raw_description or len(raw_description.strip()) < 20:
                 if 'confident_objects' in locals() and confident_objects:
@@ -860,586 +319,6 @@ class ObjectDescriptionGenerator:
             self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
             raise ObjectDescriptionError(error_msg) from e
-    def _remove_repetitive_descriptors(self, description: str) -> str:
-        """
-        移除描述中的重複性和不適當的描述詞彙，特別是 "identical" 等詞彙
-        Args:
-            description: 原始描述文本
-        Returns:
-            str: 清理後的描述文本
-        """
-        try:
-            import re
-            # 定義需要移除或替換的模式
-            cleanup_patterns = [
-                # 移除 "identical" 描述模式
-                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
-                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
-                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
-                # 改善 "comprehensive arrangement" 等過於技術性的表達
-                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
-                (r'\bcomprehensive view featuring\b', 'scene featuring'),
-                (r'\bcomprehensive display of\b', 'display of'),
-                # 簡化過度描述性的短語
-                (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
-                (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
-            ]
-            processed_description = description
-            for pattern, replacement in cleanup_patterns:
-                processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
-            # 進一步清理可能的多餘空格
-            processed_description = re.sub(r'\s+', ' ', processed_description).strip()
-            self.logger.debug(f"Cleaned description: removed repetitive descriptors")
-            return processed_description
-        except Exception as e:
-            self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
-            return description
-    def _format_object_count_description(self, class_name: str, count: int,
-                                    scene_type: Optional[str] = None,
-                                    detected_objects: Optional[List[Dict]] = None,
-                                    avg_confidence: float = 0.0) -> str:
-        """
-        格式化物件數量描述的核心方法，整合空間排列、材質推斷和場景語境
-        這個方法是整個物件描述系統的核心，它將多個子功能整合在一起：
-        1. 數字到文字的轉換（避免阿拉伯數字）
-        2. 基於場景的材質推斷
-        3. 空間排列模式的描述
-        4. 語境化的物件描述
-        Args:
-            class_name: 標準化後的類別名稱
-            count: 物件數量
-            scene_type: 場景類型，用於語境化描述
-            detected_objects: 該類型的所有檢測物件，用於空間分析
-            avg_confidence: 平均檢測置信度，影響材質推斷的可信度
-        Returns:
-            str: 完整的格式化數量描述
-        """
-        try:
-            if count <= 0:
-                return ""
-            # 獲取基礎的複數形式
-            plural_form = self._get_plural_form(class_name)
-            # 單數情況的處理
-            if count == 1:
-                return self._format_single_object_description(class_name, scene_type,
-                                                            detected_objects, avg_confidence)
-            # 複數情況的處理
-            return self._format_multiple_objects_description(class_name, count, plural_form,
-                                                        scene_type, detected_objects, avg_confidence)
-        except Exception as e:
-            self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
-            return f"{count} {class_name}s" if count > 1 else class_name
-    def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
-                                        detected_objects: Optional[List[Dict]],
-                                        avg_confidence: float) -> str:
-        """
-        處理單個物件的描述生成
-        對於單個物件，我們重點在於通過材質推斷和位置描述來豐富描述內容，
-        避免簡單的 "a chair" 這樣的描述，而是生成 "a wooden dining chair" 這樣的表達
-        Args:
-            class_name: 物件類別名稱
-            scene_type: 場景類型
-            detected_objects: 檢測物件列表
-            avg_confidence: 平均置信度
-        Returns:
-            str: 單個物件的完整描述
-        """
-        article = "an" if class_name[0].lower() in 'aeiou' else "a"
-        # 獲取材質描述符
-        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
-        # 獲取位置或特徵描述符
-        feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
-        # 組合描述
-        descriptors = []
-        if material_descriptor:
-            descriptors.append(material_descriptor)
-        if feature_descriptor:
-            descriptors.append(feature_descriptor)
-        if descriptors:
-            return f"{article} {' '.join(descriptors)} {class_name}"
-        else:
-            return f"{article} {class_name}"
-    def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
-                                        scene_type: Optional[str], detected_objects: Optional[List[Dict]],
-                                        avg_confidence: float) -> str:
-        """
-        處理多個物件的描述生成
-        對於多個物件，我們的重點是：
-        1. 將數字轉換為文字表達
-        2. 分析空間排列模式
-        3. 添加適當的材質或功能描述
-        4. 生成自然流暢的描述
-        Args:
-            class_name: 物件類別名稱
-            count: 物件數量
-            plural_form: 複數形式
-            scene_type: 場景類型
-            detected_objects: 檢測物件列表
-            avg_confidence: 平均置信度
-        Returns:
-            str: 多個物件的完整描述
-        """
-        # 數字到文字的轉換映射
-        number_words = {
-            2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
-            7: "seven", 8: "eight", 9: "nine", 10: "ten",
-            11: "eleven", 12: "twelve"
-        }
-        # 確定基礎數量表達
-        if count in number_words:
-            count_expression = number_words[count]
-        elif count <= 20:
-            count_expression = "several"
-        else:
-            count_expression = "numerous"
-        # 獲取材質或功能描述符
-        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
-        # 獲取空間排列描述
-        spatial_descriptor = self._get_spatial_arrangement_descriptor(class_name, scene_type,
-                                                                    detected_objects, count)
-        # 組合最終描述
-        descriptors = []
-        if material_descriptor:
-            descriptors.append(material_descriptor)
-        # 構建基礎描述
-        base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
-        # 添加空間排列信息
-        if spatial_descriptor:
-            return f"{base_description} {spatial_descriptor}"
-        else:
-            return base_description
-    def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
-                            avg_confidence: float) -> Optional[str]:
-        """
-        基於場景語境和置信度進行材質推斷
-        這個方法實現了智能的材質推斷，它不依賴複雜的圖像分析，
-        而是基於常識和場景邏輯來推斷最可能的材質描述
-        Args:
-            class_name: 物件類別名稱
-            scene_type: 場景類型
-            avg_confidence: 檢測置信度，影響推斷的保守程度
-        Returns:
-            Optional[str]: 材質描述符，如果無法推斷則返回None
-        """
-        # 只有在置信度足夠高時才進行材質推斷
-        if avg_confidence < 0.5:
-            return None
-        # 餐廳和用餐相關場景
-        if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
-            material_mapping = {
-                "chair": "wooden" if avg_confidence > 0.7 else None,
-                "dining table": "wooden",
-                "couch": "upholstered",
-                "vase": "decorative"
-            }
-            return material_mapping.get(class_name)
-        # 辦公場景
-        elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
-            material_mapping = {
-                "chair": "office",
-                "dining table": "conference",  # 在辦公環境中，餐桌通常是會議桌
-                "laptop": "modern",
-                "book": "reference"
-            }
-            return material_mapping.get(class_name)
-        # 客廳場景
-        elif scene_type and scene_type in ["living_room"]:
-            material_mapping = {
-                "couch": "comfortable",
-                "chair": "accent",
-                "tv": "large",
-                "vase": "decorative"
-            }
-            return material_mapping.get(class_name)
-        # 室外場景
-        elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
-            material_mapping = {
-                "car": "parked",
-                "person": "walking",
-                "bicycle": "stationed"
-            }
-            return material_mapping.get(class_name)
-        # 如果沒有特定的場景映射，返回通用描述符
-        generic_mapping = {
-            "chair": "comfortable",
-            "dining table": "sturdy",
-            "car": "parked",
-            "person": "present"
-        }
-        return generic_mapping.get(class_name)
-    def _get_spatial_arrangement_descriptor(self, class_name: str, scene_type: Optional[str],
-                                        detected_objects: Optional[List[Dict]],
-                                        count: int) -> Optional[str]:
-        """
-        分析物件的空間排列模式並生成相應描述
-        這個方法通過分析物件的位置分布來判斷排列模式，
-        然後根據物件類型和場景生成適當的空間描述
-        Args:
-            class_name: 物件類別名稱
-            scene_type: 場景類型
-            detected_objects: 該類型的所有檢測物件
-            count: 物件數量
-        Returns:
-            Optional[str]: 空間排列描述，如果無法分析則返回None
-        """
-        if not detected_objects or len(detected_objects) < 2:
-            return None
-        try:
-            # 提取物件的標準化位置
-            positions = []
-            for obj in detected_objects:
-                center = obj.get("normalized_center", [0.5, 0.5])
-                if isinstance(center, (list, tuple)) and len(center) >= 2:
-                    positions.append(center)
-            if len(positions) < 2:
-                return None
-            # 分析排列模式
-            arrangement_pattern = self._analyze_arrangement_pattern(positions)
-            # 根據物件類型和場景生成描述
-            return self._generate_arrangement_description(class_name, scene_type,
-                                                        arrangement_pattern, count)
-        except Exception as e:
-            self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
-            return None
-    def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
-        """
-        分析位置點的排列模式
-        這個方法使用簡單的幾何分析來判斷物件的排列類型，
-        幫助我們理解物件在空間中的組織方式
-        Args:
-            positions: 標準化的位置座標列表
-        Returns:
-            str: 排列模式類型（linear, clustered, scattered, circular等）
-        """
-        import numpy as np
-        if len(positions) < 2:
-            return "single"
-        # 轉換為numpy陣列便於計算
-        pos_array = np.array(positions)
-        # 計算位置的分布特徵
-        x_coords = pos_array[:, 0]
-        y_coords = pos_array[:, 1]
-        # 分析x和y方向的變異程度
-        x_variance = np.var(x_coords)
-        y_variance = np.var(y_coords)
-        # 計算物件間的平均距離
-        distances = []
-        for i in range(len(positions)):
-            for j in range(i + 1, len(positions)):
-                dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
-                            (positions[i][1] - positions[j][1])**2)
-                distances.append(dist)
-        avg_distance = np.mean(distances) if distances else 0
-        distance_variance = np.var(distances) if distances else 0
-        # 判斷排列模式
-        if len(positions) >= 4 and self._is_circular_pattern(positions):
-            return "circular"
-        elif x_variance < 0.05 or y_variance < 0.05:  # 一個方向變異很小
-            return "linear"
-        elif avg_distance < 0.3 and distance_variance < 0.02:  # 物件聚集且距離相近
-            return "clustered"
-        elif avg_distance > 0.6:  # 物件分散
-            return "scattered"
-        elif distance_variance < 0.03:  # 距離一致，可能是規則排列
-            return "regular"
-        else:
-            return "distributed"
-    def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
-        """
-        檢查位置是否形成圓形或環形排列
-        Args:
-            positions: 位置座標列表
-        Returns:
-            bool: 是否為圓形排列
-        """
-        import numpy as np
-        if len(positions) < 4:
-            return False
-        try:
-            pos_array = np.array(positions)
-            # 計算中心點
-            center_x = np.mean(pos_array[:, 0])
-            center_y = np.mean(pos_array[:, 1])
-            # 計算每個點到中心的距離
-            distances_to_center = []
-            for pos in positions:
-                dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
-                distances_to_center.append(dist)
-            # 如果所有距離都相近，可能是圓形排列
-            distance_variance = np.var(distances_to_center)
-            return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
-        except:
-            return False
-    def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
-                                        arrangement_pattern: str, count: int) -> Optional[str]:
-        """
-        根據物件類型、場景和排列模式生成空間描述
-        這個方法將抽象的排列模式轉換為自然語言描述，
-        並根據具體的物件類型和場景語境進行定制
-        Args:
-            class_name: 物件類別名稱
-            scene_type: 場景類型
-            arrangement_pattern: 排列模式
-            count: 物件數量
-        Returns:
-            Optional[str]: 生成的空間排列描述
-        """
-        # 基於物件類型的描述模板
-        arrangement_templates = {
-            "chair": {
-                "linear": "arranged in a row",
-                "clustered": "grouped together for conversation",
-                "circular": "arranged around the table",
-                "scattered": "positioned throughout the space",
-                "regular": "evenly spaced",
-                "distributed": "thoughtfully positioned"
-            },
-            "dining table": {
-                "linear": "aligned to create a unified dining space",
-                "clustered": "grouped to form intimate dining areas",
-                "scattered": "distributed to optimize space flow",
-                "regular": "systematically positioned",
-                "distributed": "strategically placed"
-            },
-            "car": {
-                "linear": "parked in sequence",
-                "clustered": "grouped in the parking area",
-                "scattered": "distributed throughout the lot",
-                "regular": "neatly parked",
-                "distributed": "positioned across the area"
-            },
-            "person": {
-                "linear": "moving in a line",
-                "clustered": "gathered together",
-                "circular": "forming a circle",
-                "scattered": "spread across the area",
-                "distributed": "positioned throughout the scene"
-            }
-        }
-        # 獲取對應的描述模板
-        if class_name in arrangement_templates:
-            template_dict = arrangement_templates[class_name]
-            base_description = template_dict.get(arrangement_pattern, "positioned in the scene")
-        else:
-            # 通用的排列描述
-            generic_templates = {
-                "linear": "arranged in a line",
-                "clustered": "grouped together",
-                "circular": "arranged in a circular pattern",
-                "scattered": "distributed across the space",
-                "regular": "evenly positioned",
-                "distributed": "thoughtfully placed"
-            }
-            base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
-        return base_description
-    def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
-                                detected_objects: Optional[List[Dict]]) -> Optional[str]:
-        """
-        為單個物件生成特徵描述符
-        當只有一個物件時，我們可以提供更具體的位置或功能描述
-        Args:
-            class_name: 物件類別名稱
-            scene_type: 場景類型
-            detected_objects: 檢測物件（單個）
-        Returns:
-            Optional[str]: 特徵描述符
-        """
-        if not detected_objects or len(detected_objects) != 1:
-            return None
-        obj = detected_objects[0]
-        region = obj.get("region", "").lower()
-        # 基於位置的描述
-        if "center" in region:
-            if class_name == "dining table":
-                return "central"
-            elif class_name == "chair":
-                return "centrally placed"
-        elif "corner" in region or "left" in region or "right" in region:
-            return "positioned"
-        # 基於場景的功能描述
-        if scene_type and scene_type in ["dining_area", "restaurant"]:
-            if class_name == "chair":
-                return "dining"
-            elif class_name == "vase":
-                return "decorative"
-        return None
-    def _get_plural_form(self, word: str) -> str:
-        """
-        獲取詞彙的複數形式
-        Args:
-            word: 單數詞彙
-        Returns:
-            str: 複數形式
-        """
-        try:
-            # 特殊複數形式
-            irregular_plurals = {
-                'person': 'people',
-                'child': 'children',
-                'foot': 'feet',
-                'tooth': 'teeth',
-                'mouse': 'mice',
-                'man': 'men',
-                'woman': 'women'
-            }
-            if word.lower() in irregular_plurals:
-                return irregular_plurals[word.lower()]
-            # 規則複數形式
-            if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
-                return word + 'es'
-            elif word.endswith('y') and word[-2] not in 'aeiou':
-                return word[:-1] + 'ies'
-            elif word.endswith('f'):
-                return word[:-1] + 'ves'
-            elif word.endswith('fe'):
-                return word[:-2] + 'ves'
-            else:
-                return word + 's'
-        except Exception as e:
-            self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
-            return word + 's'
-    def _normalize_object_class_name(self, class_name: str) -> str:
-        """
-        標準化物件類別名稱，確保輸出自然語言格式
-        Args:
-            class_name: 原始類別名稱
-        Returns:
-            str: 標準化後的類別名稱
-        """
-        try:
-            if not class_name or not isinstance(class_name, str):
-                return "object"
-            # 移除可能的技術性前綴或後綴
-            import re
-            normalized = re.sub(r'^(class_|id_|type_)', '', class_name.lower())
-            normalized = re.sub(r'(_class|_id|_type)$', '', normalized)
-            # 將下劃線和連字符替換為空格
-            normalized = normalized.replace('_', ' ').replace('-', ' ')
-            # 移除多餘空格
-            normalized = ' '.join(normalized.split())
-            # 特殊類別名稱的標準化映射
-            class_name_mapping = {
-                'traffic light': 'traffic light',
-                'stop sign': 'stop sign',
-                'fire hydrant': 'fire hydrant',
-                'dining table': 'dining table',
-                'potted plant': 'potted plant',
-                'tv monitor': 'television',
-                'cell phone': 'mobile phone',
-                'wine glass': 'wine glass',
-                'hot dog': 'hot dog',
-                'teddy bear': 'teddy bear',
-                'hair drier': 'hair dryer',
-                'toothbrush': 'toothbrush'
-            }
-            return class_name_mapping.get(normalized, normalized)
-        except Exception as e:
-            self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
-            return class_name if isinstance(class_name, str) else "object"
     def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
         """
         當模板不可用時生成基本詳細信息
@@ -1588,7 +467,7 @@ class ObjectDescriptionGenerator:
                     furniture_names = []
                     for obj in furniture_objects[:3]:
                         raw_name = obj.get("class_name", "furniture")
-                        normalized_name = self._normalize_object_class_name(raw_name)
                         furniture_names.append(normalized_name)
                     unique_names = list(set(furniture_names))
@@ -1786,7 +665,6 @@ class ObjectDescriptionGenerator:
                 return "functional area"
             # 移除數字後綴（如 crossing_zone_1 -> crossing_zone）
-            import re
             base_name = re.sub(r'_\d+$', '', zone_name)
             # 將下劃線替換為空格
@@ -1851,9 +729,16 @@ class ObjectDescriptionGenerator:
                     old_value = getattr(self, key)
                     setattr(self, key, value)
                     self.logger.info(f"Updated {key}: {old_value} -> {value}")
                 else:
                     self.logger.warning(f"Unknown configuration parameter: {key}")
         except Exception as e:
             self.logger.error(f"Error updating configuration: {str(e)}")
-            raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e

 from typing import Dict, List, Tuple, Optional, Any
 import numpy as np
+from prominence_calculator import ProminenceCalculator
+from spatial_location_handler import SpatialLocationHandler
+from text_optimizer import TextOptimizer
+from object_group_processor import ObjectGroupProcessor
 class ObjectDescriptionError(Exception):
     """Custom exception raised when object description generation fails."""
 class ObjectDescriptionGenerator:
     """
     物件描述生成器 - 負責將檢測到的物件轉換為自然語言描述
+    匯總於EnhancedSceneDescriber
     該類別處理物件相關的所有描述生成邏輯，包括重要物件的辨識、
     空間位置描述、物件列表格式化以及描述文本的優化。
+    作為 Facade 模式的實現，協調四個專門的子組件來完成複雜的描述生成任務。
     """
     def __init__(self,
             max_categories_to_return: 返回的物件類別最大數量
             max_total_objects: 返回的物件總數上限
             confidence_threshold_for_description: 用於描述的置信度閾值
+            region_analyzer: 可選的RegionAnalyzer實例
         """
         self.logger = logging.getLogger(self.__class__.__name__)
         self.confidence_threshold_for_description = confidence_threshold_for_description
         self.region_analyzer = region_analyzer
+        # 初始化子組件
+        self.prominence_calculator = ProminenceCalculator(
+            min_prominence_score=self.min_prominence_score
+        )
+        self.spatial_handler = SpatialLocationHandler(
+            region_analyzer=self.region_analyzer
+        )
+        self.text_optimizer = TextOptimizer()
+        self.object_group_processor = ObjectGroupProcessor(
+            confidence_threshold_for_description=self.confidence_threshold_for_description,
+            spatial_handler=self.spatial_handler,
+            text_optimizer=self.text_optimizer
+        )
         self.logger.info("ObjectDescriptionGenerator initialized with prominence_score=%.2f, "
                         "max_categories=%d, max_objects=%d, confidence_threshold=%.2f",
                         min_prominence_score, max_categories_to_return,
         Returns:
             List[Dict]: 按重要性排序的物件列表
         """
+        return self.prominence_calculator.filter_prominent_objects(
+            detected_objects=detected_objects,
+            min_prominence_score=min_prominence_score,
+            max_categories_to_return=max_categories_to_return
+        )
     def set_region_analyzer(self, region_analyzer: Any) -> None:
         """
         """
         try:
             self.region_analyzer = region_analyzer
+            self.spatial_handler.set_region_analyzer(region_analyzer)
             self.logger.info("RegionAnalyzer instance set for ObjectDescriptionGenerator")
         except Exception as e:
             self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
     def format_object_list_for_description(self,
                                           objects: List[Dict],
                                           use_indefinite_article_for_one: bool = False,
         Returns:
             str: 格式化的物件描述字符串
         """
+        return self.text_optimizer.format_object_list_for_description(
+            objects=objects,
+            use_indefinite_article_for_one=use_indefinite_article_for_one,
+            count_threshold_for_generalization=count_threshold_for_generalization,
+            max_types_to_list=max_types_to_list
+        )
     def get_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
                            image_height: Optional[int] = None,
         Returns:
             str: 空間描述字符串，空值region時返回空字串
         """
+        return self.spatial_handler.generate_spatial_description(
+            obj=obj,
+            image_width=image_width,
+            image_height=image_height,
+            region_analyzer=region_analyzer
+        )
     def optimize_object_description(self, description: str) -> str:
         """
+        優化物件描述文本，消除多餘重複並改善表達流暢度
         Args:
             description: 原始的場景描述文本，可能包含重複或冗餘的表達
         Returns:
             str: 經過優化清理的描述文本，如果處理失敗則返回原始文本
         """
+        return self.text_optimizer.optimize_object_description(description)
     def generate_dynamic_everyday_description(self,
                                             detected_objects: List[Dict],
         try:
             description_segments = []
             image_width, image_height = image_dimensions if image_dimensions else (None, None)
+            scene_type = places365_info.get("scene", "") if places365_info else ""
             self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
                             f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
                 else:
                     description_segments.append("Within this setting, no specific objects were clearly identified.")
             else:
                 # 使用置信度過濾
                 confident_objects = [obj for obj in detected_objects
                                    if obj.get("confidence", 0) >= self.confidence_threshold_for_description]
                     else:
                         description_segments.append(no_confident_obj_msg.lower().capitalize())
                 else:
+                    # 使用 ObjectGroupProcessor 處理物件分組和排序
+                    objects_by_class = self.object_group_processor.group_objects_by_class(
+                        confident_objects, object_statistics
+                    )
                     if not objects_by_class:
                         description_segments.append("No common objects were confidently identified for detailed description.")
                     else:
+                        # 移除重複物件
+                        deduplicated_objects_by_class = self.object_group_processor.remove_duplicate_objects(
+                            objects_by_class
+                        )
+                        # 排序物件組
+                        sorted_object_groups = self.object_group_processor.sort_object_groups(
+                            deduplicated_objects_by_class
+                        )
+                        # 生成物件描述子句
+                        object_clauses = self.object_group_processor.generate_object_clauses(
+                            sorted_object_groups, object_statistics, scene_type,
+                            image_width, image_height, self.region_analyzer
+                        )
                         if object_clauses:
                             if not description_segments:
                 raw_description += "."
             # 移除重複性和不適當的描述詞彙
+            raw_description = self.text_optimizer.remove_repetitive_descriptors(raw_description)
             if not raw_description or len(raw_description.strip()) < 20:
                 if 'confident_objects' in locals() and confident_objects:
             self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
             raise ObjectDescriptionError(error_msg) from e
     def generate_basic_details(self, scene_type: str, detected_objects: List[Dict]) -> str:
         """
         當模板不可用時生成基本詳細信息
                     furniture_names = []
                     for obj in furniture_objects[:3]:
                         raw_name = obj.get("class_name", "furniture")
+                        normalized_name = self.text_optimizer.normalize_object_class_name(raw_name)
                         furniture_names.append(normalized_name)
                     unique_names = list(set(furniture_names))
                 return "functional area"
             # 移除數字後綴（如 crossing_zone_1 -> crossing_zone）
             base_name = re.sub(r'_\d+$', '', zone_name)
             # 將下劃線替換為空格
                     old_value = getattr(self, key)
                     setattr(self, key, value)
                     self.logger.info(f"Updated {key}: {old_value} -> {value}")
+                    # 同步更新子組件的配置
+                    if key == "min_prominence_score" and hasattr(self, 'prominence_calculator'):
+                        self.prominence_calculator.min_prominence_score = value
+                    elif key == "confidence_threshold_for_description" and hasattr(self, 'object_group_processor'):
+                        self.object_group_processor.confidence_threshold_for_description = value
                 else:
                     self.logger.warning(f"Unknown configuration parameter: {key}")
         except Exception as e:
             self.logger.error(f"Error updating configuration: {str(e)}")
+            raise ObjectDescriptionError(f"Failed to update configuration: {str(e)}") from e

object_group_processor.py ADDED Viewed

	@@ -0,0 +1,397 @@

+import logging
+from typing import Dict, List, Tuple, Optional, Any
+class ObjectGroupProcessor:
+    """
+    物件組處理器 - 專門處理物件分組、排序和子句生成的邏輯
+    負責物件按類別分組、重複物件檢測移除、物件組優先級排序以及描述子句的生成
+    """
+    def __init__(self, confidence_threshold_for_description: float = 0.25,
+                 spatial_handler: Optional[Any] = None,
+                 text_optimizer: Optional[Any] = None):
+        """
+        初始化物件組處理器
+        Args:
+            confidence_threshold_for_description: 用於描述的置信度閾值
+            spatial_handler: 空間位置處理器實例
+            text_optimizer: 文本優化器實例
+        """
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.confidence_threshold_for_description = confidence_threshold_for_description
+        self.spatial_handler = spatial_handler
+        self.text_optimizer = text_optimizer
+    def group_objects_by_class(self, confident_objects: List[Dict],
+                              object_statistics: Optional[Dict]) -> Dict[str, List[Dict]]:
+        """
+        按類別分組物件
+        Args:
+            confident_objects: 置信度過濾後的物件
+            object_statistics: 物件統計信息
+        Returns:
+            Dict[str, List[Dict]]: 按類別分組的物件
+        """
+        objects_by_class = {}
+        if object_statistics:
+            # 使用預計算的統計信息，採用動態的信心度
+            for class_name, stats in object_statistics.items():
+                count = stats.get("count", 0)
+                avg_confidence = stats.get("avg_confidence", 0)
+                # 動態調整置信度閾值
+                dynamic_threshold = self.confidence_threshold_for_description
+                if class_name in ["potted plant", "vase", "clock", "book"]:
+                    dynamic_threshold = max(0.15, self.confidence_threshold_for_description * 0.6)
+                elif count >= 3:
+                    dynamic_threshold = max(0.2, self.confidence_threshold_for_description * 0.8)
+                if count > 0 and avg_confidence >= dynamic_threshold:
+                    matching_objects = [obj for obj in confident_objects if obj.get("class_name") == class_name]
+                    if not matching_objects:
+                        matching_objects = [obj for obj in confident_objects
+                                          if obj.get("class_name") == class_name and obj.get("confidence", 0) >= dynamic_threshold]
+                    if matching_objects:
+                        actual_count = min(stats["count"], len(matching_objects))
+                        objects_by_class[class_name] = matching_objects[:actual_count]
+                        # Debug logging for specific classes
+                        if class_name in ["car", "traffic light", "person", "handbag"]:
+                            print(f"DEBUG: Before spatial deduplication:")
+                            print(f"DEBUG: {class_name}: {len(objects_by_class[class_name])} objects before dedup")
+        else:
+            # 備用邏輯，同樣使用動態閾值
+            for obj in confident_objects:
+                name = obj.get("class_name", "unknown object")
+                if name == "unknown object" or not name:
+                    continue
+                if name not in objects_by_class:
+                    objects_by_class[name] = []
+                objects_by_class[name].append(obj)
+        return objects_by_class
+    def remove_duplicate_objects(self, objects_by_class: Dict[str, List[Dict]]) -> Dict[str, List[Dict]]:
+        """
+        移除重複物件
+        Args:
+            objects_by_class: 按類別分組的物件
+        Returns:
+            Dict[str, List[Dict]]: 去重後的物件
+        """
+        deduplicated_objects_by_class = {}
+        processed_positions = []
+        for class_name, group_of_objects in objects_by_class.items():
+            unique_objects = []
+            for obj in group_of_objects:
+                obj_position = obj.get("normalized_center", [0.5, 0.5])
+                is_duplicate = False
+                for processed_pos in processed_positions:
+                    position_distance = abs(obj_position[0] - processed_pos[0]) + abs(obj_position[1] - processed_pos[1])
+                    if position_distance < 0.15:
+                        is_duplicate = True
+                        break
+                if not is_duplicate:
+                    unique_objects.append(obj)
+                    processed_positions.append(obj_position)
+            if unique_objects:
+                deduplicated_objects_by_class[class_name] = unique_objects
+        # Debug logging after deduplication
+        for class_name in ["car", "traffic light", "person", "handbag"]:
+            if class_name in deduplicated_objects_by_class:
+                print(f"DEBUG: After spatial deduplication:")
+                print(f"DEBUG: {class_name}: {len(deduplicated_objects_by_class[class_name])} objects after dedup")
+        return deduplicated_objects_by_class
+    def sort_object_groups(self, objects_by_class: Dict[str, List[Dict]]) -> List[Tuple[str, List[Dict]]]:
+        """
+        排序物件組
+        Args:
+            objects_by_class: 按類別分組的物件
+        Returns:
+            List[Tuple[str, List[Dict]]]: 排序後的物件組
+        """
+        def sort_key_object_groups(item_tuple: Tuple[str, List[Dict]]):
+            class_name_key, obj_group_list = item_tuple
+            priority = 3
+            count = len(obj_group_list)
+            # 確保類別名稱已標準化
+            normalized_class_name = self._normalize_object_class_name(class_name_key)
+            # 動態優先級
+            if normalized_class_name == "person":
+                priority = 0
+            elif normalized_class_name in ["dining table", "chair", "sofa", "bed"]:
+                priority = 1
+            elif normalized_class_name in ["car", "bus", "truck", "traffic light"]:
+                priority = 2
+            elif count >= 3:
+                priority = max(1, priority - 1)
+            elif normalized_class_name in ["potted plant", "vase", "clock", "book"] and count >= 2:
+                priority = 2
+            avg_area = sum(o.get("normalized_area", 0.0) for o in obj_group_list) / len(obj_group_list) if obj_group_list else 0
+            quantity_bonus = min(count / 5.0, 1.0)
+            return (priority, -len(obj_group_list), -avg_area, -quantity_bonus)
+        return sorted(objects_by_class.items(), key=sort_key_object_groups)
+    def generate_object_clauses(self, sorted_object_groups: List[Tuple[str, List[Dict]]],
+                               object_statistics: Optional[Dict],
+                               scene_type: str,
+                               image_width: Optional[int],
+                               image_height: Optional[int],
+                               region_analyzer: Optional[Any] = None) -> List[str]:
+        """
+        生成物件描述子句
+        Args:
+            sorted_object_groups: 排序後的物件組
+            object_statistics: 物件統計信息
+            scene_type: 場景類型
+            image_width: 圖像寬度
+            image_height: 圖像高度
+            region_analyzer: 區域分析器實例
+        Returns:
+            List[str]: 物件描述子句列表
+        """
+        object_clauses = []
+        for class_name, group_of_objects in sorted_object_groups:
+            count = len(group_of_objects)
+            # Debug logging for final count
+            if class_name in ["car", "traffic light", "person", "handbag"]:
+                print(f"DEBUG: Final count for {class_name}: {count}")
+            if count == 0:
+                continue
+            # 標準化class name
+            normalized_class_name = self._normalize_object_class_name(class_name)
+            # 使用統計信息確保準確的數量描述
+            if object_statistics and class_name in object_statistics:
+                actual_count = object_statistics[class_name]["count"]
+                formatted_name_with_exact_count = self._format_object_count_description(
+                    normalized_class_name,
+                    actual_count,
+                    scene_type=scene_type
+                )
+            else:
+                formatted_name_with_exact_count = self._format_object_count_description(
+                    normalized_class_name,
+                    count,
+                    scene_type=scene_type
+                )
+            if formatted_name_with_exact_count == "no specific objects clearly identified" or not formatted_name_with_exact_count:
+                continue
+            # 確定群組的集體位置
+            location_description_suffix = self._generate_location_description(
+                group_of_objects, count, image_width, image_height, region_analyzer
+            )
+            # 首字母大寫
+            formatted_name_capitalized = formatted_name_with_exact_count[0].upper() + formatted_name_with_exact_count[1:]
+            object_clauses.append(f"{formatted_name_capitalized} {location_description_suffix}")
+        return object_clauses
+    def format_object_clauses(self, object_clauses: List[str]) -> str:
+        """
+        格式化物件描述子句
+        Args:
+            object_clauses: 物件描述子句列表
+        Returns:
+            str: 格式化後的描述
+        """
+        if not object_clauses:
+            return "No common objects were confidently identified for detailed description."
+        # 處理第一個子句
+        first_clause = object_clauses.pop(0)
+        result = first_clause + "."
+        # 處理剩餘子句
+        if object_clauses:
+            result += " The scene features:"
+            joined_object_clauses = ". ".join(object_clauses)
+            if joined_object_clauses and not joined_object_clauses.endswith("."):
+                joined_object_clauses += "."
+            result += " " + joined_object_clauses
+        return result
+    def _generate_location_description(self, group_of_objects: List[Dict], count: int,
+                                     image_width: Optional[int], image_height: Optional[int],
+                                     region_analyzer: Optional[Any] = None) -> str:
+        """
+        生成位置描述
+        Args:
+            group_of_objects: 物件組
+            count: 物件數量
+            image_width: 圖像寬度
+            image_height: 圖像高度
+            region_analyzer: 區域分析器實例
+        Returns:
+            str: 位置描述
+        """
+        if count == 1:
+            if self.spatial_handler:
+                spatial_desc = self.spatial_handler.generate_spatial_description(
+                    group_of_objects[0], image_width, image_height, region_analyzer
+                )
+            else:
+                spatial_desc = self._get_spatial_description_phrase(group_of_objects[0].get("region", ""))
+            if spatial_desc:
+                return f"is {spatial_desc}"
+            else:
+                distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
+                valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
+                if not valid_regions:
+                    return "is positioned in the scene"
+                elif len(valid_regions) == 1:
+                    spatial_desc = self._get_spatial_description_phrase(valid_regions[0])
+                    return f"is primarily {spatial_desc}" if spatial_desc else "is positioned in the scene"
+                elif len(valid_regions) == 2:
+                    clean_region1 = valid_regions[0].replace('_', ' ')
+                    clean_region2 = valid_regions[1].replace('_', ' ')
+                    return f"is mainly across the {clean_region1} and {clean_region2} areas"
+                else:
+                    return "is distributed in various parts of the scene"
+        else:
+            distinct_regions = sorted(list(set(obj.get("region", "") for obj in group_of_objects if obj.get("region"))))
+            valid_regions = [r for r in distinct_regions if r and r != "unknown" and r.strip()]
+            if not valid_regions:
+                return "are visible in the scene"
+            elif len(valid_regions) == 1:
+                clean_region = valid_regions[0].replace('_', ' ')
+                return f"are primarily in the {clean_region} area"
+            elif len(valid_regions) == 2:
+                clean_region1 = valid_regions[0].replace('_', ' ')
+                clean_region2 = valid_regions[1].replace('_', ' ')
+                return f"are mainly across the {clean_region1} and {clean_region2} areas"
+            else:
+                return "are distributed in various parts of the scene"
+    def _get_spatial_description_phrase(self, region: str) -> str:
+        """
+        獲取空間描述短語的備用方法
+        Args:
+            region: 區域字符串
+        Returns:
+            str: 空間描述短語
+        """
+        if not region or region == "unknown":
+            return ""
+        clean_region = region.replace('_', ' ').strip().lower()
+        region_map = {
+            "top left": "in the upper left area",
+            "top center": "in the upper area",
+            "top right": "in the upper right area",
+            "middle left": "on the left side",
+            "middle center": "in the center",
+            "center": "in the center",
+            "middle right": "on the right side",
+            "bottom left": "in the lower left area",
+            "bottom center": "in the lower area",
+            "bottom right": "in the lower right area"
+        }
+        return region_map.get(clean_region, "")
+    def _normalize_object_class_name(self, class_name: str) -> str:
+        """
+        標準化物件類別名稱
+        Args:
+            class_name: 原始類別名稱
+        Returns:
+            str: 標準化後的類別名稱
+        """
+        if self.text_optimizer:
+            return self.text_optimizer.normalize_object_class_name(class_name)
+        else:
+            # 備用標準化邏輯
+            if not class_name or not isinstance(class_name, str):
+                return "object"
+            # 簡單的標準化處理
+            normalized = class_name.replace('_', ' ').strip().lower()
+            return normalized
+    def _format_object_count_description(self, class_name: str, count: int,
+                                       scene_type: Optional[str] = None,
+                                       detected_objects: Optional[List[Dict]] = None,
+                                       avg_confidence: float = 0.0) -> str:
+        """
+        格式化物件數量描述
+        Args:
+            class_name: 標準化後的類別名稱
+            count: 物件數量
+            scene_type: 場景類型
+            detected_objects: 該類型的所有檢測物件
+            avg_confidence: 平均檢測置信度
+        Returns:
+            str: 完整的格式化數量描述
+        """
+        if self.text_optimizer:
+            return self.text_optimizer.format_object_count_description(
+                class_name, count, scene_type, detected_objects, avg_confidence
+            )
+        else:
+            # 備用格式化邏輯
+            if count <= 0:
+                return ""
+            elif count == 1:
+                article = "an" if class_name[0].lower() in 'aeiou' else "a"
+                return f"{article} {class_name}"
+            else:
+                # 簡單的複數處理
+                plural_form = class_name + "s" if not class_name.endswith("s") else class_name
+                number_words = {
+                    2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
+                    7: "seven", 8: "eight", 9: "nine", 10: "ten",
+                    11: "eleven", 12: "twelve"
+                }
+                if count in number_words:
+                    return f"{number_words[count]} {plural_form}"
+                elif count <= 20:
+                    return f"several {plural_form}"
+                else:
+                    return f"numerous {plural_form}"

pattern_analyzer.py ADDED Viewed

	@@ -0,0 +1,371 @@

+import logging
+import traceback
+import numpy as np
+from typing import Dict, List, Any, Optional
+logger = logging.getLogger(__name__)
+class PatternAnalyzer:
+    """
+    負責各種模式分析，包含交通流動、行人穿越、車輛分佈等的辨識
+    專門處理動態區域和移動相關的區域分析
+    """
+    def __init__(self):
+        """初始化模式分析器"""
+        try:
+            logger.info("PatternAnalyzer initialized successfully")
+        except Exception as e:
+            logger.error(f"Failed to initialize PatternAnalyzer: {str(e)}")
+            logger.error(traceback.format_exc())
+            raise
+    def analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
+        """
+        Analyze pedestrian crossing patterns to identify crossing zones.
+        若同一 region 中同時有行人與紅綠燈，則將兩者都放入該區域的 objects。
+        Args:
+            pedestrians: 行人物件列表（每個 obj 應包含 'class_id', 'region', 'confidence' 等）
+            traffic_lights: 紅綠燈物件列表（每個 obj 應包含 'class_id', 'region', 'confidence' 等）
+        Returns:
+            crossing_zones: 字典，key 為 zone 名稱，value 包含 'region', 'objects', 'description'
+        """
+        try:
+            crossing_zones = {}
+            # 如果沒有任何行人，就不辨識任何 crossing zone
+            if not pedestrians:
+                return crossing_zones
+            # (1) 按照 region 分組行人
+            pedestrian_regions = {}
+            for p in pedestrians:
+                region = p["region"]
+                pedestrian_regions.setdefault(region, []).append(p)
+            # (2) 針對每個 region，看是否同時有紅綠燈
+            # 建立一個對照表 mapping： region -> { "pedestrians": [...], "traffic_lights": [...] }
+            combined_regions = {}
+            for region, peds in pedestrian_regions.items():
+                # 取得該 region 下所有紅綠燈
+                tls_in_region = [t for t in traffic_lights if t["region"] == region]
+                combined_regions[region] = {
+                    "pedestrians": peds,
+                    "traffic_lights": tls_in_region
+                }
+            # (3) 按照行人數量排序，找出前兩個需要建立 crossing zone 的 region
+            sorted_regions = sorted(
+                combined_regions.items(),
+                key=lambda x: len(x[1]["pedestrians"]),
+                reverse=True
+            )
+            # (4) 將前兩個 region 建立 Crossing Zone，objects 同時包含行人與紅綠燈
+            for idx, (region, group) in enumerate(sorted_regions[:2]):
+                peds = group["pedestrians"]
+                tls  = group["traffic_lights"]
+                has_nearby_signals = len(tls) > 0
+                # 生成 zone_name（基於 region 方向 + idx 決定主/次 crossing）
+                direction = self._get_directional_description_local(region)
+                if direction and direction != "central":
+                    zone_name = f"{direction} crossing area"
+                else:
+                    zone_name = "main crossing area" if idx == 0 else "secondary crossing area"
+                # 組合 description
+                description = f"Pedestrian crossing area with {len(peds)} "
+                description += "person" if len(peds) == 1 else "people"
+                if direction:
+                    description += f" in {direction} direction"
+                if has_nearby_signals:
+                    description += " near traffic signals"
+                # 將行人 + 同區紅綠燈一併放入 objects
+                obj_list = ["pedestrian"] * len(peds)
+                if has_nearby_signals:
+                    obj_list += ["traffic light"] * len(tls)
+                crossing_zones[zone_name] = {
+                    "region": region,
+                    "objects": obj_list,
+                    "description": description
+                }
+            return crossing_zones
+        except Exception as e:
+            logger.error(f"Error in analyze_crossing_patterns: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
+        """
+        分析車輛分布以識別具有方向感知的交通區域
+        Args:
+            vehicles: 車輛物件列表
+        Returns:
+            識別出的交通區域字典
+        """
+        try:
+            traffic_zones = {}
+            if not vehicles:
+                return traffic_zones
+            # 按區域分組車輛
+            vehicle_regions = {}
+            for v in vehicles:
+                region = v["region"]
+                if region not in vehicle_regions:
+                    vehicle_regions[region] = []
+                vehicle_regions[region].append(v)
+            # 為有車輛的區域創建交通區域
+            main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))
+            if main_traffic_region[0] is not None:
+                region = main_traffic_region[0]
+                vehicles_in_region = main_traffic_region[1]
+                # 獲取車輛類型列表用於描述
+                vehicle_types = [v["class_name"] for v in vehicles_in_region]
+                unique_types = list(set(vehicle_types))
+                # 獲取方向描述
+                direction = self._get_directional_description_local(region)
+                # 創建描述性區域
+                traffic_zones["vehicle_zone"] = {
+                    "region": region,
+                    "objects": vehicle_types,
+                    "description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
+                                (f" in {direction} area" if direction else "")
+                }
+                # 如果車輛分布在多個區域，創建次要區域
+                if len(vehicle_regions) > 1:
+                    # 獲取第二大車輛聚集區域
+                    sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
+                    if len(sorted_regions) > 1:
+                        second_region, second_vehicles = sorted_regions[1]
+                        direction = self._get_directional_description_local(second_region)
+                        vehicle_types = [v["class_name"] for v in second_vehicles]
+                        unique_types = list(set(vehicle_types))
+                        traffic_zones["secondary_vehicle_zone"] = {
+                            "region": second_region,
+                            "objects": vehicle_types,
+                            "description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
+                                        (f" in {direction} direction" if direction else "")
+                        }
+            return traffic_zones
+        except Exception as e:
+            logger.error(f"Error analyzing traffic zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
+        """
+        分析空中視角的車輛交通模式
+        Args:
+            vehicle_objs: 車輛物件列表
+        Returns:
+            交通模式區域字典
+        """
+        try:
+            zones = {}
+            if not vehicle_objs:
+                return zones
+            # 將位置轉換為數組進行模式分析
+            positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
+            if len(positions) >= 2:
+                # 計算分布指標
+                x_coords = positions[:, 0]
+                y_coords = positions[:, 1]
+                x_mean = np.mean(x_coords)
+                y_mean = np.mean(y_coords)
+                x_std = np.std(x_coords)
+                y_std = np.std(y_coords)
+                # 判斷車輛是否組織成車道
+                if x_std < y_std * 0.5:
+                    # 車輛垂直對齊 - 代表南北交通
+                    zones["vertical_traffic_flow"] = {
+                        "region": "central_vertical",
+                        "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
+                        "description": "North-south traffic flow visible from aerial view"
+                    }
+                elif y_std < x_std * 0.5:
+                    # 車輛水平對齊 - 代表東西交通
+                    zones["horizontal_traffic_flow"] = {
+                        "region": "central_horizontal",
+                        "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
+                        "description": "East-west traffic flow visible from aerial view"
+                    }
+                else:
+                    # 車輛多方向 - 代表十字路口
+                    zones["intersection_traffic"] = {
+                        "region": "central",
+                        "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
+                        "description": "Multi-directional traffic at intersection visible from aerial view"
+                    }
+            return zones
+        except Exception as e:
+            logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別公園的休閒活動區域
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            休閒區域字典
+        """
+        try:
+            zones = {}
+            # 尋找休閒物件（運動球、風箏等）
+            rec_items = []
+            rec_regions = {}
+            for obj in detected_objects:
+                if obj["class_id"] in [32, 33, 34, 35, 38]:  # sports ball, kite, baseball bat, glove, tennis racket
+                    region = obj["region"]
+                    if region not in rec_regions:
+                        rec_regions[region] = []
+                    rec_regions[region].append(obj)
+                    rec_items.append(obj["class_name"])
+            if rec_items:
+                main_rec_region = max(rec_regions.items(),
+                                key=lambda x: len(x[1]),
+                                default=(None, []))
+                if main_rec_region[0] is not None:
+                    zones["recreational_zone"] = {
+                        "region": main_rec_region[0],
+                        "objects": list(set(rec_items)),
+                        "description": f"Recreational area with {', '.join(list(set(rec_items)))}"
+                    }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying park recreational zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        停車場的停車區域
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            停車區域字典
+        """
+        try:
+            zones = {}
+            # 尋找停放的汽車
+            car_objs = [obj for obj in detected_objects if obj["class_id"] == 2]  # cars
+            if len(car_objs) >= 3:
+                # 檢查汽車是否按模式排列
+                car_positions = [obj["normalized_center"] for obj in car_objs]
+                # 通過分析垂直位置檢查行模式
+                y_coords = [pos[1] for pos in car_positions]
+                y_clusters = {}
+                # 按相似y坐標分組汽車
+                for i, y in enumerate(y_coords):
+                    assigned = False
+                    for cluster_y in y_clusters.keys():
+                        if abs(y - cluster_y) < 0.1:  # 圖像高度的10%內
+                            y_clusters[cluster_y].append(i)
+                            assigned = True
+                            break
+                    if not assigned:
+                        y_clusters[y] = [i]
+                # 如果有行模式
+                if max(len(indices) for indices in y_clusters.values()) >= 2:
+                    zones["parking_row"] = {
+                        "region": "central",
+                        "objects": ["car"] * len(car_objs),
+                        "description": f"Organized parking area with vehicles arranged in rows"
+                    }
+                else:
+                    zones["parking_area"] = {
+                        "region": "wide",
+                        "objects": ["car"] * len(car_objs),
+                        "description": f"Parking area with {len(car_objs)} vehicles"
+                    }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying parking zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def _get_directional_description_local(self, region: str) -> str:
+        """
+        本地方向描述方法
+        將區域名稱轉換為方位描述（東西南北）
+        Args:
+            region: 區域名稱
+        Returns:
+            方位描述字串
+        """
+        try:
+            region_lower = region.lower()
+            if "top" in region_lower and "left" in region_lower:
+                return "northwest"
+            elif "top" in region_lower and "right" in region_lower:
+                return "northeast"
+            elif "bottom" in region_lower and "left" in region_lower:
+                return "southwest"
+            elif "bottom" in region_lower and "right" in region_lower:
+                return "southeast"
+            elif "top" in region_lower:
+                return "north"
+            elif "bottom" in region_lower:
+                return "south"
+            elif "left" in region_lower:
+                return "west"
+            elif "right" in region_lower:
+                return "east"
+            else:
+                return "central"
+        except Exception as e:
+            logger.error(f"Error getting directional description for region '{region}': {str(e)}")
+            return "central"

prominence_calculator.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import logging
+import numpy as np
+from typing import Dict, List, Optional, Any
+class ProminenceCalculator:
+    """
+    重要性計算器 - 專門處理物件重要性評估和篩選邏輯
+    負責計算物件的重要性分數、類別重要性係數以及重要物件的篩選
+    """
+    def __init__(self, min_prominence_score: float = 0.1):
+        """
+        初始化重要性計算器
+        Args:
+            min_prominence_score: 物件顯著性的最低分數閾值
+        """
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.min_prominence_score = min_prominence_score
+    def calculate_prominence_score(self, obj: Dict) -> float:
+        """
+        計算物件的重要性評分
+        基本上權重設定為信心度 > 尺寸 > 空間 > 類別重要性
+        Args:
+            obj: 物件字典，包含檢測信息
+        Returns:
+            float: 重要性評分 (0.0-1.0)
+        """
+        try:
+            # 基礎置信度評分 (權重: 40%)
+            confidence = obj.get("confidence", 0.5)
+            confidence_score = confidence * 0.4
+            # 大小評分 (權重: 30%)
+            normalized_area = obj.get("normalized_area", 0.1)
+            # 使用對數縮放避免過大物件主導評分
+            size_score = min(np.log(normalized_area * 10 + 1) / np.log(11), 1.0) * 0.3
+            # 位置評分 (權重: 20%)
+            # 中心區域的物件通常更重要
+            center_x, center_y = obj.get("normalized_center", [0.5, 0.5])
+            distance_from_center = np.sqrt((center_x - 0.5)**2 + (center_y - 0.5)**2)
+            position_score = (1 - min(distance_from_center * 2, 1.0)) * 0.2
+            # 類別重要性評分 (權重: 10%)
+            class_importance = self.get_class_importance(obj.get("class_name", "unknown"))
+            class_score = class_importance * 0.1
+            total_score = confidence_score + size_score + position_score + class_score
+            # 確保評分在有效範圍內
+            return max(0.0, min(1.0, total_score))
+        except Exception as e:
+            self.logger.warning(f"Error calculating prominence score for object: {str(e)}")
+            return 0.5  # 返回中等評分作為備用
+    def get_class_importance(self, class_name: str) -> float:
+        """
+        根據物件類別返回重要性係數
+        Args:
+            class_name: 物件類別名稱
+        Returns:
+            float: 類別重要性係數 (0.0-1.0)
+        """
+        # 高重要性物件（人、車輛、建築）
+        high_importance = ["person", "car", "truck", "bus", "motorcycle", "bicycle", "building"]
+        # 中等重要性物件（家具、電器）
+        medium_importance = ["chair", "couch", "tv", "laptop", "refrigerator", "dining table", "bed"]
+        # 低重要性物件（小物品、配件）
+        low_importance = ["handbag", "backpack", "umbrella", "cell phone", "remote", "mouse"]
+        class_name_lower = class_name.lower()
+        if any(item in class_name_lower for item in high_importance):
+            return 1.0
+        elif any(item in class_name_lower for item in medium_importance):
+            return 0.7
+        elif any(item in class_name_lower for item in low_importance):
+            return 0.4
+        else:
+            return 0.6  # 預設中等重要性
+    def filter_prominent_objects(self, detected_objects: List[Dict],
+                                min_prominence_score: float = 0.5,
+                                max_categories_to_return: Optional[int] = None) -> List[Dict]:
+        """
+        獲取最重要的物件，基於置信度、大小和位置計算重要性評分
+        Args:
+            detected_objects: 檢測到的物件列表
+            min_prominence_score: 最小重要性分數閾值，範圍 0.0-1.0
+            max_categories_to_return: 可選的最大返回類別數量限制
+        Returns:
+            List[Dict]: 按重要性排序的物件列表
+        """
+        try:
+            if not detected_objects:
+                return []
+            prominent_objects = []
+            for obj in detected_objects:
+                # 計算重要性評分
+                prominence_score = self.calculate_prominence_score(obj)
+                # 只保留超過閾值的物件
+                if prominence_score >= min_prominence_score:
+                    obj_copy = obj.copy()
+                    obj_copy['prominence_score'] = prominence_score
+                    prominent_objects.append(obj_copy)
+            # 按重要性評分排序（從高到低）
+            prominent_objects.sort(key=lambda x: x.get('prominence_score', 0), reverse=True)
+            # 如果指定了最大類別數量限制，進行過濾
+            if max_categories_to_return is not None and max_categories_to_return > 0:
+                categories_seen = set()
+                filtered_objects = []
+                for obj in prominent_objects:
+                    class_name = obj.get("class_name", "unknown")
+                    # 如果是新類別且未達到限制
+                    if class_name not in categories_seen:
+                        if len(categories_seen) < max_categories_to_return:
+                            categories_seen.add(class_name)
+                            filtered_objects.append(obj)
+                    else:
+                        # 已見過的類別，直接添加
+                        filtered_objects.append(obj)
+                return filtered_objects
+            return prominent_objects
+        except Exception as e:
+            self.logger.error(f"Error calculating prominent objects: {str(e)}")
+            return []

scene_zone_identifier.py CHANGED Viewed

@@ -3,6 +3,9 @@ import logging
 import traceback
 import numpy as np
 from typing import Dict, List, Any, Optional
 logger = logging.getLogger(__name__)
@@ -10,11 +13,17 @@ class SceneZoneIdentifier:
     """
     負責不同場景類型的區域識別邏輯
     專注於根據場景類型執行相應的功能區域識別策略
     """
     def __init__(self):
         """初始化場景區域辨識器"""
         try:
             logger.info("SceneZoneIdentifier initialized successfully")
         except Exception as e:
@@ -39,18 +48,18 @@ class SceneZoneIdentifier:
             zones = {}
             # 主要功能區域（基於物件關聯性而非場景類型）
-            primary_zone = self._identify_primary_functional_area(detected_objects)
             if primary_zone:
                 # 基於區域內容生成描述性鍵名
-                descriptive_key = self._generate_descriptive_zone_key_from_data(primary_zone, "primary")
                 zones[descriptive_key] = primary_zone
             # 只有明確證據且物件數量足夠時創建次要功能區域
             if len(zones) >= 1 and len(detected_objects) >= 6:
-                secondary_zone = self._identify_secondary_functional_area(detected_objects, zones)
                 if secondary_zone:
                     # 基於區域內容生成描述性鍵名
-                    descriptive_key = self._generate_descriptive_zone_key_from_data(secondary_zone, "secondary")
                     zones[descriptive_key] = secondary_zone
             logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
@@ -61,92 +70,9 @@ class SceneZoneIdentifier:
             logger.error(traceback.format_exc())
             return {}
-    def _generate_descriptive_zone_key_from_data(self, zone_data: Dict, priority_level: str) -> str:
-        """
-        Build a descriptive key name for a zone from the zone's own data.
-        Args:
-            zone_data: Zone data dictionary ("objects", "region" and "description" are read)
-            priority_level: Priority level (primary/secondary)
-        Returns:
-            str: Descriptive zone key name ("activity area" if an exception occurs)
-        """
-        try:
-            objects = zone_data.get("objects", [])
-            region = zone_data.get("region", "")
-            description = zone_data.get("description", "")
-            # Pick the functional type from the object names (the first matching branch wins)
-            if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
-                base_name = "dining area"
-            elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
-                base_name = "seating area"
-            elif any("bed" in obj.lower() for obj in objects):
-                base_name = "sleeping area"
-            elif any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
-                base_name = "workspace area"
-            elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
-                base_name = "decorative area"
-            elif any("refrigerator" in obj.lower() or "microwave" in obj.lower() for obj in objects):
-                base_name = "kitchen area"
-            else:
-                # No object matched: infer the type from the description text instead
-                if "dining" in description.lower():
-                    base_name = "dining area"
-                elif "seating" in description.lower() or "relaxation" in description.lower():
-                    base_name = "seating area"
-                elif "work" in description.lower():
-                    base_name = "workspace area"
-                elif "decorative" in description.lower():
-                    base_name = "decorative area"
-                else:
-                    base_name = "functional area"
-            # Secondary zones get a spatial prefix so they can be told apart
-            if priority_level == "secondary" and region:
-                spatial_context = self._get_spatial_context_description(region)
-                if spatial_context:
-                    return f"{spatial_context} {base_name}"
-            return base_name
-        except Exception as e:
-            logger.warning(f"Error generating descriptive zone key: {str(e)}")
-            return "activity area"
-    def _get_spatial_context_description(self, region: str) -> str:
-        """
-        Look up the spatial context phrase for a region identifier.
-        Args:
-            region: Region position identifier (e.g. "top_left")
-        Returns:
-            str: Spatial context phrase, or "" when the region is not in the mapping
-        """
-        try:
-            spatial_mapping = {
-                "top_left": "upper left",
-                "top_center": "upper",
-                "top_right": "upper right",
-                "middle_left": "left side",
-                "middle_center": "central",
-                "middle_right": "right side",
-                "bottom_left": "lower left",
-                "bottom_center": "lower",
-                "bottom_right": "lower right"
-            }
-            return spatial_mapping.get(region, "")
-        except Exception as e:
-            logger.warning(f"Error getting spatial context for region '{region}': {str(e)}")
-            return ""
     def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
         """
-        識別一般戶外場景的功能區域
         Args:
             category_regions: 按類別和區域分組的物件字典
@@ -215,11 +141,11 @@ class SceneZoneIdentifier:
             # 針對公園區域的特殊處理
             if scene_type == "park_area":
-                zones.update(self._identify_park_recreational_zones(detected_objects))
             # 針對停車場的特殊處理
             if scene_type == "parking_lot":
-                zones.update(self._identify_parking_zones(detected_objects))
             logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
             return zones
@@ -232,7 +158,7 @@ class SceneZoneIdentifier:
     def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
         """
         辨識城市十字路口的功能區域，無論是否有行人，只要偵測到紅綠燈就一定顯示 Traffic Control Area；
-        若有行人，則額外建立 Crossing Zone 並把行人 + 同 region 的紅綠燈歸在一起。
         Args:
             category_regions: 按類別和 region 分組的物件字典
@@ -251,7 +177,7 @@ class SceneZoneIdentifier:
             traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
             # 2. Step A: 無條件建立 Traffic Control Area
-            #    把每個 region 下的紅綠燈都先分群，生成對應 zone，確保「只要偵測到紅綠燈就一定顯示」
             signal_regions_all = {}
             for t in traffic_light_objs:
                 region = t["region"]
@@ -285,8 +211,8 @@ class SceneZoneIdentifier:
             # 3. Step B: 如果有行人，就建立 Crossing Zone，並移除已被打包的紅綠燈
             if pedestrian_objs:
-                # 先呼叫 _analyze_crossing_patterns，讓它回傳「行人 + 同 region 的紅綠燈」區
-                crossing_zones = self._analyze_crossing_patterns(pedestrian_objs, traffic_light_objs)
                 # 把 Crossing Zone 加到最終 zones，並同時記錄已使用掉的紅綠燈數量
                 for zone_key, zone_info in crossing_zones.items():
@@ -323,8 +249,8 @@ class SceneZoneIdentifier:
             # 5. Step D: 分析車輛交通區域（Vehicle Zones）
             if vehicle_objs:
-                traffic_zones = self._analyze_traffic_zones(vehicle_objs)
-                # _analyze_traffic_zones 內部已用英文 debug，直接更新
                 for zone_key, zone_info in traffic_zones.items():
                     if zone_key in zones:
                         suffix = 1
@@ -396,15 +322,15 @@ class SceneZoneIdentifier:
             # 識別車輛模式進行交通分析
             vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
             if vehicle_objs:
-                zones.update(self._analyze_aerial_traffic_patterns(vehicle_objs))
             # 針對十字路口特定空中視角的處理
             if "intersection" in scene_type:
-                zones.update(self._identify_aerial_intersection_features(detected_objects))
             # 針對廣場空中視角的處理
             if "plaza" in scene_type:
-                zones.update(self._identify_aerial_plaza_features(people_objs))
             logger.info(f"Identified {len(zones)} aerial view zones")
             return zones
@@ -460,11 +386,11 @@ class SceneZoneIdentifier:
                         "description": f"Asian commercial storefront with pedestrian activity"
                     }
-            # 辨識行人通道
-            zones.update(self._identify_asian_pedestrian_pathway(detected_objects))
             # 辨識攤販區域（小攤/商店 - 從情境推斷）
-            zones.update(self._identify_vendor_zones(detected_objects))
             # 針對夜市的特殊處理
             if scene_type == "asian_night_market":
@@ -521,13 +447,13 @@ class SceneZoneIdentifier:
                     }
             # 識別裝飾區域，增強檢測
-            zones.update(self._identify_upscale_decorative_zones(detected_objects))
             # 識別座位安排區域
-            zones.update(self._identify_dining_seating_zones(detected_objects))
             # 識別服務區域（如果與餐飲區域不同）
-            zones.update(self._identify_serving_zones(detected_objects, zones))
             logger.info(f"Identified {len(zones)} upscale dining zones")
             return zones
@@ -576,10 +502,10 @@ class SceneZoneIdentifier:
                     }
             # 側邊建築區域（從場景情境推斷）
-            zones.update(self._identify_building_zones(detected_objects))
             # 行人區域
-            zones.update(self._identify_financial_pedestrian_zones(detected_objects))
             logger.info(f"Identified {len(zones)} financial district zones")
             return zones
@@ -666,7 +592,7 @@ class SceneZoneIdentifier:
             }
             # 創建相關輔助功能區，如攝影區、紀念品販賣區
-            auxiliary_zones = self._create_landmark_auxiliary_zones(landmark, 0)
             if auxiliary_zones:
                 landmark_zones.update(auxiliary_zones)
@@ -678,357 +604,10 @@ class SceneZoneIdentifier:
             logger.error(traceback.format_exc())
             return {}
-    def _identify_primary_functional_area(self, detected_objects: List[Dict]) -> Dict:
-        """
-        Identify the primary functional area from the strongest object combination.
-        Candidates are tried in order (dining, seating, workspace); the first match is returned.
-        Args:
-            detected_objects: List of detected objects
-        Returns:
-            Primary functional area dictionary, or None
-        """
-        try:
-            # Dining area detection (table and chair combination)
-            dining_area = self._detect_functional_combination(
-                detected_objects,
-                primary_objects=[60],  # dining table
-                supporting_objects=[56, 40, 41, 42, 43],  # chair, wine glass, cup, fork, knife
-                min_supporting=2,
-                description_template="Dining area with table and seating arrangement"
-            )
-            if dining_area:
-                return dining_area
-            # Relaxation area detection (sofa/TV combination, or a bed)
-            seating_area = self._detect_functional_combination(
-                detected_objects,
-                primary_objects=[57, 59],  # sofa, bed
-                supporting_objects=[62, 58, 56],  # tv, potted plant, chair
-                min_supporting=1,
-                description_template="Seating and relaxation area"
-            )
-            if seating_area:
-                return seating_area
-            # Work area detection (electronics combined with furniture)
-            work_area = self._detect_functional_combination(
-                detected_objects,
-                primary_objects=[63, 66],  # laptop, keyboard
-                supporting_objects=[60, 56, 64],  # dining table, chair, mouse
-                min_supporting=2,
-                description_template="Workspace area with electronics and furniture"
-            )
-            if work_area:
-                return work_area
-            return None
-        except Exception as e:
-            logger.error(f"Error identifying primary functional area: {str(e)}")
-            logger.error(traceback.format_exc())
-            return None
-    def _identify_secondary_functional_area(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
-        """
-        Identify a secondary functional area that does not overlap the existing zones.
-        Args:
-            detected_objects: List of detected objects
-            existing_zones: Functional zones that already exist
-        Returns:
-            Secondary functional area dictionary, or None
-        """
-        try:
-            # Collect the regions already taken by existing zones
-            used_regions = set(zone.get("region") for zone in existing_zones.values())
-            # Decorative area detection (cluster of plants)
-            decorative_area = self._detect_functional_combination(
-                detected_objects,
-                primary_objects=[58],  # potted plant
-                supporting_objects=[75],  # vase
-                min_supporting=0,
-                min_primary=3,  # require at least 3 plants
-                description_template="Decorative area with plants and ornamental items",
-                exclude_regions=used_regions
-            )
-            if decorative_area:
-                return decorative_area
-            # Storage area detection (kitchen appliance combination)
-            storage_area = self._detect_functional_combination(
-                detected_objects,
-                primary_objects=[72, 68, 69],  # refrigerator, microwave, oven
-                supporting_objects=[71],  # sink
-                min_supporting=0,
-                min_primary=2,
-                description_template="Kitchen appliance and storage area",
-                exclude_regions=used_regions
-            )
-            if storage_area:
-                return storage_area
-            return None
-        except Exception as e:
-            logger.error(f"Error identifying secondary functional area: {str(e)}")
-            logger.error(traceback.format_exc())
-            return None
-    def _detect_functional_combination(self, detected_objects: List[Dict], primary_objects: List[int],
-                                    supporting_objects: List[int], min_supporting: int,
-                                    description_template: str, min_primary: int = 1,
-                                    exclude_regions: set = None) -> Dict:
-        """
-        Generic detector for a functional combination of objects.
-        Decides on a functional area from a combination of primary and supporting objects.
-        Args:
-            detected_objects: List of detected objects
-            primary_objects: class_id list of the primary objects
-            supporting_objects: class_id list of the supporting objects
-            min_supporting: Minimum number of supporting objects required
-            description_template: Description text stored on the resulting zone
-            min_primary: Minimum number of primary objects required
-            exclude_regions: Set of regions to exclude
-        Returns:
-            Functional area info dictionary, or None when the requirements are not met
-        """
-        try:
-            if exclude_regions is None:
-                exclude_regions = set()
-            # Collect primary objects (confidence of at least 0.4)
-            primary_objs = [obj for obj in detected_objects
-                        if obj.get("class_id") in primary_objects and obj.get("confidence", 0) >= 0.4]
-            # Collect supporting objects (confidence of at least 0.4)
-            supporting_objs = [obj for obj in detected_objects
-                            if obj.get("class_id") in supporting_objects and obj.get("confidence", 0) >= 0.4]
-            # Check the minimum counts across the whole image first
-            if len(primary_objs) < min_primary or len(supporting_objs) < min_supporting:
-                return None
-            # Group the objects by region
-            region_combinations = {}
-            all_relevant_objs = primary_objs + supporting_objs
-            for obj in all_relevant_objs:
-                region = obj.get("region")
-                # Skip excluded regions
-                if region in exclude_regions:
-                    continue
-                if region not in region_combinations:
-                    region_combinations[region] = {"primary": [], "supporting": [], "all": []}
-                region_combinations[region]["all"].append(obj)
-                if obj.get("class_id") in primary_objects:
-                    region_combinations[region]["primary"].append(obj)
-                else:
-                    region_combinations[region]["supporting"].append(obj)
-            # Find the best-scoring region
-            best_region = None
-            best_score = 0
-            for region, objs in region_combinations.items():
-                # Count the objects in this region
-                primary_count = len(objs["primary"])
-                supporting_count = len(objs["supporting"])
-                # The minimums must also hold within the single region
-                if primary_count < min_primary or supporting_count < min_supporting:
-                    continue
-                # Combination score (primary objects weigh double)
-                score = primary_count * 2 + supporting_count
-                if score > best_score:
-                    best_score = score
-                    best_region = region
-            if best_region is None:
-                return None
-            best_combination = region_combinations[best_region]
-            all_objects = [obj["class_name"] for obj in best_combination["all"]]
-            return {
-                "region": best_region,
-                "objects": all_objects,
-                "description": description_template
-            }
-        except Exception as e:
-            logger.error(f"Error detecting functional combination: {str(e)}")
-            logger.error(traceback.format_exc())
-            return None
-    def _analyze_crossing_patterns(self, pedestrians: List[Dict], traffic_lights: List[Dict]) -> Dict:
-        """
-        Analyze pedestrian crossing patterns to identify crossing zones.
-        When a region holds both pedestrians and traffic lights, both go into that zone's objects.
-        Args:
-            pedestrians: List of pedestrian objects (each obj should contain 'class_id', 'region', 'confidence', etc.)
-            traffic_lights: List of traffic light objects (each obj should contain 'class_id', 'region', 'confidence', etc.)
-        Returns:
-            crossing_zones: Dict keyed by zone name; each value holds 'region', 'objects', 'description'
-        """
-        try:
-            crossing_zones = {}
-            # Without any pedestrians no crossing zone is identified
-            if not pedestrians:
-                return crossing_zones
-            # (1) Group the pedestrians by region
-            pedestrian_regions = {}
-            for p in pedestrians:
-                region = p["region"]
-                pedestrian_regions.setdefault(region, []).append(p)
-            # (2) For each region, check whether traffic lights are present as well
-            # Build a mapping: region -> { "pedestrians": [...], "traffic_lights": [...] }
-            combined_regions = {}
-            for region, peds in pedestrian_regions.items():
-                # All traffic lights located in this region
-                tls_in_region = [t for t in traffic_lights if t["region"] == region]
-                combined_regions[region] = {
-                    "pedestrians": peds,
-                    "traffic_lights": tls_in_region
-                }
-            # (3) Sort by pedestrian count to find the top two regions needing a crossing zone
-            sorted_regions = sorted(
-                combined_regions.items(),
-                key=lambda x: len(x[1]["pedestrians"]),
-                reverse=True
-            )
-            # (4) Create a Crossing Zone for the top two regions; objects hold pedestrians and traffic lights
-            for idx, (region, group) in enumerate(sorted_regions[:2]):
-                peds = group["pedestrians"]
-                tls  = group["traffic_lights"]
-                has_nearby_signals = len(tls) > 0
-                # Build zone_name (region direction, else idx picks main/secondary crossing)
-                direction = self._get_directional_description(region)
-                if direction and direction != "central":
-                    zone_name = f"{direction} crossing area"
-                else:
-                    zone_name = "main crossing area" if idx == 0 else "secondary crossing area"
-                # Assemble the description
-                description = f"Pedestrian crossing area with {len(peds)} "
-                description += "person" if len(peds) == 1 else "people"
-                if direction:
-                    description += f" in {direction} direction"
-                if has_nearby_signals:
-                    description += " near traffic signals"
-                # ======= Put pedestrians and same-region traffic lights into objects =======
-                obj_list = ["pedestrian"] * len(peds)
-                if has_nearby_signals:
-                    obj_list += ["traffic light"] * len(tls)
-                crossing_zones[zone_name] = {
-                    "region": region,
-                    "objects": obj_list,
-                    "description": description
-                }
-            return crossing_zones
-        except Exception as e:
-            logger.error(f"Error in _analyze_crossing_patterns: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _analyze_traffic_zones(self, vehicles: List[Dict]) -> Dict:
-        """
-        Analyze the vehicle distribution to identify direction-aware traffic zones.
-        Args:
-            vehicles: List of vehicle objects
-        Returns:
-            Dictionary of the identified traffic zones
-        """
-        try:
-            traffic_zones = {}
-            if not vehicles:
-                return traffic_zones
-            # Group the vehicles by region
-            vehicle_regions = {}
-            for v in vehicles:
-                region = v["region"]
-                if region not in vehicle_regions:
-                    vehicle_regions[region] = []
-                vehicle_regions[region].append(v)
-            # Create a traffic zone for the region holding the most vehicles
-            main_traffic_region = max(vehicle_regions.items(), key=lambda x: len(x[1]), default=(None, []))
-            if main_traffic_region[0] is not None:
-                region = main_traffic_region[0]
-                vehicles_in_region = main_traffic_region[1]
-                # Collect the vehicle type names used in the description
-                vehicle_types = [v["class_name"] for v in vehicles_in_region]
-                unique_types = list(set(vehicle_types))
-                # Get the directional description
-                direction = self._get_directional_description(region)
-                # Create the descriptive zone
-                traffic_zones["vehicle_zone"] = {
-                    "region": region,
-                    "objects": vehicle_types,
-                    "description": f"Vehicle traffic area with {', '.join(unique_types[:3])}" +
-                                (f" in {direction} area" if direction else "")
-                }
-                # When vehicles are spread over several regions, add a secondary zone
-                if len(vehicle_regions) > 1:
-                    # Take the region with the second-largest vehicle count
-                    sorted_regions = sorted(vehicle_regions.items(), key=lambda x: len(x[1]), reverse=True)
-                    if len(sorted_regions) > 1:
-                        second_region, second_vehicles = sorted_regions[1]
-                        direction = self._get_directional_description(second_region)
-                        vehicle_types = [v["class_name"] for v in second_vehicles]
-                        unique_types = list(set(vehicle_types))
-                        traffic_zones["secondary_vehicle_zone"] = {
-                            "region": second_region,
-                            "objects": vehicle_types,
-                            "description": f"Secondary traffic area with {', '.join(unique_types[:2])}" +
-                                        (f" in {direction} direction" if direction else "")
-                        }
-            return traffic_zones
-        except Exception as e:
-            logger.error(f"Error analyzing traffic zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
     def _get_directional_description(self, region: str) -> str:
         """
         將區域名稱轉換為方位描述（東西南北）
         Args:
             region: 區域名稱
@@ -1061,668 +640,3 @@ class SceneZoneIdentifier:
         except Exception as e:
             logger.error(f"Error getting directional description for region '{region}': {str(e)}")
             return "central"
-    def _identify_park_recreational_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        Identify the recreational activity zones of a park.
-        Args:
-            detected_objects: List of detected objects
-        Returns:
-            Dictionary of recreational zones
-        """
-        try:
-            zones = {}
-            # Look for recreational objects (sports ball, kite, etc.)
-            rec_items = []
-            rec_regions = {}
-            for obj in detected_objects:
-                if obj["class_id"] in [32, 33, 34, 35, 38]:  # sports ball, kite, baseball bat, glove, tennis racket
-                    region = obj["region"]
-                    if region not in rec_regions:
-                        rec_regions[region] = []
-                    rec_regions[region].append(obj)
-                    rec_items.append(obj["class_name"])
-            if rec_items:
-                main_rec_region = max(rec_regions.items(),
-                                key=lambda x: len(x[1]),
-                                default=(None, []))
-                if main_rec_region[0] is not None:
-                    zones["recreational_zone"] = {
-                        "region": main_rec_region[0],
-                        "objects": list(set(rec_items)),
-                        "description": f"Recreational area with {', '.join(list(set(rec_items)))}"
-                    }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying park recreational zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_parking_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        Identify the parking zones of a parking lot.
-        Args:
-            detected_objects: List of detected objects
-        Returns:
-            Dictionary of parking zones
-        """
-        try:
-            zones = {}
-            # Look for parked cars
-            car_objs = [obj for obj in detected_objects if obj["class_id"] == 2]  # cars
-            if len(car_objs) >= 3:
-                # Check whether the cars are arranged in a pattern (simplified)
-                car_positions = [obj["normalized_center"] for obj in car_objs]
-                # Detect row patterns by analysing the vertical positions
-                y_coords = [pos[1] for pos in car_positions]
-                y_clusters = {}
-                # Simplified clustering - group cars with similar y coordinates
-                for i, y in enumerate(y_coords):
-                    assigned = False
-                    for cluster_y in y_clusters.keys():
-                        if abs(y - cluster_y) < 0.1:  # within 10% of the image height
-                            y_clusters[cluster_y].append(i)
-                            assigned = True
-                            break
-                    if not assigned:
-                        y_clusters[y] = [i]
-                # A row pattern exists when some cluster holds at least two cars
-                if max(len(indices) for indices in y_clusters.values()) >= 2:
-                    zones["parking_row"] = {
-                        "region": "central",
-                        "objects": ["car"] * len(car_objs),
-                        "description": f"Organized parking area with vehicles arranged in rows"
-                    }
-                else:
-                    zones["parking_area"] = {
-                        "region": "wide",
-                        "objects": ["car"] * len(car_objs),
-                        "description": f"Parking area with {len(car_objs)} vehicles"
-                    }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying parking zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _analyze_aerial_traffic_patterns(self, vehicle_objs: List[Dict]) -> Dict:
-        """
-        Analyze vehicle traffic patterns seen from an aerial viewpoint.
-        Args:
-            vehicle_objs: List of vehicle objects
-        Returns:
-            Dictionary of traffic pattern zones
-        """
-        try:
-            zones = {}
-            if not vehicle_objs:
-                return zones
-            # Convert the positions to an array for pattern analysis
-            positions = np.array([obj["normalized_center"] for obj in vehicle_objs])
-            if len(positions) >= 2:
-                # Compute distribution metrics (NOTE(review): x_mean / y_mean are never read below)
-                x_coords = positions[:, 0]
-                y_coords = positions[:, 1]
-                x_mean = np.mean(x_coords)
-                y_mean = np.mean(y_coords)
-                x_std = np.std(x_coords)
-                y_std = np.std(y_coords)
-                # Decide whether the vehicles are organised into lanes
-                if x_std < y_std * 0.5:
-                    # Vehicles aligned vertically - treated as north-south traffic
-                    zones["vertical_traffic_flow"] = {
-                        "region": "central_vertical",
-                        "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
-                        "description": "North-south traffic flow visible from aerial view"
-                    }
-                elif y_std < x_std * 0.5:
-                    # Vehicles aligned horizontally - treated as east-west traffic
-                    zones["horizontal_traffic_flow"] = {
-                        "region": "central_horizontal",
-                        "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
-                        "description": "East-west traffic flow visible from aerial view"
-                    }
-                else:
-                    # Vehicles in several directions - treated as an intersection
-                    zones["intersection_traffic"] = {
-                        "region": "central",
-                        "objects": [obj["class_name"] for obj in vehicle_objs[:5]],
-                        "description": "Multi-directional traffic at intersection visible from aerial view"
-                    }
-            return zones
-        except Exception as e:
-            logger.error(f"Error analyzing aerial traffic patterns: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
-        """
-        Identify intersection features seen from an aerial viewpoint.
-        Args:
-            detected_objects: List of detected objects
-        Returns:
-            Dictionary of intersection feature zones
-        """
-        try:
-            zones = {}
-            # Check for traffic signals
-            traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
-            if traffic_light_objs:
-                zones["traffic_control_pattern"] = {
-                    "region": "intersection",
-                    "objects": ["traffic light"] * len(traffic_light_objs),
-                    "description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
-                }
-            # Crosswalk inferred from the aerial-view context (added unconditionally)
-            zones["crossing_pattern"] = {
-                "region": "central",
-                "objects": ["inferred crosswalk"],
-                "description": "Crossing pattern visible from aerial perspective"
-            }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying aerial intersection features: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
-        """
-        Identify plaza features seen from an aerial viewpoint.
-        Args:
-            people_objs: List of pedestrian objects
-        Returns:
-            Dictionary of plaza feature zones
-        """
-        try:
-            zones = {}
-            if people_objs:
-                # Check whether people gather in the middle band (region name contains "middle")
-                central_people = [obj for obj in people_objs
-                                if "middle" in obj["region"]]
-                if central_people:
-                    zones["central_gathering"] = {
-                        "region": "middle_center",
-                        "objects": ["person"] * len(central_people),
-                        "description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
-                    }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying aerial plaza features: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
-        """
-        Identify the pedestrian pathway in an Asian cultural scene.
-        Args:
-            detected_objects: List of detected objects
-        Returns:
-            Dictionary of pedestrian pathway zones
-        """
-        try:
-            zones = {}
-            pathway_items = []
-            pathway_regions = {}
-            # Extract the people used for the pathway analysis
-            people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
-            # Check whether the people form a line (typical of a commercial street)
-            people_positions = [obj["normalized_center"] for obj in people_objs]
-            structured_path = False
-            path_direction = "meandering"
-            if len(people_positions) >= 3:
-                # Check whether people line up along a similar y coordinate (horizontal path)
-                y_coords = [pos[1] for pos in people_positions]
-                y_mean = sum(y_coords) / len(y_coords)
-                y_variance = sum((y - y_mean)**2 for y in y_coords) / len(y_coords)
-                horizontal_path = y_variance < 0.05  # low variance means horizontal alignment
-                # Check whether people line up along a similar x coordinate (vertical path)
-                x_coords = [pos[0] for pos in people_positions]
-                x_mean = sum(x_coords) / len(x_coords)
-                x_variance = sum((x - x_mean)**2 for x in x_coords) / len(x_coords)
-                vertical_path = x_variance < 0.05  # low variance means vertical alignment
-                structured_path = horizontal_path or vertical_path
-                path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"
-            # Collect pathway objects (people, bicycles, motorcycles in the middle band)
-            for obj in detected_objects:
-                if obj["class_id"] in [0, 1, 3]:  # Person, bicycle, motorcycle
-                    y_pos = obj["normalized_center"][1]
-                    # Keep objects by vertical position (the image middle is likely the pathway)
-                    if 0.25 <= y_pos <= 0.75:
-                        region = obj["region"]
-                        if region not in pathway_regions:
-                            pathway_regions[region] = []
-                        pathway_regions[region].append(obj)
-                        pathway_items.append(obj["class_name"])
-            if pathway_items:
-                path_desc = "Pedestrian walkway with people moving through the commercial area"
-                if structured_path:
-                    path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"
-                zones["pedestrian_pathway"] = {
-                    "region": "middle_center",  # assumption: the pathway is usually in the middle
-                    "objects": list(set(pathway_items)),
-                    "description": path_desc
-                }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        識別攤販區域
-        Args:
-            detected_objects: 檢測到的物件列表
-        Returns:
-            攤販區域字典
-        """
-        try:
-            zones = {}
-            # 識別攤販區域（小攤/商店 - 從情境推斷）
-            has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects)  # bags, bottles, cups
-            has_people = any(obj["class_id"] == 0 for obj in detected_objects)
-            if has_small_objects and has_people:
-                # 可能的攤販區域是人群和小物件聚集的地方
-                small_obj_regions = {}
-                for obj in detected_objects:
-                    if obj["class_id"] in [24, 26, 39, 41, 67]:  # bags, bottles, cups, phones
-                        region = obj["region"]
-                        if region not in small_obj_regions:
-                            small_obj_regions[region] = []
-                        small_obj_regions[region].append(obj)
-                if small_obj_regions:
-                    main_vendor_region = max(small_obj_regions.items(),
-                                        key=lambda x: len(x[1]),
-                                        default=(None, []))
-                    if main_vendor_region[0] is not None:
-                        vendor_items = [obj["class_name"] for obj in main_vendor_region[1]]
-                        zones["vendor_zone"] = {
-                            "region": main_vendor_region[0],
-                            "objects": list(set(vendor_items)),
-                            "description": "Vendor or market stall area with small merchandise"
-                        }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying vendor zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        識別高級餐飲的裝飾區域
-        Args:
-            detected_objects: 檢測到的物件列表
-        Returns:
-            裝飾區域字典
-        """
-        try:
-            zones = {}
-            decor_items = []
-            decor_regions = {}
-            # 尋找裝飾元素（花瓶、酒杯、未使用的餐具）
-            for obj in detected_objects:
-                if obj["class_id"] in [75, 40]:  # Vase, wine glass
-                    region = obj["region"]
-                    if region not in decor_regions:
-                        decor_regions[region] = []
-                    decor_regions[region].append(obj)
-                    decor_items.append(obj["class_name"])
-            if decor_items:
-                main_decor_region = max(decor_regions.items(),
-                                    key=lambda x: len(x[1]),
-                                    default=(None, []))
-                if main_decor_region[0] is not None:
-                    zones["decorative_zone"] = {
-                        "region": main_decor_region[0],
-                        "objects": list(set(decor_items)),
-                        "description": f"Decorative area with {', '.join(list(set(decor_items)))}"
-                    }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying upscale decorative zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        識別餐廳座位安排區域
-        Args:
-            detected_objects: 檢測到的物件列表
-        Returns:
-            座位區域字典
-        """
-        try:
-            zones = {}
-            # 識別座位安排區域
-            chairs = [obj for obj in detected_objects if obj["class_id"] == 56]  # chairs
-            if len(chairs) >= 2:
-                chair_regions = {}
-                for obj in chairs:
-                    region = obj["region"]
-                    if region not in chair_regions:
-                        chair_regions[region] = []
-                    chair_regions[region].append(obj)
-                if chair_regions:
-                    main_seating_region = max(chair_regions.items(),
-                                        key=lambda x: len(x[1]),
-                                        default=(None, []))
-                    if main_seating_region[0] is not None:
-                        zones["dining_seating_zone"] = {
-                            "region": main_seating_region[0],
-                            "objects": ["chair"] * len(main_seating_region[1]),
-                            "description": f"Formal dining seating arrangement with {len(main_seating_region[1])} chairs"
-                        }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying dining seating zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
-        """
-        識別服務區域
-        Args:
-            detected_objects: 檢測到的物件列表
-            existing_zones: 已存在的功能區域
-        Returns:
-            服務區域字典
-        """
-        try:
-            zones = {}
-            serving_items = []
-            serving_regions = {}
-            # 服務區域可能有瓶子、碗、容器
-            for obj in detected_objects:
-                if obj["class_id"] in [39, 45]:  # Bottle, bowl
-                    # 檢查是否在與主餐桌不同的區域
-                    if "formal_dining_zone" in existing_zones and obj["region"] != existing_zones["formal_dining_zone"]["region"]:
-                        region = obj["region"]
-                        if region not in serving_regions:
-                            serving_regions[region] = []
-                        serving_regions[region].append(obj)
-                        serving_items.append(obj["class_name"])
-            if serving_items:
-                main_serving_region = max(serving_regions.items(),
-                                    key=lambda x: len(x[1]),
-                                    default=(None, []))
-                if main_serving_region[0] is not None:
-                    zones["serving_zone"] = {
-                        "region": main_serving_region[0],
-                        "objects": list(set(serving_items)),
-                        "description": f"Serving or sideboard area with {', '.join(list(set(serving_items)))}"
-                    }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying serving zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        識別建築區域（從場景情境推斷）
-        Args:
-            detected_objects: 檢測到的物件列表
-        Returns:
-            建築區域字典
-        """
-        try:
-            zones = {}
-            # 側邊建築區域（從場景情境推斷）
-            # 檢查是否有實際可能包含建築物的區域
-            left_side_regions = ["top_left", "middle_left", "bottom_left"]
-            right_side_regions = ["top_right", "middle_right", "bottom_right"]
-            # 檢查左側
-            left_building_evidence = True
-            for region in left_side_regions:
-                # 如果此區域有很多車輛或人群，不太可能是建築物
-                vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
-                                    for obj in detected_objects)
-                people_in_region = any(obj["region"] == region and obj["class_id"] == 0
-                                    for obj in detected_objects)
-                if vehicle_in_region or people_in_region:
-                    left_building_evidence = False
-                    break
-            # 檢查右側
-            right_building_evidence = True
-            for region in right_side_regions:
-                # 如果此區域有很多車輛或人群，不太可能是建築物
-                vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
-                                    for obj in detected_objects)
-                people_in_region = any(obj["region"] == region and obj["class_id"] == 0
-                                    for obj in detected_objects)
-                if vehicle_in_region or people_in_region:
-                    right_building_evidence = False
-                    break
-            # 如果證據支持，添加建築區域
-            if left_building_evidence:
-                zones["building_zone_left"] = {
-                    "region": "middle_left",
-                    "objects": ["building"],  # 推斷
-                    "description": "Tall buildings line the left side of the street"
-                }
-            if right_building_evidence:
-                zones["building_zone_right"] = {
-                    "region": "middle_right",
-                    "objects": ["building"],  # 推斷
-                    "description": "Tall buildings line the right side of the street"
-                }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying building zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
-        """
-        識別金融區的行人區域
-        Args:
-            detected_objects: 檢測到的物件列表
-        Returns:
-            行人區域字典
-        """
-        try:
-            zones = {}
-            # 識別行人區域（如果有人群）
-            people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
-            if people_objs:
-                people_regions = {}
-                for obj in people_objs:
-                    region = obj["region"]
-                    if region not in people_regions:
-                        people_regions[region] = []
-                    people_regions[region].append(obj)
-                if people_regions:
-                    main_pedestrian_region = max(people_regions.items(),
-                                            key=lambda x: len(x[1]),
-                                            default=(None, []))
-                    if main_pedestrian_region[0] is not None:
-                        zones["pedestrian_zone"] = {
-                            "region": main_pedestrian_region[0],
-                            "objects": ["person"] * len(main_pedestrian_region[1]),
-                            "description": f"Pedestrian area with {len(main_pedestrian_region[1])} people navigating the financial district"
-                        }
-            return zones
-        except Exception as e:
-            logger.error(f"Error identifying financial pedestrian zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}
-    def _create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
-        """
-        創建地標相關的輔助區域（攝影區、紀念品區等）
-        Args:
-            landmark: 地標物件字典
-            index: 地標索引
-        Returns:
-            輔助區域字典
-        """
-        try:
-            auxiliary_zones = {}
-            landmark_region = landmark.get("region", "middle_center")
-            landmark_name = landmark.get("class_name", "Landmark")
-            # 創建攝影區
-            # 根據地標位置調整攝影區位置（地標前方通常是攝影區）
-            region_mapping = {
-                "top_left": "bottom_right",
-                "top_center": "bottom_center",
-                "top_right": "bottom_left",
-                "middle_left": "middle_right",
-                "middle_center": "bottom_center",
-                "middle_right": "middle_left",
-                "bottom_left": "top_right",
-                "bottom_center": "top_center",
-                "bottom_right": "top_left"
-            }
-            photo_region = region_mapping.get(landmark_region, landmark_region)
-            photo_key = f"{landmark_name.lower().replace(' ', '_')}_photography_spot"
-            auxiliary_zones[photo_key] = {
-                "name": f"{landmark_name} Photography Spot",
-                "description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
-                "objects": ["camera", "person", "cell phone"],
-                "region": photo_region,
-                "primary_function": "Tourist photography"
-            }
-            # 如果是著名地標，可能有紀念品販售區
-            if landmark.get("confidence", 0) > 0.7:  # 高置信度地標更可能有紀念品區
-                # 根據地標位置找到適合的紀念品區位置（通常在地標附近但不直接在地標上）
-                adjacent_regions = {
-                    "top_left": ["top_center", "middle_left"],
-                    "top_center": ["top_left", "top_right"],
-                    "top_right": ["top_center", "middle_right"],
-                    "middle_left": ["top_left", "bottom_left"],
-                    "middle_center": ["middle_left", "middle_right"],
-                    "middle_right": ["top_right", "bottom_right"],
-                    "bottom_left": ["middle_left", "bottom_center"],
-                    "bottom_center": ["bottom_left", "bottom_right"],
-                    "bottom_right": ["bottom_center", "middle_right"]
-                }
-                if landmark_region in adjacent_regions:
-                    souvenir_region = adjacent_regions[landmark_region][0]  # 選擇第一個相鄰區域
-                    souvenir_key = f"{landmark_name.lower().replace(' ', '_')}_souvenir_area"
-                    auxiliary_zones[souvenir_key] = {
-                        "name": f"{landmark_name} Souvenir Area",
-                        "description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
-                        "objects": ["person", "handbag", "backpack"],
-                        "region": souvenir_region,
-                        "primary_function": "Tourism commerce"
-                    }
-            return auxiliary_zones
-        except Exception as e:
-            logger.error(f"Error creating landmark auxiliary zones: {str(e)}")
-            logger.error(traceback.format_exc())
-            return {}

 import traceback
 import numpy as np
 from typing import Dict, List, Any, Optional
+from functional_zone_detector import FunctionalZoneDetector
+from pattern_analyzer import PatternAnalyzer
+from specialized_scene_processor import SpecializedSceneProcessor
 logger = logging.getLogger(__name__)
     """
     負責不同場景類型的區域識別邏輯
     專注於根據場景類型執行相應的功能區域識別策略
+    整合所有專門的區域辨識組件，主要須整合至SpatialAnalyzer
     """
     def __init__(self):
         """初始化場景區域辨識器"""
         try:
+            # 初始化各個專門組件
+            self.functional_detector = FunctionalZoneDetector()
+            self.pattern_analyzer = PatternAnalyzer()
+            self.scene_processor = SpecializedSceneProcessor()
             logger.info("SceneZoneIdentifier initialized successfully")
         except Exception as e:
             zones = {}
             # 主要功能區域（基於物件關聯性而非場景類型）
+            primary_zone = self.functional_detector.identify_primary_functional_area(detected_objects)
             if primary_zone:
                 # 基於區域內容生成描述性鍵名
+                descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(primary_zone, "primary")
                 zones[descriptive_key] = primary_zone
             # 只有明確證據且物件數量足夠時創建次要功能區域
             if len(zones) >= 1 and len(detected_objects) >= 6:
+                secondary_zone = self.functional_detector.identify_secondary_functional_area(detected_objects, zones)
                 if secondary_zone:
                     # 基於區域內容生成描述性鍵名
+                    descriptive_key = self.functional_detector.generate_descriptive_zone_key_from_data(secondary_zone, "secondary")
                     zones[descriptive_key] = secondary_zone
             logger.info(f"Identified {len(zones)} indoor zones for scene type '{scene_type}'")
             logger.error(traceback.format_exc())
             return {}
     def identify_outdoor_general_zones(self, category_regions: Dict, detected_objects: List[Dict], scene_type: str) -> Dict:
         """
+        辨識一般戶外場景的功能區域
         Args:
             category_regions: 按類別和區域分組的物件字典
             # 針對公園區域的特殊處理
             if scene_type == "park_area":
+                zones.update(self.pattern_analyzer.identify_park_recreational_zones(detected_objects))
             # 針對停車場的特殊處理
             if scene_type == "parking_lot":
+                zones.update(self.pattern_analyzer.identify_parking_zones(detected_objects))
             logger.info(f"Identified {len(zones)} outdoor zones for scene type '{scene_type}'")
             return zones
     def identify_intersection_zones(self, category_regions: Dict, detected_objects: List[Dict], viewpoint: str) -> Dict:
         """
         辨識城市十字路口的功能區域，無論是否有行人，只要偵測到紅綠燈就一定顯示 Traffic Control Area；
+        如果有行人，則額外建立 Crossing Zone 並把行人 + 同 region 的紅綠燈歸在一起。
         Args:
             category_regions: 按類別和 region 分組的物件字典
             traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
             # 2. Step A: 無條件建立 Traffic Control Area
+            #    把每個 region 下的紅綠燈都先分群，生成對應 zone，確保"只要偵測到紅綠燈就一定顯示"
             signal_regions_all = {}
             for t in traffic_light_objs:
                 region = t["region"]
             # 3. Step B: 如果有行人，就建立 Crossing Zone，並移除已被打包的紅綠燈
             if pedestrian_objs:
+                # 先呼叫 analyze_crossing_patterns，讓它回傳「行人 + 同 region 的紅綠燈」區
+                crossing_zones = self.pattern_analyzer.analyze_crossing_patterns(pedestrian_objs, traffic_light_objs)
                 # 把 Crossing Zone 加到最終 zones，並同時記錄已使用掉的紅綠燈數量
                 for zone_key, zone_info in crossing_zones.items():
             # 5. Step D: 分析車輛交通區域（Vehicle Zones）
             if vehicle_objs:
+                traffic_zones = self.pattern_analyzer.analyze_traffic_zones(vehicle_objs)
+                # analyze_traffic_zones 內部已用英文 debug，直接更新
                 for zone_key, zone_info in traffic_zones.items():
                     if zone_key in zones:
                         suffix = 1
             # 識別車輛模式進行交通分析
             vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [1, 2, 3, 5, 6, 7]]
             if vehicle_objs:
+                zones.update(self.pattern_analyzer.analyze_aerial_traffic_patterns(vehicle_objs))
             # 針對十字路口特定空中視角的處理
             if "intersection" in scene_type:
+                zones.update(self.scene_processor.identify_aerial_intersection_features(detected_objects))
             # 針對廣場空中視角的處理
             if "plaza" in scene_type:
+                zones.update(self.scene_processor.identify_aerial_plaza_features(people_objs))
             logger.info(f"Identified {len(zones)} aerial view zones")
             return zones
                         "description": f"Asian commercial storefront with pedestrian activity"
                     }
+            # 辨識行人通道
+            zones.update(self.scene_processor.identify_asian_pedestrian_pathway(detected_objects))
             # 辨識攤販區域（小攤/商店 - 從情境推斷）
+            zones.update(self.scene_processor.identify_vendor_zones(detected_objects))
             # 針對夜市的特殊處理
             if scene_type == "asian_night_market":
                     }
             # 識別裝飾區域，增強檢測
+            zones.update(self.scene_processor.identify_upscale_decorative_zones(detected_objects))
             # 識別座位安排區域
+            zones.update(self.scene_processor.identify_dining_seating_zones(detected_objects))
             # 識別服務區域（如果與餐飲區域不同）
+            zones.update(self.scene_processor.identify_serving_zones(detected_objects, zones))
             logger.info(f"Identified {len(zones)} upscale dining zones")
             return zones
                     }
             # 側邊建築區域（從場景情境推斷）
+            zones.update(self.scene_processor.identify_building_zones(detected_objects))
             # 行人區域
+            zones.update(self.scene_processor.identify_financial_pedestrian_zones(detected_objects))
             logger.info(f"Identified {len(zones)} financial district zones")
             return zones
             }
             # 創建相關輔助功能區，如攝影區、紀念品販賣區
+            auxiliary_zones = self.scene_processor.create_landmark_auxiliary_zones(landmark, 0)
             if auxiliary_zones:
                 landmark_zones.update(auxiliary_zones)
             logger.error(traceback.format_exc())
             return {}
     def _get_directional_description(self, region: str) -> str:
         """
         將區域名稱轉換為方位描述（東西南北）
+        這是核心工具方法，供所有組件使用
         Args:
             region: 區域名稱
         except Exception as e:
             logger.error(f"Error getting directional description for region '{region}': {str(e)}")
             return "central"

spatial_location_handler.py ADDED Viewed

	@@ -0,0 +1,346 @@

+import logging
+import traceback
+import numpy as np
+from typing import Dict, List, Optional, Any, Tuple
+class SpatialLocationHandler:
+    """
+    空間位置處理器 - 專門處理空間描述生成和排列模式分析
+    負責生成物件的空間位置描述、分析排列模式以及與 RegionAnalyzer 的整合
+    """
+    def __init__(self, region_analyzer: Optional[Any] = None):
+        """
+        初始化空間位置處理器
+        Args:
+            region_analyzer: RegionAnalyzer實例
+        """
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.region_analyzer = region_analyzer
+    def set_region_analyzer(self, region_analyzer: Any) -> None:
+        """
+        設置RegionAnalyzer，用於標準化空間描述生成
+        Args:
+            region_analyzer: RegionAnalyzer實例
+        """
+        try:
+            self.region_analyzer = region_analyzer
+            self.logger.info("RegionAnalyzer instance set for SpatialLocationHandler")
+        except Exception as e:
+            self.logger.warning(f"Error setting RegionAnalyzer: {str(e)}")
+    def generate_spatial_description(self, obj: Dict, image_width: Optional[int] = None,
+                                   image_height: Optional[int] = None,
+                                   region_analyzer: Optional[Any] = None) -> str:
+        """
+        為物件生成空間位置描述
+        Args:
+            obj: 物件字典
+            image_width: 可選的圖像寬度
+            image_height: 可選的圖像高度
+            region_analyzer: 可選的RegionAnalyzer實例，用於生成標準化描述
+        Returns:
+            str: 空間描述字符串，空值region時返回空字串
+        """
+        try:
+            region = obj.get("region") or ""
+            object_type = obj.get("class_name", "")
+            # 處理空值或無效region，直接返回空字串避免不完整描述
+            if not region.strip() or region == "unknown":
+                # 根據物件類型提供合適的預設位置描述
+                if object_type and any(vehicle in object_type.lower() for vehicle in ["car", "truck", "bus"]):
+                    return "positioned in the scene"
+                elif object_type and "person" in object_type.lower():
+                    return "present in the area"
+                else:
+                    return "located in the scene"
+            # 如果提供了RegionAnalyzer實例，使用其標準化方法
+            if region_analyzer and hasattr(region_analyzer, 'get_spatial_description_phrase'):
+                if hasattr(region_analyzer, 'get_contextual_spatial_description'):
+                    spatial_desc = region_analyzer.get_contextual_spatial_description(region, object_type)
+                else:
+                    spatial_desc = region_analyzer.get_spatial_description_phrase(region)
+                if spatial_desc:
+                    return spatial_desc
+            # 備用邏輯：使用改進的內建映射
+            clean_region = region.replace('_', ' ').strip().lower()
+            region_map = {
+                "top left": "in the upper left area",
+                "top center": "in the upper area",
+                "top right": "in the upper right area",
+                "middle left": "on the left side",
+                "middle center": "in the center",
+                "center": "in the center",
+                "middle right": "on the right side",
+                "bottom left": "in the lower left area",
+                "bottom center": "in the lower area",
+                "bottom right": "in the lower right area"
+            }
+            # 直接映射匹配
+            if clean_region in region_map:
+                return region_map[clean_region]
+            # 比較模糊籠統的方位匹配
+            if "top" in clean_region and "left" in clean_region:
+                return "in the upper left area"
+            elif "top" in clean_region and "right" in clean_region:
+                return "in the upper right area"
+            elif "bottom" in clean_region and "left" in clean_region:
+                return "in the lower left area"
+            elif "bottom" in clean_region and "right" in clean_region:
+                return "in the lower right area"
+            elif "top" in clean_region:
+                return "in the upper area"
+            elif "bottom" in clean_region:
+                return "in the lower area"
+            elif "left" in clean_region:
+                return "on the left side"
+            elif "right" in clean_region:
+                return "on the right side"
+            elif "center" in clean_region or "middle" in clean_region:
+                return "in the center"
+            # 如果region無法辨識，使用normalized_center作為備用
+            norm_center = obj.get("normalized_center")
+            if norm_center and image_width and image_height:
+                x_norm, y_norm = norm_center
+                h_pos = "left" if x_norm < 0.4 else "right" if x_norm > 0.6 else "center"
+                v_pos = "upper" if y_norm < 0.4 else "lower" if y_norm > 0.6 else "center"
+                if h_pos == "center" and v_pos == "center":
+                    return "in the center"
+                return f"in the {v_pos} {h_pos} area"
+            # 如果所有方法都失敗，返回空字串
+            return ""
+        except Exception as e:
+            self.logger.warning(f"Error generating spatial description: {str(e)}")
+            return ""
+    def get_standardized_spatial_description(self, obj: Dict) -> str:
+        """
+        使用RegionAnalyzer生成標準化空間描述的內部方法
+        Args:
+            obj: 物件字典
+        Returns:
+            str: 標準化空間描述，失敗時返回空字串
+        """
+        try:
+            if hasattr(self, 'region_analyzer') and self.region_analyzer:
+                region = obj.get("region", "")
+                object_type = obj.get("class_name", "")
+                if hasattr(self.region_analyzer, 'get_contextual_spatial_description'):
+                    return self.region_analyzer.get_contextual_spatial_description(region, object_type)
+                elif hasattr(self.region_analyzer, 'get_spatial_description_phrase'):
+                    return self.region_analyzer.get_spatial_description_phrase(region)
+            return ""
+        except Exception as e:
+            self.logger.warning(f"Error getting standardized spatial description: {str(e)}")
+            object_type = obj.get("class_name", "")
+            if object_type:
+                return "visible in the scene"
+            return "present in the view"
+    def analyze_spatial_arrangement(self, class_name: str, scene_type: Optional[str],
+                                  detected_objects: Optional[List[Dict]],
+                                  count: int) -> Optional[str]:
+        """
+        分析物件的空間排列模式並生成相應描述
+        Args:
+            class_name: 物件類別名稱
+            scene_type: 場景類型
+            detected_objects: 該類型的所有檢測物件
+            count: 物件數量
+        Returns:
+            Optional[str]: 空間排列描述，如果無法分析則返回None
+        """
+        if not detected_objects or len(detected_objects) < 2:
+            return None
+        try:
+            # 提取物件的標準化位置
+            positions = []
+            for obj in detected_objects:
+                center = obj.get("normalized_center", [0.5, 0.5])
+                if isinstance(center, (list, tuple)) and len(center) >= 2:
+                    positions.append(center)
+            if len(positions) < 2:
+                return None
+            # 分析排列模式
+            arrangement_pattern = self._analyze_arrangement_pattern(positions)
+            # 根據物件類型和場景生成描述
+            return self._generate_arrangement_description(class_name, scene_type,
+                                                        arrangement_pattern, count)
+        except Exception as e:
+            self.logger.warning(f"Error analyzing spatial arrangement: {str(e)}")
+            return None
+    def _analyze_arrangement_pattern(self, positions: List[List[float]]) -> str:
+        """
+        分析位置點的排列模式
+        Args:
+            positions: 標準化的位置座標列表
+        Returns:
+            str: 排列模式類型（linear, clustered, scattered, circular等）
+        """
+        if len(positions) < 2:
+            return "single"
+        # 轉換為numpy陣列便於計算
+        pos_array = np.array(positions)
+        # 計算位置的分布特徵
+        x_coords = pos_array[:, 0]
+        y_coords = pos_array[:, 1]
+        # 分析x和y方向的變異程度
+        x_variance = np.var(x_coords)
+        y_variance = np.var(y_coords)
+        # 計算物件間的平均距離
+        distances = []
+        for i in range(len(positions)):
+            for j in range(i + 1, len(positions)):
+                dist = np.sqrt((positions[i][0] - positions[j][0])**2 +
+                            (positions[i][1] - positions[j][1])**2)
+                distances.append(dist)
+        avg_distance = np.mean(distances) if distances else 0
+        distance_variance = np.var(distances) if distances else 0
+        # 判斷排列模式
+        if len(positions) >= 4 and self._is_circular_pattern(positions):
+            return "circular"
+        elif x_variance < 0.05 or y_variance < 0.05:  # 一個方向變異很小
+            return "linear"
+        elif avg_distance < 0.3 and distance_variance < 0.02:  # 物件聚集且距離相近
+            return "clustered"
+        elif avg_distance > 0.6:  # 物件分散
+            return "scattered"
+        elif distance_variance < 0.03:  # 距離一致，可能是規則排列
+            return "regular"
+        else:
+            return "distributed"
+    def _is_circular_pattern(self, positions: List[List[float]]) -> bool:
+        """
+        檢查位置是否形成圓形或環形排列
+        Args:
+            positions: 位置座標列表
+        Returns:
+            bool: 是否為圓形排列
+        """
+        if len(positions) < 4:
+            return False
+        try:
+            pos_array = np.array(positions)
+            # 計算中心點
+            center_x = np.mean(pos_array[:, 0])
+            center_y = np.mean(pos_array[:, 1])
+            # 計算每個點到中心的距離
+            distances_to_center = []
+            for pos in positions:
+                dist = np.sqrt((pos[0] - center_x)**2 + (pos[1] - center_y)**2)
+                distances_to_center.append(dist)
+            # 如果所有距離都相近，可能是圓形排列
+            distance_variance = np.var(distances_to_center)
+            return distance_variance < 0.05 and np.mean(distances_to_center) > 0.2
+        except:
+            return False
+    def _generate_arrangement_description(self, class_name: str, scene_type: Optional[str],
+                                        arrangement_pattern: str, count: int) -> Optional[str]:
+        """
+        根據物件類型、場景和排列模式生成空間描述
+        Args:
+            class_name: 物件類別名稱
+            scene_type: 場景類型
+            arrangement_pattern: 排列模式
+            count: 物件數量
+        Returns:
+            Optional[str]: 生成的空間排列描述
+        """
+        # 基於物件類型的描述模板
+        arrangement_templates = {
+            "chair": {
+                "linear": "arranged in a row",
+                "clustered": "grouped together for conversation",
+                "circular": "arranged around the table",
+                "scattered": "positioned throughout the space",
+                "regular": "evenly spaced",
+                "distributed": "thoughtfully positioned"
+            },
+            "dining table": {
+                "linear": "aligned to create a unified dining space",
+                "clustered": "grouped to form intimate dining areas",
+                "scattered": "distributed to optimize space flow",
+                "regular": "systematically positioned",
+                "distributed": "strategically placed"
+            },
+            "car": {
+                "linear": "parked in sequence",
+                "clustered": "grouped in the parking area",
+                "scattered": "distributed throughout the lot",
+                "regular": "neatly parked",
+                "distributed": "positioned across the area"
+            },
+            "person": {
+                "linear": "moving in a line",
+                "clustered": "gathered together",
+                "circular": "forming a circle",
+                "scattered": "spread across the area",
+                "distributed": "positioned throughout the scene"
+            }
+        }
+        # 獲取對應的描述模板
+        if class_name in arrangement_templates:
+            template_dict = arrangement_templates[class_name]
+            base_description = template_dict.get(arrangement_pattern, "positioned in the scene")
+        else:
+            # 通用的排列描述
+            generic_templates = {
+                "linear": "arranged in a line",
+                "clustered": "grouped together",
+                "circular": "arranged in a circular pattern",
+                "scattered": "distributed across the space",
+                "regular": "evenly positioned",
+                "distributed": "thoughtfully placed"
+            }
+            base_description = generic_templates.get(arrangement_pattern, "positioned in the scene")
+        return base_description

specialized_scene_processor.py ADDED Viewed

	@@ -0,0 +1,527 @@

+import logging
+import traceback
+import numpy as np
+from typing import Dict, List, Any, Optional
+logger = logging.getLogger(__name__)
+class SpecializedSceneProcessor:
+    """
+    負責處理特殊場景類型和地標識別
+    包含亞洲文化場景、高級餐飲、金融區、空中視角等專門處理邏輯
+    """
+    def __init__(self):
+        """初始化特殊場景處理器"""
+        try:
+            logger.info("SpecializedSceneProcessor initialized successfully")
+        except Exception as e:
+            logger.error(f"Failed to initialize SpecializedSceneProcessor: {str(e)}")
+            logger.error(traceback.format_exc())
+            raise
+    def identify_aerial_intersection_features(self, detected_objects: List[Dict]) -> Dict:
+        """
+        空中視角十字路口特徵
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            十字路口特徵區域字典
+        """
+        try:
+            zones = {}
+            # 檢查交通信號
+            traffic_light_objs = [obj for obj in detected_objects if obj["class_id"] == 9]
+            if traffic_light_objs:
+                zones["traffic_control_pattern"] = {
+                    "region": "intersection",
+                    "objects": ["traffic light"] * len(traffic_light_objs),
+                    "description": f"Intersection traffic control with {len(traffic_light_objs)} signals visible from above"
+                }
+            # 人行道從空中視角的情境推斷
+            zones["crossing_pattern"] = {
+                "region": "central",
+                "objects": ["inferred crosswalk"],
+                "description": "Crossing pattern visible from aerial perspective"
+            }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying aerial intersection features: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_aerial_plaza_features(self, people_objs: List[Dict]) -> Dict:
+        """
+        識別空中視角廣場特徵
+        Args:
+            people_objs: 行人物件列表
+        Returns:
+            廣場特徵區域字典
+        """
+        try:
+            zones = {}
+            if people_objs:
+                # 檢查人群是否聚集在中央區域
+                central_people = [obj for obj in people_objs
+                                if "middle" in obj["region"]]
+                if central_people:
+                    zones["central_gathering"] = {
+                        "region": "middle_center",
+                        "objects": ["person"] * len(central_people),
+                        "description": f"Central plaza gathering area with {len(central_people)} people viewed from above"
+                    }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying aerial plaza features: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_asian_pedestrian_pathway(self, detected_objects: List[Dict]) -> Dict:
+        """
+        亞洲文化場景中的行人通道
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            行人通道區域字典
+        """
+        try:
+            zones = {}
+            pathway_items = []
+            pathway_regions = {}
+            # 提取人群用於通道分析
+            people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
+            # 分析人群是否形成線形（商業街的特徵）
+            people_positions = [obj["normalized_center"] for obj in people_objs]
+            structured_path = False
+            path_direction = "meandering"
+            if len(people_positions) >= 3:
+                # 檢查人群是否沿相似y坐標排列（水平路徑）
+                y_coords = [pos[1] for pos in people_positions]
+                y_mean = sum(y_coords) / len(y_coords)
+                y_variance = sum((y - y_mean)**2 for y in y_coords) / len(y_coords)
+                horizontal_path = y_variance < 0.05  # 低變異表示水平對齊
+                # 檢查人群是否沿相似x坐標排列（垂直路徑）
+                x_coords = [pos[0] for pos in people_positions]
+                x_mean = sum(x_coords) / len(x_coords)
+                x_variance = sum((x - x_mean)**2 for x in x_coords) / len(x_coords)
+                vertical_path = x_variance < 0.05  # 低變異表示垂直對齊
+                structured_path = horizontal_path or vertical_path
+                path_direction = "horizontal" if horizontal_path else "vertical" if vertical_path else "meandering"
+            # 收集通道物件（人、自行車、摩托車在中間區域）
+            for obj in detected_objects:
+                if obj["class_id"] in [0, 1, 3]:  # Person, bicycle, motorcycle
+                    y_pos = obj["normalized_center"][1]
+                    # 按垂直位置分組（圖像中間可能是通道）
+                    if 0.25 <= y_pos <= 0.75:
+                        region = obj["region"]
+                        if region not in pathway_regions:
+                            pathway_regions[region] = []
+                        pathway_regions[region].append(obj)
+                        pathway_items.append(obj["class_name"])
+            if pathway_items:
+                path_desc = "Pedestrian walkway with people moving through the commercial area"
+                if structured_path:
+                    path_desc = f"{path_direction.capitalize()} pedestrian walkway with organized foot traffic"
+                zones["pedestrian_pathway"] = {
+                    "region": "middle_center",  # 通道通常會在中間area
+                    "objects": list(set(pathway_items)),
+                    "description": path_desc
+                }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying Asian pedestrian pathway: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_vendor_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別攤販區域
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            攤販區域字典
+        """
+        try:
+            zones = {}
+            # 識別攤販區域（小攤/商店 - 從情境推斷）
+            has_small_objects = any(obj["class_id"] in [24, 26, 39, 41] for obj in detected_objects)  # bags, bottles, cups
+            has_people = any(obj["class_id"] == 0 for obj in detected_objects)
+            if has_small_objects and has_people:
+                # 可能的攤販區域是人群和小物件聚集的地方
+                small_obj_regions = {}
+                for obj in detected_objects:
+                    if obj["class_id"] in [24, 26, 39, 41, 67]:  # bags, bottles, cups, phones
+                        region = obj["region"]
+                        if region not in small_obj_regions:
+                            small_obj_regions[region] = []
+                        small_obj_regions[region].append(obj)
+                if small_obj_regions:
+                    main_vendor_region = max(small_obj_regions.items(),
+                                        key=lambda x: len(x[1]),
+                                        default=(None, []))
+                    if main_vendor_region[0] is not None:
+                        vendor_items = [obj["class_name"] for obj in main_vendor_region[1]]
+                        zones["vendor_zone"] = {
+                            "region": main_vendor_region[0],
+                            "objects": list(set(vendor_items)),
+                            "description": "Vendor or market stall area with small merchandise"
+                        }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying vendor zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_upscale_decorative_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別高級餐飲的裝飾區域
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            裝飾區域字典
+        """
+        try:
+            zones = {}
+            decor_items = []
+            decor_regions = {}
+            # 尋找裝飾元素（花瓶、酒杯、未使用的餐具）
+            for obj in detected_objects:
+                if obj["class_id"] in [75, 40]:  # Vase, wine glass
+                    region = obj["region"]
+                    if region not in decor_regions:
+                        decor_regions[region] = []
+                    decor_regions[region].append(obj)
+                    decor_items.append(obj["class_name"])
+            if decor_items:
+                main_decor_region = max(decor_regions.items(),
+                                    key=lambda x: len(x[1]),
+                                    default=(None, []))
+                if main_decor_region[0] is not None:
+                    zones["decorative_zone"] = {
+                        "region": main_decor_region[0],
+                        "objects": list(set(decor_items)),
+                        "description": f"Decorative area with {', '.join(list(set(decor_items)))}"
+                    }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying upscale decorative zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_dining_seating_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別餐廳座位安排區域
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            座位區域字典
+        """
+        try:
+            zones = {}
+            # 識別座位安排區域
+            chairs = [obj for obj in detected_objects if obj["class_id"] == 56]  # chairs
+            if len(chairs) >= 2:
+                chair_regions = {}
+                for obj in chairs:
+                    region = obj["region"]
+                    if region not in chair_regions:
+                        chair_regions[region] = []
+                    chair_regions[region].append(obj)
+                if chair_regions:
+                    main_seating_region = max(chair_regions.items(),
+                                        key=lambda x: len(x[1]),
+                                        default=(None, []))
+                    if main_seating_region[0] is not None:
+                        zones["dining_seating_zone"] = {
+                            "region": main_seating_region[0],
+                            "objects": ["chair"] * len(main_seating_region[1]),
+                            "description": f"Formal dining seating arrangement with {len(main_seating_region[1])} chairs"
+                        }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying dining seating zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_serving_zones(self, detected_objects: List[Dict], existing_zones: Dict) -> Dict:
+        """
+        識別服務區域
+        Args:
+            detected_objects: 檢測到的物件列表
+            existing_zones: 已存在的功能區域
+        Returns:
+            服務區域字典
+        """
+        try:
+            zones = {}
+            serving_items = []
+            serving_regions = {}
+            # 服務區域可能有瓶子、碗、容器
+            for obj in detected_objects:
+                if obj["class_id"] in [39, 45]:  # Bottle, bowl
+                    # 檢查是否在與主餐桌不同的區域
+                    if "formal_dining_zone" in existing_zones and obj["region"] != existing_zones["formal_dining_zone"]["region"]:
+                        region = obj["region"]
+                        if region not in serving_regions:
+                            serving_regions[region] = []
+                        serving_regions[region].append(obj)
+                        serving_items.append(obj["class_name"])
+            if serving_items:
+                main_serving_region = max(serving_regions.items(),
+                                    key=lambda x: len(x[1]),
+                                    default=(None, []))
+                if main_serving_region[0] is not None:
+                    zones["serving_zone"] = {
+                        "region": main_serving_region[0],
+                        "objects": list(set(serving_items)),
+                        "description": f"Serving or sideboard area with {', '.join(list(set(serving_items)))}"
+                    }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying serving zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_building_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別建築區域（從場景情境推斷）
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            建築區域字典
+        """
+        try:
+            zones = {}
+            # 側邊建築區域（從場景情境推斷）
+            # 檢查是否有實際可能包含建築物的區域
+            left_side_regions = ["top_left", "middle_left", "bottom_left"]
+            right_side_regions = ["top_right", "middle_right", "bottom_right"]
+            # 檢查左側
+            left_building_evidence = True
+            for region in left_side_regions:
+                # 如果此區域有很多車輛或人群，不太可能是建築物
+                vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
+                                    for obj in detected_objects)
+                people_in_region = any(obj["region"] == region and obj["class_id"] == 0
+                                    for obj in detected_objects)
+                if vehicle_in_region or people_in_region:
+                    left_building_evidence = False
+                    break
+            # 檢查右側
+            right_building_evidence = True
+            for region in right_side_regions:
+                # 如果此區域有很多車輛或人群，不太可能是建築物
+                vehicle_in_region = any(obj["region"] == region and obj["class_id"] in [1, 2, 3, 5, 7]
+                                    for obj in detected_objects)
+                people_in_region = any(obj["region"] == region and obj["class_id"] == 0
+                                    for obj in detected_objects)
+                if vehicle_in_region or people_in_region:
+                    right_building_evidence = False
+                    break
+            # 如果證據支持，添加建築區域
+            if left_building_evidence:
+                zones["building_zone_left"] = {
+                    "region": "middle_left",
+                    "objects": ["building"],  # 推斷
+                    "description": "Tall buildings line the left side of the street"
+                }
+            if right_building_evidence:
+                zones["building_zone_right"] = {
+                    "region": "middle_right",
+                    "objects": ["building"],  # 推斷
+                    "description": "Tall buildings line the right side of the street"
+                }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying building zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def identify_financial_pedestrian_zones(self, detected_objects: List[Dict]) -> Dict:
+        """
+        識別金融區的行人區域
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            行人區域字典
+        """
+        try:
+            zones = {}
+            # 辨識行人區域（如果有人群）
+            people_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
+            if people_objs:
+                people_regions = {}
+                for obj in people_objs:
+                    region = obj["region"]
+                    if region not in people_regions:
+                        people_regions[region] = []
+                    people_regions[region].append(obj)
+                if people_regions:
+                    main_pedestrian_region = max(people_regions.items(),
+                                            key=lambda x: len(x[1]),
+                                            default=(None, []))
+                    if main_pedestrian_region[0] is not None:
+                        zones["pedestrian_zone"] = {
+                            "region": main_pedestrian_region[0],
+                            "objects": ["person"] * len(main_pedestrian_region[1]),
+                            "description": f"Pedestrian area with {len(main_pedestrian_region[1])} people navigating the financial district"
+                        }
+            return zones
+        except Exception as e:
+            logger.error(f"Error identifying financial pedestrian zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}
+    def create_landmark_auxiliary_zones(self, landmark: Dict, index: int) -> Dict:
+        """
+        創建地標相關的輔助區域（攝影區、紀念品區等）
+        Args:
+            landmark: 地標物件字典
+            index: 地標索引
+        Returns:
+            輔助區域字典
+        """
+        try:
+            auxiliary_zones = {}
+            landmark_region = landmark.get("region", "middle_center")
+            landmark_name = landmark.get("class_name", "Landmark")
+            # 創建攝影區
+            # 根據地標位置調整攝影區位置（地標前方通常是攝影區）
+            region_mapping = {
+                "top_left": "bottom_right",
+                "top_center": "bottom_center",
+                "top_right": "bottom_left",
+                "middle_left": "middle_right",
+                "middle_center": "bottom_center",
+                "middle_right": "middle_left",
+                "bottom_left": "top_right",
+                "bottom_center": "top_center",
+                "bottom_right": "top_left"
+            }
+            photo_region = region_mapping.get(landmark_region, landmark_region)
+            photo_key = f"{landmark_name.lower().replace(' ', '_')}_photography_spot"
+            auxiliary_zones[photo_key] = {
+                "name": f"{landmark_name} Photography Spot",
+                "description": f"Popular position for photographing {landmark_name} with optimal viewing angle.",
+                "objects": ["camera", "person", "cell phone"],
+                "region": photo_region,
+                "primary_function": "Tourist photography"
+            }
+            # 如果是著名地標，可能有紀念品販售區
+            if landmark.get("confidence", 0) > 0.7:  # 高置信度地標更可能有紀念品區
+                # 根據地標位置找到適合的紀念品區位置（通常在地標附近但不直接在地標上）
+                adjacent_regions = {
+                    "top_left": ["top_center", "middle_left"],
+                    "top_center": ["top_left", "top_right"],
+                    "top_right": ["top_center", "middle_right"],
+                    "middle_left": ["top_left", "bottom_left"],
+                    "middle_center": ["middle_left", "middle_right"],
+                    "middle_right": ["top_right", "bottom_right"],
+                    "bottom_left": ["middle_left", "bottom_center"],
+                    "bottom_center": ["bottom_left", "bottom_right"],
+                    "bottom_right": ["bottom_center", "middle_right"]
+                }
+                if landmark_region in adjacent_regions:
+                    souvenir_region = adjacent_regions[landmark_region][0]  # 選擇第一個相鄰區域
+                    souvenir_key = f"{landmark_name.lower().replace(' ', '_')}_souvenir_area"
+                    auxiliary_zones[souvenir_key] = {
+                        "name": f"{landmark_name} Souvenir Area",
+                        "description": f"Area where visitors can purchase souvenirs and memorabilia related to {landmark_name}.",
+                        "objects": ["person", "handbag", "backpack"],
+                        "region": souvenir_region,
+                        "primary_function": "Tourism commerce"
+                    }
+            return auxiliary_zones
+        except Exception as e:
+            logger.error(f"Error creating landmark auxiliary zones: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {}

statistics_processor.py ADDED Viewed

	@@ -0,0 +1,343 @@

+import logging
+from typing import Dict, List, Optional, Any
+class StatisticsProcessor:
+    """
+    統計分析處理器 - 負責複雜的物件統計分析和數據轉換
+    此類別專門處理物件統計信息的深度分析、Places365信息處理，
+    以及基於統計數據生成替換內容的複雜邏輯。
+    """
+    def __init__(self):
+        """初始化統計分析處理器"""
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.logger.debug("StatisticsProcessor initialized successfully")
+    def generate_statistics_replacements(self, object_statistics: Optional[Dict]) -> Dict[str, str]:
+        """
+        基於物體統計信息生成模板替換內容
+        Args:
+            object_statistics: 物體統計信息
+        Returns:
+            Dict[str, str]: 統計信息基礎的替換內容
+        """
+        replacements = {}
+        if not object_statistics:
+            return replacements
+        try:
+            # 處理植物元素
+            if "potted plant" in object_statistics:
+                count = object_statistics["potted plant"]["count"]
+                if count == 1:
+                    replacements["plant_elements"] = "a potted plant"
+                elif count <= 3:
+                    replacements["plant_elements"] = f"{count} potted plants"
+                else:
+                    replacements["plant_elements"] = f"multiple potted plants ({count} total)"
+            # 處理座位(椅子)相關
+            if "chair" in object_statistics:
+                count = object_statistics["chair"]["count"]
+                # 使用統一的數字轉換邏輯
+                number_words = {
+                    1: "one", 2: "two", 3: "three", 4: "four",
+                    5: "five", 6: "six", 7: "seven", 8: "eight",
+                    9: "nine", 10: "ten", 11: "eleven", 12: "twelve"
+                }
+                if count == 1:
+                    replacements["seating"] = "a chair"
+                    replacements["furniture"] = "a chair"
+                elif count in number_words:
+                    word_count = number_words[count]
+                    replacements["seating"] = f"{word_count} chairs"
+                    replacements["furniture"] = f"{word_count} chairs"
+                elif count <= 20:
+                    replacements["seating"] = f"several chairs"
+                    replacements["furniture"] = f"several chairs"
+                else:
+                    replacements["seating"] = f"numerous chairs ({count} total)"
+                    replacements["furniture"] = f"numerous chairs"
+            # 處理混合家具情況（當存在多種家具類型時）
+            furniture_items = []
+            furniture_counts = []
+            # 收集所有家具類型的統計
+            for furniture_type in ["chair", "dining table", "couch", "bed"]:
+                if furniture_type in object_statistics:
+                    count = object_statistics[furniture_type]["count"]
+                    if count > 0:
+                        furniture_items.append(furniture_type)
+                        furniture_counts.append(count)
+            # 如果只有椅子,那就用上面的方式
+            # 如果有多種家具類型，生成組合描述
+            if len(furniture_items) > 1 and "furniture" not in replacements:
+                main_furniture = furniture_items[0]  # 數量最多的家具類型
+                main_count = furniture_counts[0]
+                if main_furniture == "chair":
+                    number_words = ["", "one", "two", "three", "four", "five", "six"]
+                    if main_count <= 6:
+                        replacements["furniture"] = f"{number_words[main_count]} chairs and other furniture"
+                    else:
+                        replacements["furniture"] = "multiple chairs and other furniture"
+            # 處理人員
+            if "person" in object_statistics:
+                count = object_statistics["person"]["count"]
+                if count == 1:
+                    replacements["people_and_vehicles"] = "a person"
+                    replacements["pedestrian_flow"] = "an individual walking"
+                elif count <= 5:
+                    replacements["people_and_vehicles"] = f"{count} people"
+                    replacements["pedestrian_flow"] = f"{count} people walking"
+                else:
+                    replacements["people_and_vehicles"] = f"many people ({count} individuals)"
+                    replacements["pedestrian_flow"] = f"a crowd of {count} people"
+            # 處理桌子設置
+            if "dining table" in object_statistics:
+                count = object_statistics["dining table"]["count"]
+                if count == 1:
+                    replacements["table_setup"] = "a dining table"
+                    replacements["table_description"] = "a dining surface"
+                else:
+                    replacements["table_setup"] = f"{count} dining tables"
+                    replacements["table_description"] = f"{count} dining surfaces"
+            self.logger.debug(f"Generated {len(replacements)} statistics-based replacements")
+        except Exception as e:
+            self.logger.warning(f"Error generating statistics replacements: {str(e)}")
+        return replacements
+    def generate_places365_replacements(self, places365_info: Optional[Dict]) -> Dict[str, str]:
+        """
+        基於Places365信息生成模板替換內容
+        Args:
+            places365_info: Places365場景分類信息
+        Returns:
+            Dict[str, str]: Places365基礎的替換內容
+        """
+        replacements = {}
+        if not places365_info or places365_info.get('confidence', 0) <= 0.35:
+            replacements["places365_context"] = ""
+            replacements["places365_atmosphere"] = ""
+            return replacements
+        try:
+            scene_label = places365_info.get('scene_label', '').replace('_', ' ')
+            attributes = places365_info.get('attributes', [])
+            # 生成場景上下文
+            if scene_label:
+                replacements["places365_context"] = f"characteristic of a {scene_label}"
+            else:
+                replacements["places365_context"] = ""
+            # 生成氛圍描述
+            if 'natural_lighting' in attributes:
+                replacements["places365_atmosphere"] = "with natural illumination"
+            elif 'artificial_lighting' in attributes:
+                replacements["places365_atmosphere"] = "under artificial lighting"
+            else:
+                replacements["places365_atmosphere"] = ""
+            self.logger.debug("Generated Places365-based replacements")
+        except Exception as e:
+            self.logger.warning(f"Error generating Places365 replacements: {str(e)}")
+            replacements["places365_context"] = ""
+            replacements["places365_atmosphere"] = ""
+        return replacements
+    def analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
+        """
+        分析場景組成以確定模板複雜度
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            Dict: 場景組成統計信息
+        """
+        try:
+            total_objects = len(detected_objects)
+            # 統計不同類型的物件
+            object_categories = {}
+            for obj in detected_objects:
+                class_name = obj.get("class_name", "unknown")
+                object_categories[class_name] = object_categories.get(class_name, 0) + 1
+            # 計算場景多樣性
+            unique_categories = len(object_categories)
+            return {
+                "total_objects": total_objects,
+                "unique_categories": unique_categories,
+                "category_distribution": object_categories,
+                "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
+            }
+        except Exception as e:
+            self.logger.warning(f"Error analyzing scene composition: {str(e)}")
+            return {"total_objects": 0, "unique_categories": 0, "complexity_score": 0}
+    def generate_zone_descriptions(self, zone_data: Dict[str, Any], section: Dict[str, Any]) -> List[str]:
+        """
+        生成功能區域描述
+        Args:
+            zone_data: 區域數據字典
+            section: 區域配置信息
+        Returns:
+            List[str]: 區域描述列表
+        """
+        try:
+            descriptions = []
+            if not zone_data:
+                return descriptions
+            # 直接處理區域資料（zone_data 本身就是區域字典）
+            sorted_zones = sorted(zone_data.items(),
+                                key=lambda x: len(x[1].get("objects", [])),
+                                reverse=True)
+            for zone_name, zone_info in sorted_zones:
+                description = zone_info.get("description", "")
+                objects = zone_info.get("objects", [])
+                if objects:
+                    # 使用現有描述或生成基於物件的描述
+                    if description and not any(tech in description.lower() for tech in ['zone', 'area', 'region']):
+                        zone_desc = description
+                    else:
+                        # 生成更自然的區域描述
+                        clean_zone_name = zone_name.replace('_', ' ').replace(' area', '').replace(' zone', '')
+                        object_list = ', '.join(objects[:3])
+                        if 'crossing' in zone_name or 'pedestrian' in zone_name:
+                            zone_desc = f"In the central crossing area, there are {object_list}."
+                        elif 'vehicle' in zone_name or 'traffic' in zone_name:
+                            zone_desc = f"The vehicle movement area includes {object_list}."
+                        elif 'control' in zone_name:
+                            zone_desc = f"Traffic control elements include {object_list}."
+                        else:
+                            zone_desc = f"The {clean_zone_name} contains {object_list}."
+                        if len(objects) > 3:
+                            zone_desc += f" Along with {len(objects) - 3} additional elements."
+                    descriptions.append(zone_desc)
+            return descriptions
+        except Exception as e:
+            self.logger.error(f"Error generating zone descriptions: {str(e)}")
+            return []
+    def generate_object_summary(self, object_data: List[Dict], section: Dict[str, Any]) -> str:
+        """
+        生成物件摘要描述
+        Args:
+            object_data: 物件數據列表
+            section: 摘要配置信息
+        Returns:
+            str: 物件摘要描述
+        """
+        try:
+            if not object_data:
+                return ""
+            # 統計物件類型並計算重要性
+            object_stats = {}
+            for obj in object_data:
+                class_name = obj.get("class_name", "unknown")
+                confidence = obj.get("confidence", 0.5)
+                if class_name not in object_stats:
+                    object_stats[class_name] = {"count": 0, "total_confidence": 0}
+                object_stats[class_name]["count"] += 1
+                object_stats[class_name]["total_confidence"] += confidence
+            # 按重要性排序（結合數量和置信度）
+            sorted_objects = []
+            for class_name, stats in object_stats.items():
+                count = stats["count"]
+                avg_confidence = stats["total_confidence"] / count
+                importance = count * 0.6 + avg_confidence * 0.4
+                sorted_objects.append((class_name, count, importance))
+            sorted_objects.sort(key=lambda x: x[2], reverse=True)
+            # 生成自然語言描述
+            descriptions = []
+            for class_name, count, _ in sorted_objects[:5]:
+                clean_name = class_name.replace('_', ' ')
+                if count == 1:
+                    article = "an" if clean_name[0].lower() in 'aeiou' else "a"
+                    descriptions.append(f"{article} {clean_name}")
+                else:
+                    descriptions.append(f"{count} {clean_name}s")
+            if len(descriptions) == 1:
+                return f"The scene features {descriptions[0]}."
+            elif len(descriptions) == 2:
+                return f"The scene features {descriptions[0]} and {descriptions[1]}."
+            else:
+                main_items = ", ".join(descriptions[:-1])
+                return f"The scene features {main_items}, and {descriptions[-1]}."
+        except Exception as e:
+            self.logger.error(f"Error generating object summary: {str(e)}")
+            return ""
+    def generate_conclusion(self, template: Dict[str, Any], zone_data: Dict[str, Any],
+                           object_data: List[Dict]) -> str:
+        """
+        生成結論描述
+        Args:
+            template: 模板配置信息
+            zone_data: 區域數據
+            object_data: 物件數據
+        Returns:
+            str: 結論描述
+        """
+        try:
+            scene_type = template.get("scene_type", "general")
+            zones_count = len(zone_data)
+            objects_count = len(object_data)
+            if scene_type == "indoor":
+                conclusion = f"This indoor environment demonstrates clear functional organization with {zones_count} distinct areas and {objects_count} identified objects."
+            elif scene_type == "outdoor":
+                conclusion = f"This outdoor scene shows dynamic activity patterns across {zones_count} functional zones with {objects_count} detected elements."
+            else:
+                conclusion = f"The scene analysis reveals {zones_count} functional areas containing {objects_count} identifiable objects."
+            return conclusion
+        except Exception as e:
+            self.logger.error(f"Error generating conclusion: {str(e)}")
+            return ""

template_manager.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

template_processor.py ADDED Viewed

	@@ -0,0 +1,429 @@

+import logging
+import traceback
+import re
+from typing import Dict, List, Optional, Union, Any
+class TemplateProcessor:
+    """
+    模板處理器 - 負責模板填充、後處理和結構化模板渲染
+    此類別專門處理模板的最終填充過程、文本格式化、
+    語法修復以及結構化模板的渲染邏輯。
+    """
+    def __init__(self):
+        """初始化模板處理器"""
+        self.logger = logging.getLogger(self.__class__.__name__)
+        self.logger.debug("TemplateProcessor initialized successfully")
+    def preprocess_template(self, template: str) -> str:
+        """
+        預處理模板，修復常見問題
+        Args:
+            template: 原始模板字符串
+        Returns:
+            str: 預處理後的模板
+        """
+        try:
+            # 移除可能導致問題的模式
+            template = re.sub(r'\{[^}]*\}\s*,\s*\{[^}]*\}', '{combined_elements}', template)
+            # 確保模板不以逗號開始
+            template = re.sub(r'^[,\s]*', '', template)
+            return template.strip()
+        except Exception as e:
+            self.logger.warning(f"Error preprocessing template: {str(e)}")
+            return template
+    def postprocess_filled_template(self, filled_template: str) -> str:
+        """
+        後處理填充完成的模板，修復語法問題
+        Args:
+            filled_template: 填充後的模板字符串
+        Returns:
+            str: 修復後的模板字符串
+        """
+        try:
+            # 修復 "In , " 模式
+            filled_template = re.sub(r'\bIn\s*,\s*', 'In this scene, ', filled_template)
+            filled_template = re.sub(r'\bAt\s*,\s*', 'At this location, ', filled_template)
+            filled_template = re.sub(r'\bWithin\s*,\s*', 'Within this area, ', filled_template)
+            # 修復連續逗號
+            filled_template = re.sub(r',\s*,', ',', filled_template)
+            # 修復開頭的逗號
+            filled_template = re.sub(r'^[,\s]*', '', filled_template)
+            # 確保首字母大寫
+            if filled_template and not filled_template[0].isupper():
+                filled_template = filled_template[0].upper() + filled_template[1:]
+            # 確保以句號結尾
+            if filled_template and not filled_template.endswith(('.', '!', '?')):
+                filled_template += '.'
+            return filled_template.strip()
+        except Exception as e:
+            self.logger.warning(f"Error postprocessing filled template: {str(e)}")
+            return filled_template
+    def get_template_by_scene_type(self, scene_type: str, detected_objects: List[Dict],
+                                  functional_zones: Dict, template_repository) -> str:
+        """
+        根據場景類型選擇合適的模板並進行標準化處理
+        Args:
+            scene_type: 場景類型
+            detected_objects: 檢測到的物件列表
+            functional_zones: 功能區域字典
+            template_repository: 模板庫實例
+        Returns:
+            str: 標準化後的模板字符串
+        """
+        try:
+            # 獲取場景的物件統計信息
+            object_stats = self._analyze_scene_composition(detected_objects)
+            zone_count = len(functional_zones) if functional_zones else 0
+            # 根據場景複雜度和類型選擇模板
+            templates = template_repository.templates
+            if scene_type in templates:
+                scene_templates = templates[scene_type]
+                # 根據複雜度選擇合適的模板變體
+                if zone_count >= 3 and object_stats.get("total_objects", 0) >= 10:
+                    template_key = "complex"
+                elif zone_count >= 2 or object_stats.get("total_objects", 0) >= 5:
+                    template_key = "moderate"
+                else:
+                    template_key = "simple"
+                if template_key in scene_templates:
+                    raw_template = scene_templates[template_key]
+                else:
+                    raw_template = scene_templates.get("default", scene_templates[list(scene_templates.keys())[0]])
+            else:
+                # 如果沒有特定場景的模板，使用通用模板
+                raw_template = self._get_generic_template(object_stats, zone_count)
+            # 標準化模板中的佔位符和格式
+            standardized_template = self._standardize_template_format(raw_template)
+            return standardized_template
+        except Exception as e:
+            self.logger.error(f"Error selecting template for scene type '{scene_type}': {str(e)}")
+            return self._get_fallback_template()
+    def _analyze_scene_composition(self, detected_objects: List[Dict]) -> Dict:
+        """
+        分析場景組成以確定模板複雜度
+        Args:
+            detected_objects: 檢測到的物件列表
+        Returns:
+            Dict: 場景組成統計信息
+        """
+        try:
+            total_objects = len(detected_objects)
+            # 統計不同類型的物件
+            object_categories = {}
+            for obj in detected_objects:
+                class_name = obj.get("class_name", "unknown")
+                object_categories[class_name] = object_categories.get(class_name, 0) + 1
+            # 計算場景多樣性
+            unique_categories = len(object_categories)
+            return {
+                "total_objects": total_objects,
+                "unique_categories": unique_categories,
+                "category_distribution": object_categories,
+                "complexity_score": min(total_objects * 0.3 + unique_categories * 0.7, 10)
+            }
+        except Exception as e:
+            self.logger.warning(f"Error analyzing scene composition: {str(e)}")
+            return {"total_objects": 0, "unique_categories": 0, "complexity_score": 0}
+    def _get_generic_template(self, object_stats: Dict, zone_count: int) -> str:
+        """
+        獲取通用模板
+        Args:
+            object_stats: 物件統計信息
+            zone_count: 功能區域數量
+        Returns:
+            str: 通用模板字符串
+        """
+        try:
+            complexity_score = object_stats.get("complexity_score", 0)
+            if complexity_score >= 7 or zone_count >= 3:
+                return "This scene presents a comprehensive view featuring {functional_area} with {primary_objects}. The spatial organization demonstrates {spatial_arrangement} across multiple {activity_areas}, creating a dynamic environment with diverse elements and clear functional zones."
+            elif complexity_score >= 4 or zone_count >= 2:
+                return "The scene displays {functional_area} containing {primary_objects}. The arrangement shows {spatial_organization} with distinct areas serving different purposes within the overall space."
+            else:
+                return "A {scene_description} featuring {primary_objects} arranged in {basic_layout} within the visible area."
+        except Exception as e:
+            self.logger.warning(f"Error getting generic template: {str(e)}")
+            return self._get_fallback_template()
+    def _get_fallback_template(self) -> str:
+        """
+        獲取備用模板
+        Returns:
+            str: 備用模板字符串
+        """
+        return "A scene featuring various elements and organized areas of activity within the visible space."
+    def _standardize_template_format(self, template: str) -> str:
+        """
+        標準化模板格式，確保佔位符和表達方式符合自然語言要求
+        Args:
+            template: 原始模板字符串
+        Returns:
+            str: 標準化後的模板字符串
+        """
+        try:
+            if not template:
+                return self._get_fallback_template()
+            standardized = template
+            # 標準化佔位符格式，移除技術性標記
+            placeholder_mapping = {
+                r'\{zone_\d+\}': '{functional_area}',
+                r'\{object_group_\d+\}': '{primary_objects}',
+                r'\{region_\d+\}': '{spatial_area}',
+                r'\{category_\d+\}': '{object_category}',
+                r'\{area_\d+\}': '{activity_area}',
+                r'\{section_\d+\}': '{scene_section}'
+            }
+            for pattern, replacement in placeholder_mapping.items():
+                standardized = re.sub(pattern, replacement, standardized)
+            # 標準化常見的技術性術語
+            term_replacements = {
+                'functional_zones': 'areas of activity',
+                'object_detection': 'visible elements',
+                'category_regions': 'organized sections',
+                'spatial_distribution': 'arrangement throughout the space',
+                'viewpoint_analysis': 'perspective view'
+            }
+            for tech_term, natural_term in term_replacements.items():
+                standardized = standardized.replace(tech_term, natural_term)
+            # 確保模板語法的自然性
+            standardized = self._improve_template_readability(standardized)
+            return standardized
+        except Exception as e:
+            self.logger.warning(f"Error standardizing template format: {str(e)}")
+            return template if template else self._get_fallback_template()
+    def _improve_template_readability(self, template: str) -> str:
+        """
+        改善模板的可讀性和自然性
+        Args:
+            template: 模板字符串
+        Returns:
+            str: 改善後的模板字符串
+        """
+        try:
+            # 移除多餘的空格和換行
+            improved = re.sub(r'\s+', ' ', template).strip()
+            # 改善句子連接
+            improved = improved.replace(' . ', '. ')
+            improved = improved.replace(' , ', ', ')
+            improved = improved.replace(' ; ', '; ')
+            # 確保適當的句號結尾
+            if improved and not improved.endswith(('.', '!', '?')):
+                improved += '.'
+            # 改善常見的表達問題
+            readability_fixes = [
+                (r'\bthe the\b', 'the'),
+                (r'\ba a\b', 'a'),
+                (r'\ban an\b', 'an'),
+                (r'\bwith with\b', 'with'),
+                (r'\bin in\b', 'in'),
+                (r'\bof of\b', 'of'),
+                (r'\band and\b', 'and')
+            ]
+            for pattern, replacement in readability_fixes:
+                improved = re.sub(pattern, replacement, improved, flags=re.IGNORECASE)
+            return improved
+        except Exception as e:
+            self.logger.warning(f"Error improving template readability: {str(e)}")
+            return template
+    def process_structured_template(self, template: Dict[str, Any], scene_data: Dict[str, Any],
+                                  statistics_processor) -> str:
+        """
+        處理結構化模板字典
+        Args:
+            template: 結構化模板字典
+            scene_data: 場景分析資料
+            statistics_processor: 統計處理器實例
+        Returns:
+            str: 生成的場景描述
+        """
+        try:
+            # 提取 scene_data 中各區塊資料
+            zone_data = scene_data.get("functional_zones", scene_data.get("zones", {}))
+            object_data = scene_data.get("detected_objects", [])
+            scene_context = scene_data.get("scene_context", "")
+            # 獲取模板結構
+            structure = template.get("structure", [])
+            if not structure:
+                self.logger.warning("Template has no structure defined")
+                return self._generate_fallback_scene_description(scene_data)
+            description_parts = []
+            # 按照模板結構生成描述
+            for section in structure:
+                section_type = section.get("type", "")
+                content = section.get("content", "")
+                if section_type == "opening":
+                    description_parts.append(content)
+                elif section_type == "zone_analysis":
+                    zone_descriptions = statistics_processor.generate_zone_descriptions(zone_data, section)
+                    if zone_descriptions:
+                        description_parts.extend(zone_descriptions)
+                elif section_type == "object_summary":
+                    object_summary = statistics_processor.generate_object_summary(object_data, section)
+                    if object_summary:
+                        description_parts.append(object_summary)
+                elif section_type == "conclusion":
+                    conclusion = statistics_processor.generate_conclusion(template, zone_data, object_data)
+                    if conclusion:
+                        description_parts.append(conclusion)
+            # 合併並標準化輸出
+            final_description = self._standardize_final_description(" ".join(description_parts))
+            self.logger.info("Successfully applied structured template")
+            return final_description
+        except Exception as e:
+            self.logger.error(f"Error processing structured template: {str(e)}")
+            return self._generate_fallback_scene_description(scene_data)
+    def _generate_fallback_scene_description(self, scene_data: Dict[str, Any]) -> str:
+        """
+        生成備用場景描述
+        Args:
+            scene_data: 場景分析資料
+        Returns:
+            str: 備用場景描述
+        """
+        try:
+            detected_objects = scene_data.get("detected_objects", [])
+            zones = scene_data.get("functional_zones", scene_data.get("zones", {}))
+            scene_type = scene_data.get("scene_type", "general")
+            object_count = len(detected_objects)
+            zone_count = len(zones)
+            if zone_count > 0 and object_count > 0:
+                return f"Scene analysis completed with {zone_count} functional areas containing {object_count} identified objects."
+            elif object_count > 0:
+                return f"Scene analysis identified {object_count} objects in this {scene_type.replace('_', ' ')} environment."
+            else:
+                return f"Scene analysis completed for this {scene_type.replace('_', ' ')} environment."
+        except Exception as e:
+            self.logger.warning(f"Error generating fallback description: {str(e)}")
+            return "Scene analysis completed with detected objects and functional areas."
+    def _standardize_final_description(self, description: str) -> str:
+        """
+        對最終描述進行標準化處理
+        Args:
+            description: 原始描述文本
+        Returns:
+            str: 標準化後的描述文本
+        """
+        try:
+            # 移除多餘空格
+            description = " ".join(description.split())
+            # 確保句子間有適當間距
+            description = description.replace(". ", ". ")
+            # 移除任何殘留的技術性標識符
+            technical_patterns = [
+                r'zone_\d+', r'area_\d+', r'region_\d+',
+                r'_zone', r'_area', r'_region'
+            ]
+            for pattern in technical_patterns:
+                description = re.sub(pattern, '', description, flags=re.IGNORECASE)
+            return description.strip()
+        except Exception as e:
+            self.logger.error(f"Error standardizing final description: {str(e)}")
+            return description
+    def generate_fallback_description(self, scene_type: str, detected_objects: List[Dict]) -> str:
+        """
+        生成備用描述，當模板填充完全失敗時使用
+        Args:
+            scene_type: 場景類型
+            detected_objects: 檢測到的物體列表
+        Returns:
+            str: 備用描述
+        """
+        try:
+            object_count = len(detected_objects)
+            if object_count == 0:
+                return f"A {scene_type.replace('_', ' ')} scene."
+            elif object_count == 1:
+                return f"A {scene_type.replace('_', ' ')} scene with one visible element."
+            else:
+                return f"A {scene_type.replace('_', ' ')} scene with {object_count} visible elements."
+        except Exception as e:
+            self.logger.warning(f"Error generating fallback description: {str(e)}")
+            return "A scene with various elements."

template_repository.py ADDED Viewed

	@@ -0,0 +1,834 @@

+import logging
+import traceback
+from typing import Dict, List, Optional, Any
+from scene_detail_templates import SCENE_DETAIL_TEMPLATES
+from object_template_fillers import OBJECT_TEMPLATE_FILLERS
+from viewpoint_templates import VIEWPOINT_TEMPLATES
+from cultural_templates import CULTURAL_TEMPLATES
+from lighting_conditions import LIGHTING_CONDITIONS
+from confidence_templates import CONFIDENCE_TEMPLATES
+class TemplateRepository:
+    """
+    模板資料的管理器 - 負責模板的載入、儲存、檢索和驗證
+    此類別專門處理模板資源的管理，包括從各種來源載入模板、
+    驗證模板完整性，以及提供統一的模板檢索介面。
+    """
+    def __init__(self, custom_templates_db: Optional[Dict] = None):
+        """
+        Initialise the template repository.
+        Loads the templates, builds the template registry, optionally merges
+        custom templates and then validates the result. If any of these steps
+        raises, the error is logged with its traceback and ``self.templates``
+        is replaced by the fallback templates.
+        Args:
+            custom_templates_db: Optional custom template database; when given
+                (and truthy) it is passed to ``_merge_custom_templates``.
+        """
+        self.logger = logging.getLogger(self.__class__.__name__)
+        # Start from empty containers so the attributes exist even if loading fails.
+        self.templates = {}
+        self.template_registry = {}
+        try:
+            # Load the template database.
+            self.templates = self._load_templates()
+            # Initialise the registry of structured templates.
+            self.template_registry = self._initialize_template_registry()
+            # Merge custom templates if any were supplied.
+            if custom_templates_db:
+                self._merge_custom_templates(custom_templates_db)
+            # Validate template integrity.
+            self._validate_templates()
+            self.logger.info("TemplateRepository initialized successfully with %d template categories",
+                           len(self.templates))
+        except Exception as e:
+            error_msg = f"Failed to initialize TemplateRepository: {str(e)}"
+            self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
+            # Fall back to the basic templates. Only self.templates is reset here;
+            # self.template_registry keeps whatever value it had when the error occurred.
+            self.templates = self._initialize_fallback_templates()
+    def _load_templates(self) -> Dict:
+        """
+        載入所有描述模板
+        Returns:
+            Dict: 包含所有模板類別的字典
+        """
+        try:
+            templates = {}
+            # 載入場景詳細描述模板
+            self.logger.debug("Loading scene detail templates")
+            try:
+                templates["scene_detail_templates"] = SCENE_DETAIL_TEMPLATES
+            except NameError:
+                self.logger.warning("SCENE_DETAIL_TEMPLATES not defined, using empty dict")
+                templates["scene_detail_templates"] = {}
+            # 載入物體模板填充器
+            self.logger.debug("Loading object template fillers")
+            try:
+                templates["object_template_fillers"] = OBJECT_TEMPLATE_FILLERS
+            except NameError:
+                self.logger.warning("OBJECT_TEMPLATE_FILLERS not defined, using empty dict")
+                templates["object_template_fillers"] = {}
+            # 載入視角模板
+            self.logger.debug("Loading viewpoint templates")
+            try:
+                templates["viewpoint_templates"] = VIEWPOINT_TEMPLATES
+            except NameError:
+                self.logger.warning("VIEWPOINT_TEMPLATES not defined, using empty dict")
+                templates["viewpoint_templates"] = {}
+            # 載入文化模板
+            self.logger.debug("Loading cultural templates")
+            try:
+                templates["cultural_templates"] = CULTURAL_TEMPLATES
+            except NameError:
+                self.logger.warning("CULTURAL_TEMPLATES not defined, using empty dict")
+                templates["cultural_templates"] = {}
+            # 從照明條件模組載入照明模板
+            self.logger.debug("Loading lighting templates")
+            try:
+                templates["lighting_templates"] = self._extract_lighting_templates()
+            except Exception as e:
+                self.logger.warning(f"Failed to extract lighting templates: {str(e)}")
+                templates["lighting_templates"] = {}
+            # 載入信心度模板
+            self.logger.debug("Loading confidence templates")
+            try:
+                templates["confidence_templates"] = CONFIDENCE_TEMPLATES
+            except NameError:
+                self.logger.warning("CONFIDENCE_TEMPLATES not defined, using empty dict")
+                templates["confidence_templates"] = {}
+            # 初始化默認模板（當成備份）
+            self._initialize_default_templates(templates)
+            self.logger.info("Successfully loaded %d template categories", len(templates))
+            return templates
+        except Exception as e:
+            error_msg = f"Unexpected error during template loading: {str(e)}"
+            self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
+            # 返回基本模板
+            return self._initialize_fallback_templates()
+    def _initialize_template_registry(self) -> Dict[str, Dict[str, Any]]:
+        """
+        初始化模板註冊表，包含各種場景類型的結構化模板
+        Returns:
+            Dict[str, Dict[str, Any]]: 模板註冊表字典
+        """
+        try:
+            template_registry = {
+                "indoor_detailed": {
+                    "scene_type": "indoor",
+                    "complexity": "high",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "This indoor scene presents a comprehensive view of a well-organized living space."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "functional_areas",
+                            "detail_level": "detailed"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "by_category",
+                            "include_counts": True
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "analytical"
+                        }
+                    ]
+                },
+                "indoor_moderate": {
+                    "scene_type": "indoor",
+                    "complexity": "medium",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "The indoor environment displays organized functional areas."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "main_areas",
+                            "detail_level": "moderate"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "by_function",
+                            "include_counts": False
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "descriptive"
+                        }
+                    ]
+                },
+                "indoor_simple": {
+                    "scene_type": "indoor",
+                    "complexity": "low",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "An indoor space with visible furniture and household items."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "basic_areas",
+                            "detail_level": "simple"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "general",
+                            "include_counts": False
+                        }
+                    ]
+                },
+                "outdoor_detailed": {
+                    "scene_type": "outdoor",
+                    "complexity": "high",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "This outdoor scene captures a dynamic urban environment with multiple activity zones."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "activity_areas",
+                            "detail_level": "detailed"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "by_location",
+                            "include_counts": True
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "environmental"
+                        }
+                    ]
+                },
+                "outdoor_moderate": {
+                    "scene_type": "outdoor",
+                    "complexity": "medium",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "The outdoor scene shows organized public spaces and pedestrian areas."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "public_areas",
+                            "detail_level": "moderate"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "by_type",
+                            "include_counts": False
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "observational"
+                        }
+                    ]
+                },
+                "outdoor_simple": {
+                    "scene_type": "outdoor",
+                    "complexity": "low",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "An outdoor area with pedestrians and urban elements."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "basic_areas",
+                            "detail_level": "simple"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "general",
+                            "include_counts": False
+                        }
+                    ]
+                },
+                "commercial_detailed": {
+                    "scene_type": "commercial",
+                    "complexity": "high",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "This commercial environment demonstrates organized retail and customer service areas."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "service_areas",
+                            "detail_level": "detailed"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "by_function",
+                            "include_counts": True
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "business"
+                        }
+                    ]
+                },
+                "transportation_detailed": {
+                    "scene_type": "transportation",
+                    "complexity": "high",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "This transportation hub features organized passenger facilities and transit infrastructure."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "transit_areas",
+                            "detail_level": "detailed"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "by_transit_function",
+                            "include_counts": True
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "infrastructure"
+                        }
+                    ]
+                },
+                "default": {
+                    "scene_type": "general",
+                    "complexity": "medium",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "The scene displays various elements organized across functional areas."
+                        },
+                        {
+                            "type": "zone_analysis",
+                            "priority": "general_areas",
+                            "detail_level": "moderate"
+                        },
+                        {
+                            "type": "object_summary",
+                            "grouping": "general",
+                            "include_counts": False
+                        },
+                        {
+                            "type": "conclusion",
+                            "style": "general"
+                        }
+                    ]
+                }
+            }
+            self.logger.debug(f"Initialized template registry with {len(template_registry)} templates")
+            return template_registry
+        except Exception as e:
+            error_msg = f"Error initializing template registry: {str(e)}"
+            self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
+            # 返回最基本的註冊表
+            return {
+                "default": {
+                    "scene_type": "general",
+                    "complexity": "low",
+                    "structure": [
+                        {
+                            "type": "opening",
+                            "content": "Scene analysis completed with identified objects and areas."
+                        }
+                    ]
+                }
+            }
+    def _extract_lighting_templates(self) -> Dict:
+        """
+        從照明條件模組提取照明描述模板
+        Returns:
+            Dict: 照明模板字典
+        """
+        try:
+            lighting_templates = {}
+            # 從 LIGHTING_CONDITIONS 提取時間描述
+            time_descriptions = LIGHTING_CONDITIONS.get("time_descriptions", {})
+            for time_key, time_data in time_descriptions.items():
+                if isinstance(time_data, dict) and "general" in time_data:
+                    lighting_templates[time_key] = time_data["general"]
+                else:
+                    # 如果數據結構不符合預期，使用備用描述
+                    lighting_templates[time_key] = f"The scene is captured during {time_key.replace('_', ' ')}."
+            # 確保至少有基本的照明模板
+            if not lighting_templates:
+                self.logger.warning("No lighting templates found, using defaults")
+                lighting_templates = self._get_default_lighting_templates()
+            self.logger.debug("Extracted %d lighting templates", len(lighting_templates))
+            return lighting_templates
+        except Exception as e:
+            self.logger.warning(f"Error extracting lighting templates: {str(e)}, using defaults")
+            return self._get_default_lighting_templates()
+    def _get_default_lighting_templates(self) -> Dict:
+        """獲取默認照明模板"""
+        return {
+            "day_clear": "The scene is captured during clear daylight conditions.",
+            "day_overcast": "The scene is captured during overcast daylight.",
+            "night": "The scene is captured at night with artificial lighting.",
+            "dawn": "The scene is captured during dawn with soft natural lighting.",
+            "dusk": "The scene is captured during dusk with diminishing natural light.",
+            "unknown": "The lighting conditions are not clearly identifiable."
+        }
+    def _initialize_default_templates(self, templates: Dict):
+        """
+        初始化默認模板作為備份機制
+        Args:
+            templates: 要檢查和補充的模板字典
+        """
+        try:
+            # 置信度模板備份
+            if "confidence_templates" not in templates or not templates["confidence_templates"]:
+                templates["confidence_templates"] = {
+                    "high": "{description} {details}",
+                    "medium": "This appears to be {description} {details}",
+                    "low": "This might be {description}, but the confidence is low. {details}"
+                }
+            # 場景詳細模板備份
+            if "scene_detail_templates" not in templates or not templates["scene_detail_templates"]:
+                templates["scene_detail_templates"] = {
+                    "default": ["A scene with various elements and objects."]
+                }
+            # 物體填充模板備份
+            if "object_template_fillers" not in templates or not templates["object_template_fillers"]:
+                templates["object_template_fillers"] = {
+                    "default": ["various items", "different objects", "multiple elements"]
+                }
+            # 視角模板備份
+            if "viewpoint_templates" not in templates or not templates["viewpoint_templates"]:
+                templates["viewpoint_templates"] = {
+                    "eye_level": {
+                        "prefix": "From eye level, ",
+                        "observation": "the scene is viewed straight ahead.",
+                        "short_desc": "at eye level"
+                    },
+                    "aerial": {
+                        "prefix": "From above, ",
+                        "observation": "the scene is viewed from a bird's-eye perspective.",
+                        "short_desc": "from above"
+                    },
+                    "low_angle": {
+                        "prefix": "From a low angle, ",
+                        "observation": "the scene is viewed from below looking upward.",
+                        "short_desc": "from below"
+                    },
+                    "elevated": {
+                        "prefix": "From an elevated position, ",
+                        "observation": "the scene is viewed from a higher vantage point.",
+                        "short_desc": "from an elevated position"
+                    }
+                }
+            # 文化模板備份
+            if "cultural_templates" not in templates or not templates["cultural_templates"]:
+                templates["cultural_templates"] = {
+                    "asian": {
+                        "elements": ["traditional architectural elements", "cultural signage", "Asian design features"],
+                        "description": "The scene displays distinctive Asian cultural characteristics with {elements}."
+                    },
+                    "european": {
+                        "elements": ["classical architecture", "European design elements", "historic features"],
+                        "description": "The scene exhibits European architectural and cultural elements including {elements}."
+                    }
+                }
+            self.logger.debug("Default templates initialized as backup")
+        except Exception as e:
+            self.logger.error(f"Error initializing default templates: {str(e)}")
+    def _merge_custom_templates(self, custom_templates: Dict):
+        """
+        合併自定義模板到現有模板庫
+        Args:
+            custom_templates: 自定義模板字典
+        """
+        try:
+            for template_category, custom_content in custom_templates.items():
+                if template_category in self.templates:
+                    if isinstance(self.templates[template_category], dict) and isinstance(custom_content, dict):
+                        self.templates[template_category].update(custom_content)
+                        self.logger.debug(f"Merged custom templates for category: {template_category}")
+                    else:
+                        self.templates[template_category] = custom_content
+                        self.logger.debug(f"Replaced templates for category: {template_category}")
+                else:
+                    self.templates[template_category] = custom_content
+                    self.logger.debug(f"Added new template category: {template_category}")
+            self.logger.info("Successfully merged custom templates")
+        except Exception as e:
+            self.logger.warning(f"Error merging custom templates: {str(e)}")
+    def _validate_templates(self):
+        """
+        驗證模板完整性和有效性
+        """
+        try:
+            required_categories = [
+                "scene_detail_templates",
+                "object_template_fillers",
+                "viewpoint_templates",
+                "cultural_templates",
+                "lighting_templates",
+                "confidence_templates"
+            ]
+            missing_categories = []
+            for category in required_categories:
+                if category not in self.templates:
+                    missing_categories.append(category)
+                elif not self.templates[category]:
+                    self.logger.warning(f"Template category '{category}' is empty")
+            if missing_categories:
+                error_msg = f"Missing required template categories: {missing_categories}"
+                self.logger.warning(error_msg)
+                # 為缺失的類別創建空模板
+                for category in missing_categories:
+                    self.templates[category] = {}
+            # 驗證視角模板結構
+            self._validate_viewpoint_templates()
+            # 驗證文化模板結構
+            self._validate_cultural_templates()
+            self.logger.debug("Template validation completed successfully")
+        except Exception as e:
+            error_msg = f"Template validation failed: {str(e)}"
+            self.logger.error(f"{error_msg}\n{traceback.format_exc()}")
+    def _validate_viewpoint_templates(self):
+        """驗證視角模板結構"""
+        viewpoint_templates = self.templates.get("viewpoint_templates", {})
+        for viewpoint, template_data in viewpoint_templates.items():
+            if not isinstance(template_data, dict):
+                self.logger.warning(f"Invalid viewpoint template structure for '{viewpoint}'")
+                continue
+            required_keys = ["prefix", "observation"]
+            for key in required_keys:
+                if key not in template_data:
+                    self.logger.warning(f"Missing '{key}' in viewpoint template '{viewpoint}'")
+    def _validate_cultural_templates(self):
+        """驗證文化模板結構"""
+        cultural_templates = self.templates.get("cultural_templates", {})
+        for culture, template_data in cultural_templates.items():
+            if not isinstance(template_data, dict):
+                self.logger.warning(f"Invalid cultural template structure for '{culture}'")
+                continue
+            if "elements" not in template_data or "description" not in template_data:
+                self.logger.warning(f"Missing required keys in cultural template '{culture}'")
+    def _initialize_fallback_templates(self) -> Dict:
+        """
+        初始化備用模板系統，當主要載入失敗時使用
+        Returns:
+            Dict: 最基本的模板字典
+        """
+        return {
+            "scene_detail_templates": {"default": ["A scene with various elements."]},
+            "object_template_fillers": {"default": ["various items"]},
+            "viewpoint_templates": {
+                "eye_level": {
+                    "prefix": "From eye level, ",
+                    "observation": "the scene is viewed straight ahead.",
+                    "short_desc": "at eye level"
+                }
+            },
+            "cultural_templates": {"default": {"elements": ["elements"], "description": "The scene displays cultural elements."}},
+            "lighting_templates": {"unknown": "The lighting conditions are not clearly identifiable."},
+            "confidence_templates": {"medium": "{description} {details}"}
+        }
+    def get_template(self, category: str, key: Optional[str] = None) -> Any:
+        """
+        獲取指定類別的模板
+        Args:
+            category: 模板類別名稱
+            key: 可選的具體模板鍵值
+        Returns:
+            Any: 請求的模板內容，如果不存在則返回空字典或空字符串
+        """
+        try:
+            if category not in self.templates:
+                self.logger.warning(f"Template category '{category}' not found")
+                return {} if key is None else ""
+            if key is None:
+                return self.templates[category]
+            category_templates = self.templates[category]
+            if not isinstance(category_templates, dict):
+                self.logger.warning(f"Template category '{category}' is not a dictionary")
+                return ""
+            if key not in category_templates:
+                self.logger.warning(f"Template key '{key}' not found in category '{category}'")
+                return ""
+            return category_templates[key]
+        except Exception as e:
+            error_msg = f"Error retrieving template {category}.{key}: {str(e)}"
+            self.logger.error(error_msg)
+            return {} if key is None else ""
+    def get_template_categories(self) -> List[str]:
+        """
+        獲取所有可用的模板類別名稱
+        Returns:
+            List[str]: 模板類別名稱列表
+        """
+        return list(self.templates.keys())
+    def template_exists(self, category: str, key: Optional[str] = None) -> bool:
+        """
+        檢查模板是否存在
+        Args:
+            category: 模板類別
+            key: 可選的模板鍵值
+        Returns:
+            bool: 模板是否存在
+        """
+        try:
+            if category not in self.templates:
+                return False
+            if key is None:
+                return True
+            category_templates = self.templates[category]
+            if isinstance(category_templates, dict):
+                return key in category_templates
+            return False
+        except Exception as e:
+            self.logger.warning(f"Error checking template existence for {category}.{key}: {str(e)}")
+            return False
+    def get_confidence_template(self, confidence_level: str) -> str:
+        """
+        獲取指定信心度級別的模板
+        Args:
+            confidence_level: 信心度級別 ('high', 'medium', 'low')
+        Returns:
+            str: 信心度模板字符串
+        """
+        try:
+            confidence_templates = self.templates.get("confidence_templates", {})
+            if confidence_level in confidence_templates:
+                return confidence_templates[confidence_level]
+            # 備用模板
+            fallback_templates = {
+                "high": "{description} {details}",
+                "medium": "This appears to be {description} {details}",
+                "low": "This might be {description}, but the confidence is low. {details}"
+            }
+            return fallback_templates.get(confidence_level, "{description} {details}")
+        except Exception as e:
+            self.logger.warning(f"Error getting confidence template for '{confidence_level}': {str(e)}")
+            return "{description} {details}"
+    def get_lighting_template(self, lighting_type: str) -> str:
+        """
+        獲取指定照明類型的模板
+        Args:
+            lighting_type: 照明類型
+        Returns:
+            str: 照明描述模板
+        """
+        try:
+            lighting_templates = self.templates.get("lighting_templates", {})
+            if lighting_type in lighting_templates:
+                return lighting_templates[lighting_type]
+            # 備用模板
+            return f"The scene is captured with {lighting_type.replace('_', ' ')} lighting conditions."
+        except Exception as e:
+            self.logger.warning(f"Error getting lighting template for '{lighting_type}': {str(e)}")
+            return "The lighting conditions are not clearly identifiable."
+    def get_viewpoint_template(self, viewpoint: str) -> Dict[str, str]:
+        """
+        獲取指定視角的模板
+        Args:
+            viewpoint: 視角類型
+        Returns:
+            Dict[str, str]: 包含prefix、observation等鍵的視角模板字典
+        """
+        try:
+            viewpoint_templates = self.templates.get("viewpoint_templates", {})
+            if viewpoint in viewpoint_templates:
+                return viewpoint_templates[viewpoint]
+            # 備用模板
+            fallback_templates = {
+                "eye_level": {
+                    "prefix": "From eye level, ",
+                    "observation": "the scene is viewed straight ahead.",
+                    "short_desc": "at eye level"
+                },
+                "aerial": {
+                    "prefix": "From above, ",
+                    "observation": "the scene is viewed from a bird's-eye perspective.",
+                    "short_desc": "from above"
+                },
+                "low_angle": {
+                    "prefix": "From a low angle, ",
+                    "observation": "the scene is viewed from below looking upward.",
+                    "short_desc": "from below"
+                },
+                "elevated": {
+                    "prefix": "From an elevated position, ",
+                    "observation": "the scene is viewed from a higher vantage point.",
+                    "short_desc": "from an elevated position"
+                }
+            }
+            return fallback_templates.get(viewpoint, fallback_templates["eye_level"])
+        except Exception as e:
+            self.logger.warning(f"Error getting viewpoint template for '{viewpoint}': {str(e)}")
+            return {
+                "prefix": "",
+                "observation": "the scene is viewed normally.",
+                "short_desc": "normally"
+            }
+    def get_cultural_template(self, cultural_context: str) -> Dict[str, Any]:
+        """
+        獲取指定文化語境的模板
+        Args:
+            cultural_context: 文化語境
+        Returns:
+            Dict[str, Any]: 文化模板字典
+        """
+        try:
+            cultural_templates = self.templates.get("cultural_templates", {})
+            if cultural_context in cultural_templates:
+                return cultural_templates[cultural_context]
+            # 備用模板
+            return {
+                "elements": ["cultural elements"],
+                "description": f"The scene displays {cultural_context} cultural characteristics."
+            }
+        except Exception as e:
+            self.logger.warning(f"Error getting cultural template for '{cultural_context}': {str(e)}")
+            return {
+                "elements": ["various elements"],
+                "description": "The scene displays cultural characteristics."
+            }
+    def get_scene_detail_templates(self, scene_type: str, viewpoint: Optional[str] = None) -> List[str]:
+        """
+        獲取場景詳細描述模板
+        Args:
+            scene_type: 場景類型
+            viewpoint: 可選的視角類型
+        Returns:
+            List[str]: 場景描述模板列表
+        """
+        try:
+            scene_templates = self.templates.get("scene_detail_templates", {})
+            # 首先嘗試獲取特定視角的模板
+            if viewpoint:
+                viewpoint_key = f"{scene_type}_{viewpoint}"
+                if viewpoint_key in scene_templates:
+                    return scene_templates[viewpoint_key]
+            # 然後嘗試獲取場景類型的通用模板
+            if scene_type in scene_templates:
+                return scene_templates[scene_type]
+            # 最後使用默認模板
+            if "default" in scene_templates:
+                return scene_templates["default"]
+            # 備用模板
+            return ["A scene with various elements and objects."]
+        except Exception as e:
+            self.logger.warning(f"Error getting scene detail templates for '{scene_type}': {str(e)}")
+            return ["A scene with various elements and objects."]

text_optimizer.py ADDED Viewed

	@@ -0,0 +1,616 @@

+import re
+import logging
+from typing import Dict, List, Optional, Any, Tuple
+class TextOptimizer:
+    """
+    文本優化器 - 專門處理文本格式化、清理和優化
+    負責物件列表格式化、重複移除、複數形式處理以及描述文本的優化
+    """
+    def __init__(self):
+        """初始化文本優化器"""
+        self.logger = logging.getLogger(self.__class__.__name__)
+    def format_object_list_for_description(self,
+                                          objects: List[Dict],
+                                          use_indefinite_article_for_one: bool = False,
+                                          count_threshold_for_generalization: int = -1,
+                                          max_types_to_list: int = 5) -> str:
+        """
+        將物件列表格式化為人類可讀的字符串，包含總計數字
+        Args:
+            objects: 物件字典列表，每個應包含 'class_name'
+            use_indefinite_article_for_one: 單個物件是否使用 "a/an"，否則使用 "one"
+            count_threshold_for_generalization: 超過此計數時使用通用術語，-1表示精確計數
+            max_types_to_list: 列表中包含的不同物件類型最大數量
+        Returns:
+            str: 格式化的物件描述字符串
+        """
+        try:
+            if not objects:
+                return "no specific objects clearly identified"
+            counts: Dict[str, int] = {}
+            for obj in objects:
+                name = obj.get("class_name", "unknown object")
+                if name == "unknown object" or not name:
+                    continue
+                counts[name] = counts.get(name, 0) + 1
+            if not counts:
+                return "no specific objects clearly identified"
+            descriptions = []
+            # 按計數降序然後按名稱升序排序，限制物件類型數量
+            sorted_counts = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:max_types_to_list]
+            for name, count in sorted_counts:
+                if count == 1:
+                    if use_indefinite_article_for_one:
+                        if name[0].lower() in 'aeiou':
+                            descriptions.append(f"an {name}")
+                        else:
+                            descriptions.append(f"a {name}")
+                    else:
+                        descriptions.append(f"one {name}")
+                else:
+                    # 處理複數形式
+                    plural_name = self._get_plural_form(name)
+                    if count_threshold_for_generalization != -1 and count > count_threshold_for_generalization:
+                        if count <= count_threshold_for_generalization + 3:
+                            descriptions.append(f"several {plural_name}")
+                        else:
+                            descriptions.append(f"many {plural_name}")
+                    else:
+                        descriptions.append(f"{count} {plural_name}")
+            if not descriptions:
+                return "no specific objects clearly identified"
+            if len(descriptions) == 1:
+                return descriptions[0]
+            elif len(descriptions) == 2:
+                return f"{descriptions[0]} and {descriptions[1]}"
+            else:
+                # 使用牛津逗號格式
+                return ", ".join(descriptions[:-1]) + f", and {descriptions[-1]}"
+        except Exception as e:
+            self.logger.warning(f"Error formatting object list: {str(e)}")
+            return "various objects"
+    def optimize_object_description(self, description: str) -> str:
+        """
+        優化物件描述文本，消除多餘重複並改善表達流暢度
+        這個函數是後處理階段的關鍵組件，負責清理和精簡自然語言生成系統
+        產出的描述文字。它專門處理常見的重複問題，如相同物件的重複
+        列舉和冗餘的空間描述，讓最終的描述更簡潔自然。
+        Args:
+            description: 原始的場景描述文本，可能包含重複或冗餘的表達
+        Returns:
+            str: 經過優化清理的描述文本，如果處理失敗則返回原始文本
+        """
+        try:
+            # 1. 處理多餘的空間限定表達
+            # 使用通用模式來識別和移除不必要的空間描述
+            description = self._remove_redundant_spatial_qualifiers(description)
+            # 2. 辨識並處理物件列表的重複問題
+            # 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
+            object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
+            # 遍歷每個找到的物件列表進行重複檢測和優化
+            for obj_list in object_lists:
+                # 3. 解析單個物件列表中的項目
+                all_items = self._parse_object_list_items(obj_list)
+                # 4. 統計物件出現頻���
+                item_counts = self._count_object_items(all_items)
+                # 5. 生成優化後的物件列表
+                if item_counts:
+                    new_items = self._generate_optimized_item_list(item_counts)
+                    new_list = self._format_item_list(new_items)
+                    description = description.replace(obj_list, new_list)
+            return description
+        except Exception as e:
+            self.logger.warning(f"Error optimizing object description: {str(e)}")
+            return description
+    def remove_repetitive_descriptors(self, description: str) -> str:
+        """
+        移除描述中的重複性和不適當的描述詞彙，特別是 "identical" 等詞彙
+        Args:
+            description: 原始描述文本
+        Returns:
+            str: 清理後的描述文本
+        """
+        try:
+            # 定義需要移除或替換的模式
+            cleanup_patterns = [
+                # 移除 "identical" 描述模式
+                (r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
+                (r'\bidentical\s+([a-zA-Z\s]+)', r'\1'),
+                # 改善 "comprehensive arrangement" 等過於技術性的表達
+                (r'\bcomprehensive arrangement of\b', 'arrangement of'),
+                (r'\bcomprehensive view featuring\b', 'scene featuring'),
+                (r'\bcomprehensive display of\b', 'display of'),
+                # 簡化過度描述性的短語
+                (r'\bpositioning around\s+(\d+)\s+identical\b', r'positioning around \1'),
+                (r'\barranged around\s+(\d+)\s+identical\b', r'arranged around \1'),
+            ]
+            processed_description = description
+            for pattern, replacement in cleanup_patterns:
+                processed_description = re.sub(pattern, replacement, processed_description, flags=re.IGNORECASE)
+            # 進一步清理可能的多餘空格
+            processed_description = re.sub(r'\s+', ' ', processed_description).strip()
+            self.logger.debug(f"Cleaned description: removed repetitive descriptors")
+            return processed_description
+        except Exception as e:
+            self.logger.warning(f"Error removing repetitive descriptors: {str(e)}")
+            return description
+    def format_object_count_description(self, class_name: str, count: int,
+                                      scene_type: Optional[str] = None,
+                                      detected_objects: Optional[List[Dict]] = None,
+                                      avg_confidence: float = 0.0) -> str:
+        """
+        格式化物件數量描述的核心方法，整合空間排列、材質推斷和場景語境
+        Args:
+            class_name: 標準化後的類別名稱
+            count: 物件數量
+            scene_type: 場景類型，用於語境化描述
+            detected_objects: 該類型的所有檢測物件，用於空間分析
+            avg_confidence: 平均檢測置信度，影響材質推斷的可信度
+        Returns:
+            str: 完整的格式化數量描述
+        """
+        try:
+            if count <= 0:
+                return ""
+            # 獲取基礎的複數形式
+            plural_form = self._get_plural_form(class_name)
+            # 單數情況的處理
+            if count == 1:
+                return self._format_single_object_description(class_name, scene_type,
+                                                            detected_objects, avg_confidence)
+            # 複數情況的處理
+            return self._format_multiple_objects_description(class_name, count, plural_form,
+                                                           scene_type, detected_objects, avg_confidence)
+        except Exception as e:
+            self.logger.warning(f"Error formatting object count for '{class_name}': {str(e)}")
+            return f"{count} {class_name}s" if count > 1 else class_name
+    def normalize_object_class_name(self, class_name: str) -> str:
+        """
+        標準化物件類別名稱，確保輸出自然語言格式
+        Args:
+            class_name: 原始類別名稱
+        Returns:
+            str: 標準化後的類別名稱
+        """
+        try:
+            if not class_name or not isinstance(class_name, str):
+                return "object"
+            # 移除可能的技術性前綴或後綴
+            normalized = re.sub(r'^(class_|id_|type_)', '', class_name.lower())
+            normalized = re.sub(r'(_class|_id|_type)$', '', normalized)
+            # 將下劃線和連字符替換為空格
+            normalized = normalized.replace('_', ' ').replace('-', ' ')
+            # 移除多餘空格
+            normalized = ' '.join(normalized.split())
+            # 特殊類別名稱的標準化映射
+            class_name_mapping = {
+                'traffic light': 'traffic light',
+                'stop sign': 'stop sign',
+                'fire hydrant': 'fire hydrant',
+                'dining table': 'dining table',
+                'potted plant': 'potted plant',
+                'tv monitor': 'television',
+                'cell phone': 'mobile phone',
+                'wine glass': 'wine glass',
+                'hot dog': 'hot dog',
+                'teddy bear': 'teddy bear',
+                'hair drier': 'hair dryer',
+                'toothbrush': 'toothbrush'
+            }
+            return class_name_mapping.get(normalized, normalized)
+        except Exception as e:
+            self.logger.warning(f"Error normalizing class name '{class_name}': {str(e)}")
+            return class_name if isinstance(class_name, str) else "object"
+    def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
+        """
+        移除描述中冗餘的空間限定詞
+        Args:
+            description: 包含可能多餘空間描述的文本
+        Returns:
+            str: 移除多餘空間限定詞後的文本
+        """
+        # 定義常見的多餘空間表達模式
+        redundant_patterns = [
+            # 室內物件的多餘房間描述
+            (r'\b(bed|sofa|couch|chair|table|desk|dresser|nightstand)\s+in\s+the\s+(room|bedroom|living\s+room)', r'\1'),
+            # 廚房物件的多餘描述
+            (r'\b(refrigerator|stove|oven|sink|microwave)\s+in\s+the\s+kitchen', r'\1'),
+            # 浴室物件的多餘描述
+            (r'\b(toilet|shower|bathtub|sink)\s+in\s+the\s+(bathroom|restroom)', r'\1'),
+            # 一般性的多餘表達：「在場景中」、「在圖片中」等
+            (r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
+        ]
+        for pattern, replacement in redundant_patterns:
+            description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
+        return description
+    def _parse_object_list_items(self, obj_list: str) -> List[str]:
+        """
+        解析物件列表中的項目
+        Args:
+            obj_list: 物件列表字符串
+        Returns:
+            List[str]: 解析後的項目列表
+        """
+        # 先處理逗號格式 "A, B, and C"
+        if ", and " in obj_list:
+            before_last_and = obj_list.rsplit(", and ", 1)[0]
+            last_item = obj_list.rsplit(", and ", 1)[1]
+            front_items = [item.strip() for item in before_last_and.split(",")]
+            all_items = front_items + [last_item.strip()]
+        elif " and " in obj_list:
+            all_items = [item.strip() for item in obj_list.split(" and ")]
+        else:
+            all_items = [item.strip() for item in obj_list.split(",")]
+        return all_items
+    def _count_object_items(self, all_items: List[str]) -> Dict[str, int]:
+        """
+        統計物件項目的出現次數
+        Args:
+            all_items: 所有項目列表
+        Returns:
+            Dict[str, int]: 項目計數字典
+        """
+        item_counts = {}
+        for item in all_items:
+            item = item.strip()
+            if item and item not in ["and", "with", ""]:
+                clean_item = self._normalize_item_for_counting(item)
+                if clean_item not in item_counts:
+                    item_counts[clean_item] = 0
+                item_counts[clean_item] += 1
+        return item_counts
+    def _generate_optimized_item_list(self, item_counts: Dict[str, int]) -> List[str]:
+        """
+        生成優化後的項目列表
+        Args:
+            item_counts: 項目計數字典
+        Returns:
+            List[str]: 優化後的項目列表
+        """
+        new_items = []
+        for item, count in item_counts.items():
+            if count > 1:
+                plural_item = self._make_plural(item)
+                new_items.append(f"{count} {plural_item}")
+            else:
+                new_items.append(item)
+        return new_items
+    def _format_item_list(self, new_items: List[str]) -> str:
+        """
+        格式化項目列表為字符串
+        Args:
+            new_items: 新項目列表
+        Returns:
+            str: 格式化後的字符串
+        """
+        if len(new_items) == 1:
+            return new_items[0]
+        elif len(new_items) == 2:
+            return f"{new_items[0]} and {new_items[1]}"
+        else:
+            return ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
+    def _normalize_item_for_counting(self, item: str) -> str:
+        """
+        正規化物件項目以便準確計數
+        Args:
+            item: 原始物件項目字串
+        Returns:
+            str: 正規化後的物件項目
+        """
+        item = re.sub(r'^(a|an|the)\s+', '', item.lower())
+        return item.strip()
+    def _make_plural(self, item: str) -> str:
+        """
+        將單數名詞轉換為複數形式
+        Args:
+            item: 單數形式的名詞
+        Returns:
+            str: 複數形式的名詞
+        """
+        if item.endswith("y") and len(item) > 1 and item[-2].lower() not in 'aeiou':
+            return item[:-1] + "ies"
+        elif item.endswith(("s", "sh", "ch", "x", "z")):
+            return item + "es"
+        elif not item.endswith("s"):
+            return item + "s"
+        else:
+            return item
+    def _get_plural_form(self, word: str) -> str:
+        """
+        獲取詞彙的複數形式
+        Args:
+            word: 單數詞彙
+        Returns:
+            str: 複數形式
+        """
+        try:
+            # 特殊複數形式
+            irregular_plurals = {
+                'person': 'people',
+                'child': 'children',
+                'foot': 'feet',
+                'tooth': 'teeth',
+                'mouse': 'mice',
+                'man': 'men',
+                'woman': 'women'
+            }
+            if word.lower() in irregular_plurals:
+                return irregular_plurals[word.lower()]
+            # 規則複數形式
+            if word.endswith(('s', 'sh', 'ch', 'x', 'z')):
+                return word + 'es'
+            elif word.endswith('y') and word[-2] not in 'aeiou':
+                return word[:-1] + 'ies'
+            elif word.endswith('f'):
+                return word[:-1] + 'ves'
+            elif word.endswith('fe'):
+                return word[:-2] + 'ves'
+            else:
+                return word + 's'
+        except Exception as e:
+            self.logger.warning(f"Error getting plural form for '{word}': {str(e)}")
+            return word + 's'
+    def _format_single_object_description(self, class_name: str, scene_type: Optional[str],
+                                        detected_objects: Optional[List[Dict]],
+                                        avg_confidence: float) -> str:
+        """
+        處理單個物件的描述生成
+        Args:
+            class_name: 物件類別名稱
+            scene_type: 場景類型
+            detected_objects: 檢測物件列表
+            avg_confidence: 平均置信度
+        Returns:
+            str: 單個物件的完整描述
+        """
+        article = "an" if class_name[0].lower() in 'aeiou' else "a"
+        # 獲取材質描述符
+        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
+        # 獲取位置或特徵描述符
+        feature_descriptor = self._get_single_object_feature(class_name, scene_type, detected_objects)
+        # 組合描述
+        descriptors = []
+        if material_descriptor:
+            descriptors.append(material_descriptor)
+        if feature_descriptor:
+            descriptors.append(feature_descriptor)
+        if descriptors:
+            return f"{article} {' '.join(descriptors)} {class_name}"
+        else:
+            return f"{article} {class_name}"
+    def _format_multiple_objects_description(self, class_name: str, count: int, plural_form: str,
+                                           scene_type: Optional[str], detected_objects: Optional[List[Dict]],
+                                           avg_confidence: float) -> str:
+        """
+        處理多個物件的描述生成
+        Args:
+            class_name: 物件類別名稱
+            count: 物件數量
+            plural_form: 複數形式
+            scene_type: 場景類型
+            detected_objects: 檢測物件列表
+            avg_confidence: 平均置信度
+        Returns:
+            str: 多個物件的完整描述
+        """
+        # 數字到文字的轉換映射
+        number_words = {
+            2: "two", 3: "three", 4: "four", 5: "five", 6: "six",
+            7: "seven", 8: "eight", 9: "nine", 10: "ten",
+            11: "eleven", 12: "twelve"
+        }
+        # 確定基礎數量表達
+        if count in number_words:
+            count_expression = number_words[count]
+        elif count <= 20:
+            count_expression = "several"
+        else:
+            count_expression = "numerous"
+        # 獲取材質或功能描述符
+        material_descriptor = self._get_material_descriptor(class_name, scene_type, avg_confidence)
+        # 構建基礎描述
+        descriptors = []
+        if material_descriptor:
+            descriptors.append(material_descriptor)
+        base_description = f"{count_expression} {' '.join(descriptors)} {plural_form}".strip()
+        return base_description
+    def _get_material_descriptor(self, class_name: str, scene_type: Optional[str],
+                               avg_confidence: float) -> Optional[str]:
+        """
+        基於場景語境和置信度進行材質推斷
+        Args:
+            class_name: 物件類別名稱
+            scene_type: 場景類型
+            avg_confidence: 檢測置信度
+        Returns:
+            Optional[str]: 材質描述符
+        """
+        # 只有在置信度足夠高時才進行材質推斷
+        if avg_confidence < 0.5:
+            return None
+        # 餐廳和用餐相關場景
+        if scene_type and scene_type in ["dining_area", "restaurant", "upscale_dining", "cafe"]:
+            material_mapping = {
+                "chair": "wooden" if avg_confidence > 0.7 else None,
+                "dining table": "wooden",
+                "couch": "upholstered",
+                "vase": "decorative"
+            }
+            return material_mapping.get(class_name)
+        # 辦公場景
+        elif scene_type and scene_type in ["office_workspace", "meeting_room", "conference_room"]:
+            material_mapping = {
+                "chair": "office",
+                "dining table": "conference",
+                "laptop": "modern",
+                "book": "reference"
+            }
+            return material_mapping.get(class_name)
+        # 客廳場景
+        elif scene_type and scene_type in ["living_room"]:
+            material_mapping = {
+                "couch": "comfortable",
+                "chair": "accent",
+                "tv": "large",
+                "vase": "decorative"
+            }
+            return material_mapping.get(class_name)
+        # 室外場景
+        elif scene_type and scene_type in ["city_street", "park_area", "parking_lot"]:
+            material_mapping = {
+                "car": "parked",
+                "person": "walking",
+                "bicycle": "stationed"
+            }
+            return material_mapping.get(class_name)
+        # 如果沒有特定的場景映射，返回通用描述符
+        generic_mapping = {
+            "chair": "comfortable",
+            "dining table": "sturdy",
+            "car": "parked",
+            "person": "present"
+        }
+        return generic_mapping.get(class_name)
+    def _get_single_object_feature(self, class_name: str, scene_type: Optional[str],
+                                 detected_objects: Optional[List[Dict]]) -> Optional[str]:
+        """
+        為單個物件生成特徵描述符
+        Args:
+            class_name: 物件類別名稱
+            scene_type: 場景類型
+            detected_objects: 檢測物件
+        Returns:
+            Optional[str]: 特徵描述符
+        """
+        if not detected_objects or len(detected_objects) != 1:
+            return None
+        obj = detected_objects[0]
+        region = obj.get("region", "").lower()
+        # 基於位置的描述
+        if "center" in region:
+            if class_name == "dining table":
+                return "central"
+            elif class_name == "chair":
+                return "centrally placed"
+        elif "corner" in region or "left" in region or "right" in region:
+            return "positioned"
+        # 基於場景的功能描述
+        if scene_type and scene_type in ["dining_area", "restaurant"]:
+            if class_name == "chair":
+                return "dining"
+            elif class_name == "vase":
+                return "decorative"
+        return None