Spaces:
Running
on
Zero
Running
on
Zero
Upload 5 files
Browse files
fixed format ("indoor, ") issues
- llm_enhancer.py +16 -5
- llm_model_manager.py +44 -25
- object_description_generator.py +41 -40
- response_processor.py +105 -31
llm_enhancer.py
CHANGED
|
@@ -126,6 +126,18 @@ class LLMEnhancer:
|
|
| 126 |
# 10. 移除解釋性注釋
|
| 127 |
cleaned_response = self.response_processor.remove_explanatory_notes(raw_cleaned)
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
# 11. 事實準確性驗證
|
| 130 |
try:
|
| 131 |
cleaned_response = self.quality_validator.verify_factual_accuracy(
|
|
@@ -142,12 +154,9 @@ class LLMEnhancer:
|
|
| 142 |
cleaned_response, scene_type, original_desc
|
| 143 |
)
|
| 144 |
|
| 145 |
-
#
|
| 146 |
-
perspective = self.quality_validator.extract_perspective_from_description(original_desc)
|
| 147 |
-
if perspective and perspective.lower() not in cleaned_response.lower():
|
| 148 |
-
cleaned_response = f"{perspective}, {cleaned_response[0].lower()}{cleaned_response[1:]}"
|
| 149 |
|
| 150 |
-
# 13.
|
| 151 |
identical_final_cleanup = [
|
| 152 |
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
| 153 |
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
|
@@ -157,6 +166,7 @@ class LLMEnhancer:
|
|
| 157 |
|
| 158 |
for pattern, replacement in identical_final_cleanup:
|
| 159 |
cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
|
|
|
|
| 160 |
|
| 161 |
# 14. 最終驗證:如果結果過短,嘗試fallback
|
| 162 |
final_result = cleaned_response.strip()
|
|
@@ -183,6 +193,7 @@ class LLMEnhancer:
|
|
| 183 |
|
| 184 |
# 15. display enhanced description
|
| 185 |
self.logger.info(f"Scene description enhancement completed successfully ({len(final_result)} chars)")
|
|
|
|
| 186 |
return final_result
|
| 187 |
|
| 188 |
except Exception as e:
|
|
|
|
| 126 |
# 10. 移除解釋性注釋
|
| 127 |
cleaned_response = self.response_processor.remove_explanatory_notes(raw_cleaned)
|
| 128 |
|
| 129 |
+
# self.logger.info(f"DEBUG: Before factual verification: {cleaned_response[:50]}...")
|
| 130 |
+
|
| 131 |
+
# 10.5 事實準確性驗證
|
| 132 |
+
try:
|
| 133 |
+
cleaned_response = self.quality_validator.verify_factual_accuracy(
|
| 134 |
+
original_desc, cleaned_response, object_list
|
| 135 |
+
)
|
| 136 |
+
except Exception:
|
| 137 |
+
self.logger.warning("Fact verification failed; using response without verification")
|
| 138 |
+
|
| 139 |
+
# self.logger.info(f"DEBUG: After factual verification: {cleaned_response[:50]}...")
|
| 140 |
+
|
| 141 |
# 11. 事實準確性驗證
|
| 142 |
try:
|
| 143 |
cleaned_response = self.quality_validator.verify_factual_accuracy(
|
|
|
|
| 154 |
cleaned_response, scene_type, original_desc
|
| 155 |
)
|
| 156 |
|
| 157 |
+
# print(f"DEBUG: After scene type consistency: {cleaned_response[:50]}...")
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
+
# 13. 最終的 identical 詞彙清理(確保LLM輸出不包含重複性描述)
|
| 160 |
identical_final_cleanup = [
|
| 161 |
(r'\b(\d+)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
| 162 |
(r'\b(two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+identical\s+([a-zA-Z\s]+)', r'\1 \2'),
|
|
|
|
| 166 |
|
| 167 |
for pattern, replacement in identical_final_cleanup:
|
| 168 |
cleaned_response = re.sub(pattern, replacement, cleaned_response, flags=re.IGNORECASE)
|
| 169 |
+
# print(f"DEBUG: After identical cleanup: {cleaned_response[:50]}...")
|
| 170 |
|
| 171 |
# 14. 最終驗證:如果結果過短,嘗試fallback
|
| 172 |
final_result = cleaned_response.strip()
|
|
|
|
| 193 |
|
| 194 |
# 15. display enhanced description
|
| 195 |
self.logger.info(f"Scene description enhancement completed successfully ({len(final_result)} chars)")
|
| 196 |
+
# print(f"DEBUG: LLMEnhancer final_result before return: {final_result[:50]}..." if final_result else "DEBUG: LLMEnhancer final_result is empty")
|
| 197 |
return final_result
|
| 198 |
|
| 199 |
except Exception as e:
|
llm_model_manager.py
CHANGED
|
@@ -173,19 +173,6 @@ class LLMModelManager:
|
|
| 173 |
self.logger.debug("GPU cache cleared")
|
| 174 |
|
| 175 |
def generate_response(self, prompt: str, **generation_kwargs) -> str:
|
| 176 |
-
"""
|
| 177 |
-
生成LLM回應
|
| 178 |
-
|
| 179 |
-
Args:
|
| 180 |
-
prompt: 輸入提示詞
|
| 181 |
-
**generation_kwargs: 額外的生成參數,可覆蓋預設值
|
| 182 |
-
|
| 183 |
-
Returns:
|
| 184 |
-
str: 生成的回應文本
|
| 185 |
-
|
| 186 |
-
Raises:
|
| 187 |
-
ModelGenerationError: 當生成失敗時
|
| 188 |
-
"""
|
| 189 |
# 確保模型已載入
|
| 190 |
if not self._model_loaded:
|
| 191 |
self._load_model()
|
|
@@ -194,6 +181,10 @@ class LLMModelManager:
|
|
| 194 |
self.call_count += 1
|
| 195 |
self.logger.info(f"Generating response (call #{self.call_count})")
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
# clean GPU
|
| 198 |
self._clear_gpu_cache()
|
| 199 |
|
|
@@ -216,14 +207,21 @@ class LLMModelManager:
|
|
| 216 |
"use_cache": True,
|
| 217 |
})
|
| 218 |
|
| 219 |
-
#
|
| 220 |
with torch.no_grad():
|
| 221 |
outputs = self.model.generate(inputs.input_ids, **generation_params)
|
| 222 |
|
| 223 |
# 解碼回應
|
| 224 |
full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
response = self._extract_generated_response(full_response, prompt)
|
| 226 |
|
|
|
|
|
|
|
|
|
|
| 227 |
if not response or len(response.strip()) < 10:
|
| 228 |
raise ModelGenerationError("Generated response is too short or empty")
|
| 229 |
|
|
@@ -281,13 +279,6 @@ class LLMModelManager:
|
|
| 281 |
def _extract_generated_response(self, full_response: str, prompt: str) -> str:
|
| 282 |
"""
|
| 283 |
從完整回應中提取生成的部分
|
| 284 |
-
|
| 285 |
-
Args:
|
| 286 |
-
full_response: 模型的完整輸出
|
| 287 |
-
prompt: 原始提示詞
|
| 288 |
-
|
| 289 |
-
Returns:
|
| 290 |
-
str: 提取的生成回應
|
| 291 |
"""
|
| 292 |
# 尋找assistant標記
|
| 293 |
assistant_tag = "<|assistant|>"
|
|
@@ -298,14 +289,42 @@ class LLMModelManager:
|
|
| 298 |
user_tag = "<|user|>"
|
| 299 |
if user_tag in response:
|
| 300 |
response = response.split(user_tag)[0].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
return response
|
| 303 |
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
|
| 308 |
-
return
|
| 309 |
|
| 310 |
def reset_context(self):
|
| 311 |
"""重置模型上下文,清理GPU緩存"""
|
|
|
|
| 173 |
self.logger.debug("GPU cache cleared")
|
| 174 |
|
| 175 |
def generate_response(self, prompt: str, **generation_kwargs) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
# 確保模型已載入
|
| 177 |
if not self._model_loaded:
|
| 178 |
self._load_model()
|
|
|
|
| 181 |
self.call_count += 1
|
| 182 |
self.logger.info(f"Generating response (call #{self.call_count})")
|
| 183 |
|
| 184 |
+
# # record input prompt
|
| 185 |
+
# self.logger.info(f"DEBUG: Input prompt length: {len(prompt)}")
|
| 186 |
+
# self.logger.info(f"DEBUG: Input prompt preview: {prompt[:200]}...")
|
| 187 |
+
|
| 188 |
# clean GPU
|
| 189 |
self._clear_gpu_cache()
|
| 190 |
|
|
|
|
| 207 |
"use_cache": True,
|
| 208 |
})
|
| 209 |
|
| 210 |
+
# response
|
| 211 |
with torch.no_grad():
|
| 212 |
outputs = self.model.generate(inputs.input_ids, **generation_params)
|
| 213 |
|
| 214 |
# 解碼回應
|
| 215 |
full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 216 |
+
|
| 217 |
+
# # record whole response
|
| 218 |
+
# self.logger.info(f"DEBUG: Full LLM response: {full_response}")
|
| 219 |
+
|
| 220 |
response = self._extract_generated_response(full_response, prompt)
|
| 221 |
|
| 222 |
+
# # 記錄提取後的回應
|
| 223 |
+
# self.logger.info(f"DEBUG: Extracted response: {response}")
|
| 224 |
+
|
| 225 |
if not response or len(response.strip()) < 10:
|
| 226 |
raise ModelGenerationError("Generated response is too short or empty")
|
| 227 |
|
|
|
|
| 279 |
def _extract_generated_response(self, full_response: str, prompt: str) -> str:
|
| 280 |
"""
|
| 281 |
從完整回應中提取生成的部分
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
"""
|
| 283 |
# 尋找assistant標記
|
| 284 |
assistant_tag = "<|assistant|>"
|
|
|
|
| 289 |
user_tag = "<|user|>"
|
| 290 |
if user_tag in response:
|
| 291 |
response = response.split(user_tag)[0].strip()
|
| 292 |
+
else:
|
| 293 |
+
# 移除輸入提示詞
|
| 294 |
+
if full_response.startswith(prompt):
|
| 295 |
+
response = full_response[len(prompt):].strip()
|
| 296 |
+
else:
|
| 297 |
+
response = full_response.strip()
|
| 298 |
+
|
| 299 |
+
# 移除不自然的場景類型前綴
|
| 300 |
+
response = self._remove_scene_type_prefixes(response)
|
| 301 |
+
|
| 302 |
+
return response
|
| 303 |
+
|
| 304 |
+
def _remove_scene_type_prefixes(self, response: str) -> str:
|
| 305 |
+
"""
|
| 306 |
+
移除LLM生成回應中的場景類型前綴
|
| 307 |
|
| 308 |
+
Args:
|
| 309 |
+
response: 原始LLM回應
|
| 310 |
+
|
| 311 |
+
Returns:
|
| 312 |
+
str: 移除前綴後的回應
|
| 313 |
+
"""
|
| 314 |
+
if not response:
|
| 315 |
return response
|
| 316 |
|
| 317 |
+
prefix_patterns = [r'^[A-Za-z]+\,\s*']
|
| 318 |
+
|
| 319 |
+
# 應用清理模式
|
| 320 |
+
for pattern in prefix_patterns:
|
| 321 |
+
response = re.sub(pattern, '', response, flags=re.IGNORECASE)
|
| 322 |
+
|
| 323 |
+
# 確保首字母大寫
|
| 324 |
+
if response and response[0].islower():
|
| 325 |
+
response = response[0].upper() + response[1:]
|
| 326 |
|
| 327 |
+
return response.strip()
|
| 328 |
|
| 329 |
def reset_context(self):
|
| 330 |
"""重置模型上下文,清理GPU緩存"""
|
object_description_generator.py
CHANGED
|
@@ -389,7 +389,7 @@ class ObjectDescriptionGenerator:
|
|
| 389 |
def optimize_object_description(self, description: str) -> str:
|
| 390 |
"""
|
| 391 |
優化物件描述文本,消除冗餘重複並改善表達流暢度
|
| 392 |
-
|
| 393 |
這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
|
| 394 |
產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
|
| 395 |
列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
|
|
@@ -402,31 +402,31 @@ class ObjectDescriptionGenerator:
|
|
| 402 |
"""
|
| 403 |
try:
|
| 404 |
import re
|
| 405 |
-
|
| 406 |
-
# 1.
|
| 407 |
# 使用通用模式來識別和移除不必要的空間描述
|
| 408 |
# 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
|
| 409 |
description = self._remove_redundant_spatial_qualifiers(description)
|
| 410 |
|
| 411 |
-
# 2.
|
| 412 |
-
# 尋找形如 "with X, Y, Z" 或 "with X and Y"
|
| 413 |
# 使用正則表達式捕獲 "with" 關鍵字後的物件序列
|
| 414 |
# 注意:正則表達式需要修正以避免貪婪匹配的問題
|
| 415 |
object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
|
| 416 |
-
|
| 417 |
# 遍歷每個找到的物件列表進行重複檢測和優化
|
| 418 |
for obj_list in object_lists:
|
| 419 |
-
# 3. 解析單個物件列表中的項目
|
| 420 |
# 使用更精確的正則表達式來分割物件項目
|
| 421 |
# 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
|
| 422 |
# 需要特別注意處理最後一個 "and" 的情況
|
| 423 |
-
|
| 424 |
# 先處理逗號格式 "A, B, and C"
|
| 425 |
if ", and " in obj_list:
|
| 426 |
# 分割 ", and " 前後的部分
|
| 427 |
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
| 428 |
last_item = obj_list.rsplit(", and ", 1)[1]
|
| 429 |
-
|
| 430 |
# 處理前面的項目(用逗號分割)
|
| 431 |
front_items = [item.strip() for item in before_last_and.split(",")]
|
| 432 |
# 添加最後一個項目
|
|
@@ -437,11 +437,11 @@ class ObjectDescriptionGenerator:
|
|
| 437 |
else:
|
| 438 |
# 處理純逗號分隔的列表
|
| 439 |
all_items = [item.strip() for item in obj_list.split(",")]
|
| 440 |
-
|
| 441 |
-
# 4. 統計物件出現頻率
|
| 442 |
# 建立字典來記錄每個物件的出現次數
|
| 443 |
item_counts = {}
|
| 444 |
-
|
| 445 |
for item in all_items:
|
| 446 |
# 清理項目文字並過濾無效內容
|
| 447 |
item = item.strip()
|
|
@@ -453,11 +453,11 @@ class ObjectDescriptionGenerator:
|
|
| 453 |
if clean_item not in item_counts:
|
| 454 |
item_counts[clean_item] = 0
|
| 455 |
item_counts[clean_item] += 1
|
| 456 |
-
|
| 457 |
-
# 5. 生成優化後的物件列表
|
| 458 |
if item_counts:
|
| 459 |
new_items = []
|
| 460 |
-
|
| 461 |
for item, count in item_counts.items():
|
| 462 |
if count > 1:
|
| 463 |
# 對於重複項目,使用數字加複數形式
|
|
@@ -466,8 +466,8 @@ class ObjectDescriptionGenerator:
|
|
| 466 |
else:
|
| 467 |
# 單個項目保持原樣
|
| 468 |
new_items.append(item)
|
| 469 |
-
|
| 470 |
-
# 6. 重新格式化物件列表
|
| 471 |
# 使用標準的英文列表連接格式
|
| 472 |
if len(new_items) == 1:
|
| 473 |
new_list = new_items[0]
|
|
@@ -476,13 +476,13 @@ class ObjectDescriptionGenerator:
|
|
| 476 |
else:
|
| 477 |
# 使用逗號格式確保清晰度
|
| 478 |
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
| 479 |
-
|
| 480 |
# 7. 在原文中替換優化後的列表
|
| 481 |
-
#
|
| 482 |
description = description.replace(obj_list, new_list)
|
| 483 |
-
|
| 484 |
return description
|
| 485 |
-
|
| 486 |
except Exception as e:
|
| 487 |
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
| 488 |
return description
|
|
@@ -490,19 +490,19 @@ class ObjectDescriptionGenerator:
|
|
| 490 |
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
| 491 |
"""
|
| 492 |
移除描述中冗餘的空間限定詞
|
| 493 |
-
|
| 494 |
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
| 495 |
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
| 496 |
床這個物件本身就是室內環境。
|
| 497 |
-
|
| 498 |
Args:
|
| 499 |
description: 包含可能多餘空間描述的文本
|
| 500 |
-
|
| 501 |
Returns:
|
| 502 |
str: 移除多餘空間限定詞後的文本
|
| 503 |
"""
|
| 504 |
import re
|
| 505 |
-
|
| 506 |
# 定義常見的多餘空間表達模式
|
| 507 |
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
| 508 |
redundant_patterns = [
|
|
@@ -515,23 +515,23 @@ class ObjectDescriptionGenerator:
|
|
| 515 |
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
| 516 |
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
| 517 |
]
|
| 518 |
-
|
| 519 |
for pattern, replacement in redundant_patterns:
|
| 520 |
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
| 521 |
-
|
| 522 |
return description
|
| 523 |
|
| 524 |
|
| 525 |
def _normalize_item_for_counting(self, item: str) -> str:
|
| 526 |
"""
|
| 527 |
正規化物件項目以便準確計數
|
| 528 |
-
|
| 529 |
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
| 530 |
確保 "a car" 和 "car" 被視為同一物件類型。
|
| 531 |
-
|
| 532 |
Args:
|
| 533 |
item: 原始物件項目字串
|
| 534 |
-
|
| 535 |
Returns:
|
| 536 |
str: 正規化後的物件項目
|
| 537 |
"""
|
|
@@ -542,10 +542,10 @@ class ObjectDescriptionGenerator:
|
|
| 542 |
def _make_plural(self, item: str) -> str:
|
| 543 |
"""
|
| 544 |
將單數名詞轉換為複數形式
|
| 545 |
-
|
| 546 |
Args:
|
| 547 |
item: 單數形式的名詞
|
| 548 |
-
|
| 549 |
Returns:
|
| 550 |
str: 複數形式的名詞
|
| 551 |
"""
|
|
@@ -589,22 +589,23 @@ class ObjectDescriptionGenerator:
|
|
| 589 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
| 590 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
| 591 |
|
| 592 |
-
# 1.
|
| 593 |
ambiance_parts = []
|
| 594 |
if lighting_info:
|
| 595 |
time_of_day = lighting_info.get("time_of_day", "unknown lighting")
|
| 596 |
is_indoor = lighting_info.get("is_indoor")
|
| 597 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
if is_indoor is True:
|
| 599 |
-
ambiance_statement
|
| 600 |
elif is_indoor is False:
|
| 601 |
-
ambiance_statement
|
| 602 |
else:
|
| 603 |
-
ambiance_statement
|
| 604 |
|
| 605 |
-
# remove underline
|
| 606 |
-
readable_lighting = f"with {time_of_day.replace('_', ' ')} lighting conditions"
|
| 607 |
-
ambiance_statement += f", likely {readable_lighting}."
|
| 608 |
ambiance_parts.append(ambiance_statement)
|
| 609 |
|
| 610 |
if viewpoint and viewpoint != "eye_level":
|
|
|
|
| 389 |
def optimize_object_description(self, description: str) -> str:
|
| 390 |
"""
|
| 391 |
優化物件描述文本,消除冗餘重複並改善表達流暢度
|
| 392 |
+
|
| 393 |
這個函數是後處理階段的關鍵組件,負責清理和精簡自然語言生成系統
|
| 394 |
產出的描述文字。它專門處理常見的重複問題,如相同物件的重複
|
| 395 |
列舉和冗餘的空間描述,讓最終的描述更簡潔自然。
|
|
|
|
| 402 |
"""
|
| 403 |
try:
|
| 404 |
import re
|
| 405 |
+
|
| 406 |
+
# 1. 處理多餘的空間限定表達
|
| 407 |
# 使用通用模式來識別和移除不必要的空間描述
|
| 408 |
# 例如:"bed in the room" -> "bed",因為床本身就表示是室內環境
|
| 409 |
description = self._remove_redundant_spatial_qualifiers(description)
|
| 410 |
|
| 411 |
+
# 2. 辨識並處理物件列表的重複問題
|
| 412 |
+
# 尋找形如 "with X, Y, Z" 或 "with X and Y" 的物件列表
|
| 413 |
# 使用正則表達式捕獲 "with" 關鍵字後的物件序列
|
| 414 |
# 注意:正則表達式需要修正以避免貪婪匹配的問題
|
| 415 |
object_lists = re.findall(r'with ([^.]+?)(?=\.|$)', description)
|
| 416 |
+
|
| 417 |
# 遍歷每個找到的物件列表進行重複檢測和優化
|
| 418 |
for obj_list in object_lists:
|
| 419 |
+
# 3. 解析單個物件列表中的項目
|
| 420 |
# 使用更精確的正則表達式來分割物件項目
|
| 421 |
# 處理 "X, Y, and Z" 或 "X and Y" 格式的列表
|
| 422 |
# 需要特別注意處理最後一個 "and" 的情況
|
| 423 |
+
|
| 424 |
# 先處理逗號格式 "A, B, and C"
|
| 425 |
if ", and " in obj_list:
|
| 426 |
# 分割 ", and " 前後的部分
|
| 427 |
before_last_and = obj_list.rsplit(", and ", 1)[0]
|
| 428 |
last_item = obj_list.rsplit(", and ", 1)[1]
|
| 429 |
+
|
| 430 |
# 處理前面的項目(用逗號分割)
|
| 431 |
front_items = [item.strip() for item in before_last_and.split(",")]
|
| 432 |
# 添加最後一個項目
|
|
|
|
| 437 |
else:
|
| 438 |
# 處理純逗號分隔的列表
|
| 439 |
all_items = [item.strip() for item in obj_list.split(",")]
|
| 440 |
+
|
| 441 |
+
# 4. 統計物件出現頻率
|
| 442 |
# 建立字典來記錄每個物件的出現次數
|
| 443 |
item_counts = {}
|
| 444 |
+
|
| 445 |
for item in all_items:
|
| 446 |
# 清理項目文字並過濾無效內容
|
| 447 |
item = item.strip()
|
|
|
|
| 453 |
if clean_item not in item_counts:
|
| 454 |
item_counts[clean_item] = 0
|
| 455 |
item_counts[clean_item] += 1
|
| 456 |
+
|
| 457 |
+
# 5. 生成優化後的物件列表
|
| 458 |
if item_counts:
|
| 459 |
new_items = []
|
| 460 |
+
|
| 461 |
for item, count in item_counts.items():
|
| 462 |
if count > 1:
|
| 463 |
# 對於重複項目,使用數字加複數形式
|
|
|
|
| 466 |
else:
|
| 467 |
# 單個項目保持原樣
|
| 468 |
new_items.append(item)
|
| 469 |
+
|
| 470 |
+
# 6. 重新格式化物件列表
|
| 471 |
# 使用標準的英文列表連接格式
|
| 472 |
if len(new_items) == 1:
|
| 473 |
new_list = new_items[0]
|
|
|
|
| 476 |
else:
|
| 477 |
# 使用逗號格式確保清晰度
|
| 478 |
new_list = ", ".join(new_items[:-1]) + f", and {new_items[-1]}"
|
| 479 |
+
|
| 480 |
# 7. 在原文中替換優化後的列表
|
| 481 |
+
# 將原始的多餘列表替換為優化後的簡潔版本
|
| 482 |
description = description.replace(obj_list, new_list)
|
| 483 |
+
|
| 484 |
return description
|
| 485 |
+
|
| 486 |
except Exception as e:
|
| 487 |
self.logger.warning(f"Error optimizing object description: {str(e)}")
|
| 488 |
return description
|
|
|
|
| 490 |
def _remove_redundant_spatial_qualifiers(self, description: str) -> str:
|
| 491 |
"""
|
| 492 |
移除描述中冗餘的空間限定詞
|
| 493 |
+
|
| 494 |
這個方法使用模式匹配來識別和移除不必要的空間描述,例如
|
| 495 |
"bed in the room" 中的 "in the room" 部分通常是多餘的,因為
|
| 496 |
床這個物件本身就是室內環境。
|
| 497 |
+
|
| 498 |
Args:
|
| 499 |
description: 包含可能多餘空間描述的文本
|
| 500 |
+
|
| 501 |
Returns:
|
| 502 |
str: 移除多餘空間限定詞後的文本
|
| 503 |
"""
|
| 504 |
import re
|
| 505 |
+
|
| 506 |
# 定義常見的多餘空間表達模式
|
| 507 |
# 這些模式捕獲「物件 + 不必要的空間限定」的情況
|
| 508 |
redundant_patterns = [
|
|
|
|
| 515 |
# 一般性的多餘表達:「在場景中」、「在圖片中」等
|
| 516 |
(r'\b([\w\s]+)\s+in\s+the\s+(scene|image|picture|frame)', r'\1'),
|
| 517 |
]
|
| 518 |
+
|
| 519 |
for pattern, replacement in redundant_patterns:
|
| 520 |
description = re.sub(pattern, replacement, description, flags=re.IGNORECASE)
|
| 521 |
+
|
| 522 |
return description
|
| 523 |
|
| 524 |
|
| 525 |
def _normalize_item_for_counting(self, item: str) -> str:
|
| 526 |
"""
|
| 527 |
正規化物件項目以便準確計數
|
| 528 |
+
|
| 529 |
移除冠詞和其他可能影響計數準確性的前綴詞彙,
|
| 530 |
確保 "a car" 和 "car" 被視為同一物件類型。
|
| 531 |
+
|
| 532 |
Args:
|
| 533 |
item: 原始物件項目字串
|
| 534 |
+
|
| 535 |
Returns:
|
| 536 |
str: 正規化後的物件項目
|
| 537 |
"""
|
|
|
|
| 542 |
def _make_plural(self, item: str) -> str:
|
| 543 |
"""
|
| 544 |
將單數名詞轉換為複數形式
|
| 545 |
+
|
| 546 |
Args:
|
| 547 |
item: 單數形式的名詞
|
| 548 |
+
|
| 549 |
Returns:
|
| 550 |
str: 複數形式的名詞
|
| 551 |
"""
|
|
|
|
| 589 |
self.logger.debug(f"Generating dynamic description for {len(detected_objects)} objects, "
|
| 590 |
f"viewpoint: {viewpoint}, lighting: {lighting_info is not None}")
|
| 591 |
|
| 592 |
+
# 1. 整體氛圍(照明和視角)- 移除室內外標籤
|
| 593 |
ambiance_parts = []
|
| 594 |
if lighting_info:
|
| 595 |
time_of_day = lighting_info.get("time_of_day", "unknown lighting")
|
| 596 |
is_indoor = lighting_info.get("is_indoor")
|
| 597 |
+
|
| 598 |
+
# 直接描述照明條件,不加入室內外標籤
|
| 599 |
+
readable_lighting = f"{time_of_day.replace('_', ' ')} lighting conditions"
|
| 600 |
+
|
| 601 |
+
# 根據室內外環境調整描述但不直接標明
|
| 602 |
if is_indoor is True:
|
| 603 |
+
ambiance_statement = f"The scene features {readable_lighting} characteristic of an interior space."
|
| 604 |
elif is_indoor is False:
|
| 605 |
+
ambiance_statement = f"The scene displays {readable_lighting} typical of an outdoor environment."
|
| 606 |
else:
|
| 607 |
+
ambiance_statement = f"The scene presents {readable_lighting}."
|
| 608 |
|
|
|
|
|
|
|
|
|
|
| 609 |
ambiance_parts.append(ambiance_statement)
|
| 610 |
|
| 611 |
if viewpoint and viewpoint != "eye_level":
|
response_processor.py
CHANGED
|
@@ -60,7 +60,11 @@ class ResponseProcessor:
|
|
| 60 |
"Here is a rewritten scene description that adheres to the provided critical rules:",
|
| 61 |
"Here is the rewritten scene description:",
|
| 62 |
"Here's a rewritten scene description:",
|
| 63 |
-
"The rewritten scene description is as follows:"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
]
|
| 65 |
|
| 66 |
# 設置需要移除的後綴短語
|
|
@@ -187,23 +191,13 @@ class ResponseProcessor:
|
|
| 187 |
raise ResponseProcessingError(error_msg) from e
|
| 188 |
|
| 189 |
def clean_response(self, response: str, model_type: str = "general") -> str:
|
| 190 |
-
"""
|
| 191 |
-
清理LLM回應
|
| 192 |
-
|
| 193 |
-
Args:
|
| 194 |
-
response: 原始LLM回應
|
| 195 |
-
model_type: 模型類型(用於特定清理規則)
|
| 196 |
-
|
| 197 |
-
Returns:
|
| 198 |
-
str: 清理後的回應
|
| 199 |
-
|
| 200 |
-
Raises:
|
| 201 |
-
ResponseProcessingError: 當回應處理失敗時
|
| 202 |
-
"""
|
| 203 |
if not response:
|
| 204 |
raise ResponseProcessingError("Empty response provided for cleaning")
|
| 205 |
|
| 206 |
try:
|
|
|
|
|
|
|
|
|
|
| 207 |
self.logger.debug(f"Starting response cleaning (original length: {len(response)})")
|
| 208 |
|
| 209 |
# 保存原始回應作為備份
|
|
@@ -215,6 +209,9 @@ class ResponseProcessor:
|
|
| 215 |
else:
|
| 216 |
cleaned_response = self._clean_general_response(response)
|
| 217 |
|
|
|
|
|
|
|
|
|
|
| 218 |
# 如果清理後內容過短,嘗試從原始回應中恢復
|
| 219 |
if len(cleaned_response.strip()) < 40:
|
| 220 |
self.logger.warning("Cleaned response too short, attempting recovery")
|
|
@@ -447,23 +444,52 @@ class ResponseProcessor:
|
|
| 447 |
return response
|
| 448 |
|
| 449 |
def _remove_introduction_prefixes(self, response: str) -> str:
|
| 450 |
-
"""
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 456 |
]
|
| 457 |
|
| 458 |
-
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
-
|
| 462 |
-
for prefix in self.prefixes_to_remove:
|
| 463 |
-
if response.lower().startswith(prefix.lower()):
|
| 464 |
-
response = response[len(prefix):].strip()
|
| 465 |
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
def _remove_format_markers(self, response: str) -> str:
|
| 469 |
"""移除格式標記和上下文標籤(保留括號內的地理與細節資訊)"""
|
|
@@ -668,7 +694,7 @@ class ResponseProcessor:
|
|
| 668 |
# 數字到文字
|
| 669 |
number_conversions = {
|
| 670 |
'2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
|
| 671 |
-
'7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
|
| 672 |
'11': 'eleven', '12': 'twelve'
|
| 673 |
}
|
| 674 |
|
|
@@ -677,15 +703,15 @@ class ResponseProcessor:
|
|
| 677 |
# 模式1: 數字 + 單一複數詞 (如 "7 chairs")
|
| 678 |
pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
|
| 679 |
processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
|
| 680 |
-
|
| 681 |
# 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
|
| 682 |
pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
|
| 683 |
processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
|
| 684 |
-
|
| 685 |
# 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
|
| 686 |
pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
|
| 687 |
processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
|
| 688 |
-
|
| 689 |
# 模式4: 介詞片語中的數字 (如 "around 2 tables")
|
| 690 |
pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
|
| 691 |
processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
|
|
@@ -978,6 +1004,25 @@ class ResponseProcessor:
|
|
| 978 |
|
| 979 |
def _final_formatting(self, response: str) -> str:
|
| 980 |
"""最終格式化處理"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 981 |
# 確保首字母大寫
|
| 982 |
if response and response[0].islower():
|
| 983 |
response = response[0].upper() + response[1:]
|
|
@@ -988,6 +1033,35 @@ class ResponseProcessor:
|
|
| 988 |
|
| 989 |
return response.strip()
|
| 990 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 991 |
def _recover_from_overcleaning(self, original_response: str) -> str:
|
| 992 |
"""從過度清理中恢復內容"""
|
| 993 |
try:
|
|
|
|
| 60 |
"Here is a rewritten scene description that adheres to the provided critical rules:",
|
| 61 |
"Here is the rewritten scene description:",
|
| 62 |
"Here's a rewritten scene description:",
|
| 63 |
+
"The rewritten scene description is as follows:",
|
| 64 |
+
"indoor,",
|
| 65 |
+
"outdoor,",
|
| 66 |
+
"indoor ",
|
| 67 |
+
"outdoor "
|
| 68 |
]
|
| 69 |
|
| 70 |
# 設置需要移除的後綴短語
|
|
|
|
| 191 |
raise ResponseProcessingError(error_msg) from e
|
| 192 |
|
| 193 |
def clean_response(self, response: str, model_type: str = "general") -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
if not response:
|
| 195 |
raise ResponseProcessingError("Empty response provided for cleaning")
|
| 196 |
|
| 197 |
try:
|
| 198 |
+
# 調試:記錄清理前的原始回應
|
| 199 |
+
self.logger.info(f"DEBUG: Response before cleaning: {response}")
|
| 200 |
+
|
| 201 |
self.logger.debug(f"Starting response cleaning (original length: {len(response)})")
|
| 202 |
|
| 203 |
# 保存原始回應作為備份
|
|
|
|
| 209 |
else:
|
| 210 |
cleaned_response = self._clean_general_response(response)
|
| 211 |
|
| 212 |
+
# 調試:記錄清理後的回應
|
| 213 |
+
self.logger.info(f"DEBUG: Response after cleaning: {cleaned_response}")
|
| 214 |
+
|
| 215 |
# 如果清理後內容過短,嘗試從原始回應中恢復
|
| 216 |
if len(cleaned_response.strip()) < 40:
|
| 217 |
self.logger.warning("Cleaned response too short, attempting recovery")
|
|
|
|
| 444 |
return response
|
| 445 |
|
| 446 |
def _remove_introduction_prefixes(self, response: str) -> str:
|
| 447 |
+
"""
|
| 448 |
+
移除介紹性前綴,強化對多種模式的處理。
|
| 449 |
+
"""
|
| 450 |
+
if not response:
|
| 451 |
+
return ""
|
| 452 |
+
|
| 453 |
+
cleaned_response = response.strip()
|
| 454 |
+
|
| 455 |
+
# 1. 將所有要移除的前綴模式合併成一個大的正則表達式
|
| 456 |
+
# - r'^(?: ... )' 表示從字串開頭匹配非捕獲分組
|
| 457 |
+
# - '|' 用於分隔不同的模式
|
| 458 |
+
# - re.escape() 用於安全地處理 self.prefixes_to_remove 中的特殊字符
|
| 459 |
+
# - `\\s*,?` 處理可選的逗號和空格
|
| 460 |
+
# - `\\s*` 處理結尾的任意空格
|
| 461 |
+
all_prefix_patterns = [
|
| 462 |
+
r'Here\s+is\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?scene\s+description.*?:',
|
| 463 |
+
r'The\s+(?:rewritten\s+|enhanced\s+)?(?:scene\s+)?description\s+is.*?:',
|
| 464 |
+
r'Here\'s\s+(?:a\s+|the\s+)?(?:rewritten\s+|enhanced\s+)?description.*?:',
|
| 465 |
+
|
| 466 |
+
# 這個模式會匹配這些詞,無論後面是逗號還是空格
|
| 467 |
+
r'(?:indoor|outdoor|inside|outside)\s*,?'
|
| 468 |
]
|
| 469 |
|
| 470 |
+
# 將 self.prefixes_to_remove 中的字符串也轉換為正則表達式模式
|
| 471 |
+
# 確保 self.prefixes_to_remove 存在,否則提供一個空列表
|
| 472 |
+
prefixes_to_add = getattr(self, 'prefixes_to_remove', [])
|
| 473 |
+
for prefix in prefixes_to_add:
|
| 474 |
+
# 使用 re.escape 來確保前綴中的任何特殊字符被正確處理
|
| 475 |
+
all_prefix_patterns.append(re.escape(prefix))
|
| 476 |
|
| 477 |
+
cleaned_response = re.sub(r'^(?:indoor|outdoor|inside|outside)\s*,?\s*', '', cleaned_response, flags=re.IGNORECASE).strip()
|
|
|
|
|
|
|
|
|
|
| 478 |
|
| 479 |
+
# 將所有模式用 '|' 連接起來,形成一個大的組合模式
|
| 480 |
+
# 我們在模式的結尾加上 \\s* 來匹配並移除前綴後可能跟隨的空格
|
| 481 |
+
combined_pattern = r'^(?:' + '|'.join(all_prefix_patterns) + r')\s*'
|
| 482 |
+
|
| 483 |
+
# 2. 執行一次性的替換,並忽略大小寫
|
| 484 |
+
# 這一行程式碼會移除所有匹配到的前綴
|
| 485 |
+
cleaned_response = re.sub(combined_pattern, '', cleaned_response, flags=re.IGNORECASE).strip()
|
| 486 |
+
|
| 487 |
+
# 3. 確保首字母大寫
|
| 488 |
+
# 移除前綴後,新的句首可能變成小寫, 這邊得修正
|
| 489 |
+
if cleaned_response:
|
| 490 |
+
cleaned_response = cleaned_response[0].upper() + cleaned_response[1:]
|
| 491 |
+
|
| 492 |
+
return cleaned_response
|
| 493 |
|
| 494 |
def _remove_format_markers(self, response: str) -> str:
|
| 495 |
"""移除格式標記和上下文標籤(保留括號內的地理與細節資訊)"""
|
|
|
|
| 694 |
# 數字到文字
|
| 695 |
number_conversions = {
|
| 696 |
'2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
|
| 697 |
+
'7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
|
| 698 |
'11': 'eleven', '12': 'twelve'
|
| 699 |
}
|
| 700 |
|
|
|
|
| 703 |
# 模式1: 數字 + 單一複數詞 (如 "7 chairs")
|
| 704 |
pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
|
| 705 |
processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
|
| 706 |
+
|
| 707 |
# 模式2: 數字 + 修飾詞 + 複數詞 (如 "7 more chairs")
|
| 708 |
pattern2 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
|
| 709 |
processed_response = re.sub(pattern2, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
|
| 710 |
+
|
| 711 |
# 模式3: 數字 + 形容詞 + 複數詞 (如 "2 dining tables")
|
| 712 |
pattern3 = rf'\b{digit}\s+([a-zA-Z]+)\s+([a-zA-Z]+s)\b'
|
| 713 |
processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
|
| 714 |
+
|
| 715 |
# 模式4: 介詞片語中的數字 (如 "around 2 tables")
|
| 716 |
pattern4 = rf'\b(around|approximately|about)\s+{digit}\s+([a-zA-Z]+s)\b'
|
| 717 |
processed_response = re.sub(pattern4, rf'\1 {word} \2', processed_response, flags=re.IGNORECASE)
|
|
|
|
| 1004 |
|
| 1005 |
def _final_formatting(self, response: str) -> str:
|
| 1006 |
"""最終格式化處理"""
|
| 1007 |
+
# 專門處理 "indoor," 前綴問題
|
| 1008 |
+
indoor_patterns = [
|
| 1009 |
+
r'^indoor\s*,\s*',
|
| 1010 |
+
r'^outdoor\s*,\s*',
|
| 1011 |
+
r'^inside\s*,\s*',
|
| 1012 |
+
r'^outside\s*,\s*',
|
| 1013 |
+
r'^indoor\s+',
|
| 1014 |
+
r'^outdoor\s+',
|
| 1015 |
+
]
|
| 1016 |
+
|
| 1017 |
+
for pattern in indoor_patterns:
|
| 1018 |
+
response = re.sub(pattern, '', response, flags=re.IGNORECASE)
|
| 1019 |
+
|
| 1020 |
+
# 移除開頭的空白和標點符號
|
| 1021 |
+
response = re.sub(r'^[\s,;:.-]+', '', response)
|
| 1022 |
+
|
| 1023 |
+
# 修復常見的語法問題
|
| 1024 |
+
response = self._fix_grammatical_issues(response)
|
| 1025 |
+
|
| 1026 |
# 確保首字母大寫
|
| 1027 |
if response and response[0].islower():
|
| 1028 |
response = response[0].upper() + response[1:]
|
|
|
|
| 1033 |
|
| 1034 |
return response.strip()
|
| 1035 |
|
| 1036 |
+
def _fix_grammatical_issues(self, response: str) -> str:
|
| 1037 |
+
"""修復常見的語法問題"""
|
| 1038 |
+
if not response:
|
| 1039 |
+
return response
|
| 1040 |
+
|
| 1041 |
+
# 修復不完整的句子開頭
|
| 1042 |
+
grammar_fixes = [
|
| 1043 |
+
# 修復 "A dining table with... A dining table..." 重複問題
|
| 1044 |
+
(r'\b(A|An)\s+([^.!?]*?)\s+\1\s+\2', r'\1 \2'),
|
| 1045 |
+
|
| 1046 |
+
# 修復 "This scene presents a scene" 重複
|
| 1047 |
+
(r'\bThis scene presents a scene\b', 'This scene presents'),
|
| 1048 |
+
|
| 1049 |
+
# 修復不完整的句子 "A dining table with four chairs and a dining table"
|
| 1050 |
+
(r'\b([A-Z][^.!?]*?)\s+and\s+a\s+\1\b', r'\1'),
|
| 1051 |
+
|
| 1052 |
+
# 修復空的介詞短語
|
| 1053 |
+
(r'\bwith\s+with\b', 'with'),
|
| 1054 |
+
(r'\band\s+and\b', 'and'),
|
| 1055 |
+
|
| 1056 |
+
# 確保句子完整性
|
| 1057 |
+
(r'(\w+)\s*\.\s*(\w+)', r'\1. \2'),
|
| 1058 |
+
]
|
| 1059 |
+
|
| 1060 |
+
for pattern, replacement in grammar_fixes:
|
| 1061 |
+
response = re.sub(pattern, replacement, response, flags=re.IGNORECASE)
|
| 1062 |
+
|
| 1063 |
+
return response
|
| 1064 |
+
|
| 1065 |
def _recover_from_overcleaning(self, original_response: str) -> str:
|
| 1066 |
"""從過度清理中恢復內容"""
|
| 1067 |
try:
|