Song committed on
Commit 4368e1d · 1 Parent(s): 4cc218a
Files changed (1)
  1. app.py +31 -20
app.py CHANGED
@@ -263,35 +263,46 @@ class RagPipeline:
         except Exception:
             pass
 
-    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
-    def _llm_call(self, messages, **kwargs) -> str:
-        start_time = time.time()
-        log.info(f"LLM call started. Model: {LLM_API_CONFIG['model']}, max_tokens: {kwargs.get('max_tokens', 'N/A')}, temperature: {kwargs.get('temperature', 'N/A')}")
+    @tenacity.retry(
+        wait=tenacity.wait_fixed(2),
+        stop=tenacity.stop_after_attempt(3),
+        retry=tenacity.retry_if_exception_type(ValueError),
+        before_sleep=tenacity.before_sleep_log(log, logging.WARNING),
+        after=tenacity.after_log(log, logging.INFO)
+    )
+    def _llm_call(self, messages: List[Dict[str, str]], max_tokens: Optional[int] = None, temperature: Optional[float] = None) -> str:
+        """Safely call the LLM API, treating an empty response body as an error."""
+        log.info(f"LLM call started. Model: {self.model_name}, max_tokens: {max_tokens}, temperature: {temperature}")
 
+        # [DEBUG] Log the full LLM prompt to aid debugging
+        log.info(f"Outgoing LLM prompt (messages): {json.dumps(messages, ensure_ascii=False, indent=2)}")
+
+        start_time = time.time()
         try:
-            config = {**LLM_MODEL_CONFIG, **kwargs}
-            response = self.llm_client.chat.completions.create(
-                model=LLM_API_CONFIG["model"],
+            response = self.client.chat.completions.create(
+                model=self.model_name,
                 messages=messages,
-                temperature=config["temperature"],
-                max_tokens=config["max_tokens"],
+                max_tokens=max_tokens,
+                temperature=temperature,
             )
 
-            # [MODIFIED] Check the response structure and use getattr to fetch the content safely
-            if not response or not response.choices or not response.choices[0].message:
-                log.error(f"LLM call succeeded (200 OK), but the returned JSON structure is incomplete. Response: {response.model_dump_json() if response else 'None'}")
-                raise ValueError("LLM response content is empty or not a string.")
-
-            content = getattr(response.choices[0].message, "content", None)
-            if not isinstance(content, str) or not content.strip():
-                log.error(f"LLM call succeeded (200 OK), but the returned content is empty. Response: {content}")
+            end_time = time.time()
+
+            # [DEBUG] Log the LLM call's full JSON response, even when the content is empty
+            log.info(f"Full LLM response received: {response.model_dump_json(indent=2)}")
+
+            if not response.choices or not response.choices[0].message.content:
+                # Even with a 200 OK status code, an empty body counts as an error
+                log.error("LLM call succeeded (200 OK), but the returned content is empty.")
                 raise ValueError("LLM response content is empty or not a string.")
-
-            elapsed = time.time() - start_time
-            log.info(f"LLM call finished in {elapsed:.2f} s. Content length: {len(content)} characters.")
+
+            content = response.choices[0].message.content
+            log.info(f"LLM call finished in {end_time - start_time:.2f} s. Content length: {len(content)} characters.")
             return content
+
         except Exception as e:
-            log.error(f"LLM API call failed: {e}", exc_info=True)
+            # Catch any other error and re-raise so tenacity can handle the retry
+            log.error(f"LLM API call failed: {e}")
             raise
 
     # [MODIFIED] Implement a dynamic flow: decide whether to use the Reranker based on query complexity
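
For reference, the retry policy introduced above can be exercised in isolation. The sketch below is a minimal standalone illustration, not part of app.py: flaky_llm_call and the simulated responses iterator are hypothetical stand-ins for _llm_call and the API, and only tenacity plus the standard library are assumed. It shows why retry=retry_if_exception_type(ValueError) matters: raising ValueError on an empty body is what turns a "200 OK but empty" response into a retryable failure.

import logging
import tenacity

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("retry_sketch")

# Simulated LLM outputs: two empty responses, then a real one.
responses = iter(["", "", "Retrieval-augmented generation combines search and LLMs."])

@tenacity.retry(
    wait=tenacity.wait_fixed(2),
    stop=tenacity.stop_after_attempt(3),
    retry=tenacity.retry_if_exception_type(ValueError),
    before_sleep=tenacity.before_sleep_log(log, logging.WARNING),
    after=tenacity.after_log(log, logging.INFO)
)
def flaky_llm_call() -> str:
    content = next(responses)
    if not content:
        # Mirrors _llm_call: a 200 OK with an empty body is still an error.
        raise ValueError("LLM response content is empty or not a string.")
    return content

print(flaky_llm_call())  # Fails twice, waits 2 s between attempts, succeeds on the third.

Note that with stop_after_attempt(3), a response that is still empty after the third attempt surfaces as a tenacity.RetryError (tenacity wraps the last exception unless reraise=True is set), so callers of _llm_call must still handle the failure case.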