Song committed on
Commit 4368e1d · 1 Parent(s): 4cc218a
Files changed (1)
  1. app.py +31 -20
app.py CHANGED
@@ -263,35 +263,46 @@ class RagPipeline:
         except Exception:
             pass
 
-    @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
-    def _llm_call(self, messages, **kwargs) -> str:
-        start_time = time.time()
-        log.info(f"LLM call started. Model: {LLM_API_CONFIG['model']}, max_tokens: {kwargs.get('max_tokens', 'N/A')}, temperature: {kwargs.get('temperature', 'N/A')}")
+    @tenacity.retry(
+        wait=tenacity.wait_fixed(2),
+        stop=tenacity.stop_after_attempt(3),
+        retry=tenacity.retry_if_exception_type(ValueError),
+        before_sleep=tenacity.before_sleep_log(log, logging.WARNING),
+        after=tenacity.after_log(log, logging.INFO)
+    )
+    def _llm_call(self, messages: List[Dict[str, str]], max_tokens: Optional[int] = None, temperature: Optional[float] = None) -> str:
+        """Safely call the LLM API, treating an empty response body as an error."""
+        log.info(f"LLM call started. Model: {self.model_name}, max_tokens: {max_tokens}, temperature: {temperature}")
 
+        # [DEBUG] Log the full LLM prompt to aid debugging
+        log.info(f"Outgoing LLM prompt (messages): {json.dumps(messages, ensure_ascii=False, indent=2)}")
+
+        start_time = time.time()
         try:
-            config = {**LLM_MODEL_CONFIG, **kwargs}
-            response = self.llm_client.chat.completions.create(
-                model=LLM_API_CONFIG["model"],
+            response = self.client.chat.completions.create(
+                model=self.model_name,
                 messages=messages,
-                temperature=config["temperature"],
-                max_tokens=config["max_tokens"],
+                max_tokens=max_tokens,
+                temperature=temperature,
             )
 
-            # [MODIFIED] Check the response structure and use getattr to fetch the content safely
-            if not response or not response.choices or not response.choices[0].message:
-                log.error(f"LLM call succeeded (200 OK), but the returned JSON structure is incomplete. Response: {response.model_dump_json() if response else 'None'}")
-                raise ValueError("LLM response content is empty or not a string.")
-
-            content = getattr(response.choices[0].message, "content", None)
-            if not isinstance(content, str) or not content.strip():
-                log.error(f"LLM call succeeded (200 OK), but the returned content is empty. Response: {content}")
+            end_time = time.time()
+
+            # [DEBUG] Log the LLM call's full JSON response, even when the content is empty
+            log.info(f"Full LLM response received: {response.model_dump_json(indent=2)}")
+
+            if not response.choices or not response.choices[0].message.content:
+                # Even with a 200 OK status code, an empty body counts as an error
+                log.error("LLM call succeeded (200 OK), but the returned content is empty.")
                 raise ValueError("LLM response content is empty or not a string.")
-
-            elapsed = time.time() - start_time
-            log.info(f"LLM call finished in {elapsed:.2f} s. Content length: {len(content)} characters.")
+
+            content = response.choices[0].message.content
+            log.info(f"LLM call finished in {end_time - start_time:.2f} s. Content length: {len(content)} characters.")
             return content
+
         except Exception as e:
-            log.error(f"LLM API call failed: {e}", exc_info=True)
+            # Catch any other error and re-raise so tenacity can handle the retry
+            log.error(f"LLM API call failed: {e}")
             raise
 
     # [MODIFIED] Implement a dynamic flow: decide whether to use the Reranker based on query complexity
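
For reference, the retry policy introduced above can be exercised in isolation. The sketch below is a minimal standalone illustration, not part of app.py: flaky_llm_call and the simulated responses iterator are hypothetical stand-ins for _llm_call and the API, and only tenacity plus the standard library are assumed. It shows why retry=retry_if_exception_type(ValueError) matters: raising ValueError on an empty body is what turns a "200 OK but empty" response into a retryable failure.

import logging
import tenacity

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("retry_sketch")

# Simulated LLM outputs: two empty responses, then a real one.
responses = iter(["", "", "Retrieval-augmented generation combines search and LLMs."])

@tenacity.retry(
    wait=tenacity.wait_fixed(2),
    stop=tenacity.stop_after_attempt(3),
    retry=tenacity.retry_if_exception_type(ValueError),
    before_sleep=tenacity.before_sleep_log(log, logging.WARNING),
    after=tenacity.after_log(log, logging.INFO)
)
def flaky_llm_call() -> str:
    content = next(responses)
    if not content:
        # Mirrors _llm_call: a 200 OK with an empty body is still an error.
        raise ValueError("LLM response content is empty or not a string.")
    return content

print(flaky_llm_call())  # Fails twice, waits 2 s between attempts, succeeds on the third.

Note that with stop_after_attempt(3), a response that is still empty after the third attempt surfaces as a tenacity.RetryError (tenacity wraps the last exception unless reraise=True is set), so callers of _llm_call must still handle the failure case.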