Song committed on
Commit
f03b053
·
1 Parent(s): 9cbfa48
Files changed (1) hide show
  1. app.py +153 -61
app.py CHANGED
@@ -83,14 +83,14 @@ LM_MODEL = os.getenv("LM_MODEL")
83
 
84
  # ---------- 檢索設定(固定常數) ----------
85
  TOP_K_SENTENCES = 10
86
- BM25_WEIGHT = 0.6
87
- SEM_WEIGHT = 0.4
88
  EMBEDDING_MODEL_ID= "DMetaSoul/Dmeta-embedding-zh"
89
- RERANKER_MODEL_ID = "BAAI/bge-reranker-v2-m3"
90
  USE_CPU = True # HF 預設 CPU
91
  RERANK_THRESHOLD = 0.5
92
  MAX_CONTEXT_CHARS = 8000
93
- DISCLAIMER = "*免責聲明:本資訊僅供參考,若有疑問請諮詢醫師或藥師。*"
94
 
95
  # 藥名映射與停用詞(擴充)
96
  DRUG_NAME_MAPPING = {
@@ -123,6 +123,15 @@ INTENT_KEYWORDS = {
123
  }
124
 
125
  # 章節權重
 
 
 
 
 
 
 
 
 
126
  SECTION_WEIGHTS = {
127
  "用法及用量": 1.0,
128
  "病人使用須知": 1.0,
@@ -238,6 +247,7 @@ class State:
238
  bm25: Optional[Any] = None
239
  df_csv: Optional[pd.DataFrame] = None
240
  user_sessions: Dict[str, Dict[str, Any]] = {} # 簡易 session 快取
 
241
 
242
  STATE = State()
243
 
@@ -328,7 +338,7 @@ def ensure_bm25(pkl_path: str, sentences: List[str]) -> Optional[Any]:
328
  def extract_drug_candidates_from_query(query: str) -> List[str]:
329
  parts = re.split(r"[::]", query, maxsplit=1)
330
  drug_part = parts[0].strip() if len(parts) > 1 else query.strip()
331
- tokens = tokenize_zh(drug_part)
332
  candidates = [t.lower() for t in tokens if len(t) > 2 and t not in DRUG_STOPWORDS]
333
  return [DRUG_NAME_MAPPING.get(c, c) for c in candidates]
334
 
@@ -336,6 +346,10 @@ def find_drug_ids_from_name(candidates: List[str], df: pd.DataFrame) -> List[str
336
  if df is None or df.empty: return []
337
  drug_ids = set()
338
  for cand in candidates:
 
 
 
 
339
  bucket = []
340
  for _, row in df.iterrows():
341
  name_joined = f"{(row.get('drug_name_zh') or '').lower()} {(row.get('drug_name_en') or '').lower()} {(row.get('drug_name_norm') or '').lower()}".strip()
@@ -346,12 +360,15 @@ def find_drug_ids_from_name(candidates: List[str], df: pd.DataFrame) -> List[str
346
  )
347
  else:
348
  raw = 100 if cand in name_joined else 0
349
- if raw >= 72:
350
  score = raw * (2.0 if re.search(r'[a-zA-Z]', cand) else 1.5) * (1 + len(cand)/20)
351
- bucket.append((score, row["drug_id"]))
352
- # 每個 cand 保留前 3 高分(或依你資料量調 1~5)
353
- for _, did in sorted(bucket, reverse=True)[:3]:
354
- drug_ids.add(did)
 
 
 
355
  return list(drug_ids)
356
 
357
  # ---------- 意圖偵測 ----------
@@ -364,26 +381,34 @@ def detect_intent(query: str) -> List[str]:
364
 
365
  # ---------- 檢索 ----------
366
  def rerank_results(query: str, candidates: List[Tuple[int, float, float, float]], sentences: List[str], reranker: Optional[Any], top_k: int, threshold: float) -> List[Dict[str, Any]]:
367
- if not candidates:
368
- return []
369
- valid_indices = [i for (i, *_ ) in candidates if 0 <= i < len(sentences)]
370
- if not valid_indices:
371
- return []
372
- if reranker is None:
373
- return [{"idx": i, "score": fused} for i, fused, _, _ in sorted(candidates, key=lambda x: -x[1])[:top_k]]
374
- pairs = [[query, sentences[i]] for i in valid_indices]
375
- if not pairs:
376
- return []
377
- scores = reranker.predict(pairs)
378
- reranked = [{"idx": valid_indices[j], "score": float(scores[j])} for j in range(len(scores)) if float(scores[j]) >= threshold]
379
- if not reranked:
380
- # 回退: 用融合分數排序前 top_k
381
- sorted_candidates = sorted(candidates, key=lambda x: -x[1])[:top_k]
382
- reranked = [{"idx": i, "score": fused} for i, fused, _, _ in sorted_candidates]
383
- return sorted(reranked, key=lambda x: -x["score"])[:top_k]
384
-
385
- def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]], bm25: Optional[Any], index: Optional[Any], emb_model: Optional[Any], reranker: Optional[Any], top_k: int = 10) -> List[int]:
386
- drug_ids = find_drug_ids_from_name(extract_drug_candidates_from_query(query), STATE.df_csv)
 
 
 
 
 
 
 
 
387
  log.info("Detected drug_ids: %s for query: %s", drug_ids, query[:50])
388
  if not drug_ids:
389
  log.warning("No drug_ids found; falling back to full corpus search.")
@@ -391,7 +416,6 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
391
  bm_results = []
392
  if bm25:
393
  scores = bm25.get_scores(tokenized_query)
394
- # Min-max normalize BM25 scores
395
  scores_np = np.array(scores)
396
  if np.max(scores_np) > np.min(scores_np):
397
  scores_norm = (scores_np - np.min(scores_np)) / (np.max(scores_np) - np.min(scores_np))
@@ -427,6 +451,7 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
427
  candidates = [(i, sc, 0.0, 0.0) for i, sc in candidates]
428
  reranked = rerank_results(query, candidates, sentences, reranker, top_k, RERANK_THRESHOLD)
429
  idxs = [r["idx"] for r in reranked]
 
430
  return idxs
431
 
432
  def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, Any]]) -> str:
@@ -460,8 +485,8 @@ def build_prompt(query: str, contexts: str, intents: List[str]) -> str:
460
  ts_parts.append("優先藥袋醫囑(如每日1顆,早餐後)。範圍 [Sxxx]。特殊:病人使用須知。")
461
  trouble_shooting = " ".join(ts_parts)
462
  return (
463
- f"你是一位專業、有同理心的藥師。使用下列參考片段回答問題。若片段無相關資訊,請說不知道。{trouble_shooting}\n"
464
- f"回答用台灣繁中,親切易懂,分2-3小段,每段<150字。末尾加'了解嗎?(回是/否)'。結尾加{DISCLAIMER}\n"
465
  f"問題:{query}\n"
466
  f"參考片段:\n{contexts}\n"
467
  )
@@ -520,53 +545,117 @@ def parse_user_message(query: str) -> Dict[str, str]:
520
 
521
  def make_clarify_message(drug_name_hint: str = "") -> str:
522
  msg = (
523
- "抱歉,為了給您更準確的回答,請用這個格式描述(至少包含藥名及問題意圖):\n"
524
- "藥名+劑型+強度(例:普拿疼 500mg 錠)\n"
525
- "症狀/目的(頭痛 6/10、發燒 38.5°C)\n"
526
- "使用紀錄(今天 08:00 吃 1 顆、連用 2 天…)\n"
527
- "對象(成人/兒童、孕哺、過敏/肝腎問題)\n"
528
- "併用(其他藥/保健品/咖啡酒精/是否空腹)\n"
529
- "你的問題(可否同用?多久可再吃?一天上限?)\n"
530
- "例:普拿疼 500mg 錠,頭痛,今天 08:00 吃 1 顆,剛喝咖啡;想問現在可再吃嗎?一天上限多少?\n"
531
  )
532
  if drug_name_hint:
533
- msg = f"目前無法識別特定藥名,我會先提供一般性建議。{drug_name_hint}請補充藥名、劑型與強度。\n" + msg
 
 
 
534
  return msg + DISCLAIMER
535
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
  async def answer_pipeline(query: str, user_id: str) -> str:
 
537
  if not query or not isinstance(query, str):
538
- return "請提供有效問題。"
539
  if not STATE.sentences:
540
- return "目前尚未載入語料,請稍後再試。"
541
  session = STATE.user_sessions.get(user_id, {})
542
  if "prev_query" in session and query.lower() in ["是", "否"]:
543
- # 簡易互動
544
  if query.lower() == "是":
545
  return "太好了!若還有問題,請告訴我。" + DISCLAIMER
546
  else:
547
  return f"抱歉沒說明清楚。關於{session['prev_query']},請再說詳細點,或直接問醫師。" + DISCLAIMER
548
- # 新增: 解析與檢核
549
  parsed = parse_user_message(query)
550
  drug_name_hint = ""
551
  if "fentanyl" in query.lower():
552
  drug_name_hint = "你說的是 Fentanyl 經皮貼片(Duragesic)嗎?有寫幾 mcg/hr 嗎?"
553
- drug_ids = find_drug_ids_from_name(extract_drug_candidates_from_query(query), STATE.df_csv)
554
- if not drug_ids or not detect_intent(query):
 
 
 
555
  return make_clarify_message(drug_name_hint)
556
- # 繼續原流程
557
  intents = detect_intent(query)
558
- idxs = fuse_and_select(query, STATE.sentences, STATE.meta, STATE.bm25, STATE.faiss_index, STATE.emb_model, STATE.reranker_model, top_k=TOP_K_SENTENCES)
559
- contexts = build_context(idxs, STATE.sentences, STATE.meta)
560
- ans = None
561
- if LM_MODEL and LITELLM_API_KEY and LITELLM_BASE_URL:
562
- ans = call_llm(build_prompt(query, contexts, intents))
563
- if not ans:
564
- # 改進 fallback 格式
565
- fallback_sentences = [STATE.sentences[i] for i in idxs[:3] if i >= 0]
566
- ans = "以下是相關資訊:\n" + "\n".join([f"- {s}" for s in fallback_sentences]) if fallback_sentences else "抱歉,暫時找不到相關資訊。"
567
- ans += "\n了解嗎?(回是/否)" + DISCLAIMER
568
- STATE.user_sessions[user_id] = {"prev_query": query}
569
- return ans
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
  # ---------- LINE 驗簽與回覆 ----------
572
  def verify_line_signature(body_bytes: bytes, signature: str) -> bool:
@@ -639,6 +728,9 @@ async def _startup():
639
  STATE.reranker_model = load_reranker_model(RERANKER_MODEL_ID)
640
  STATE.faiss_index = ensure_faiss(FAISS_INDEX, STATE.sentences)
641
  STATE.bm25 = ensure_bm25(BM25_PKL, STATE.sentences)
 
 
 
642
  if os.path.exists(CSV_PATH):
643
  STATE.df_csv = pd.read_csv(CSV_PATH, dtype=str)
644
  log.info("LLM via LiteLLM: base=%s model=%s", str(LITELLM_BASE_URL), str(LM_MODEL))
 
83
 
84
  # ---------- 檢索設定(固定常數) ----------
85
  TOP_K_SENTENCES = 10
86
+ BM25_WEIGHT = 0.8
87
+ SEM_WEIGHT = 0.2
88
  EMBEDDING_MODEL_ID= "DMetaSoul/Dmeta-embedding-zh"
89
+ RERANKER_MODEL_ID = "BAAI/bge-reranker-base"
90
  USE_CPU = True # HF 預設 CPU
91
  RERANK_THRESHOLD = 0.5
92
  MAX_CONTEXT_CHARS = 8000
93
+ DISCLAIMER = "此回覆僅供參考,請遵循醫師/藥師指示。"
94
 
95
  # 藥名映射與停用詞(擴充)
96
  DRUG_NAME_MAPPING = {
 
123
  }
124
 
125
  # 章節權重
126
# Section-name normalisation table: variant section headings (left) are
# rewritten to one canonical spelling (right). Lookups are expected to fall
# back to the original heading when it is not listed here.
SECTION_NORMALIZE = dict(
    [
        ("用法用量", "用法及用量"),
        ("副作用不良反應", "不良反應"),
        ("警語注意事項", "警語及注意事項"),
        ("交互作用", "藥物交互作用"),
        ("包裝及儲存", "儲存條件"),
        # Already canonical; listed explicitly so the table is self-describing.
        ("儲存條件", "儲存條件"),
    ]
)
134
+
135
  SECTION_WEIGHTS = {
136
  "用法及用量": 1.0,
137
  "病人使用須知": 1.0,
 
247
  bm25: Optional[Any] = None
248
  df_csv: Optional[pd.DataFrame] = None
249
  user_sessions: Dict[str, Dict[str, Any]] = {} # 簡易 session 快取
250
+ query_cache: Dict[str, Dict[str, Any]] = {}
251
 
252
  STATE = State()
253
 
 
338
  def extract_drug_candidates_from_query(query: str) -> List[str]:
339
  parts = re.split(r"[::]", query, maxsplit=1)
340
  drug_part = parts[0].strip() if len(parts) > 1 else query.strip()
341
+ tokens = tokenize_zh(drug_part) # 只取左側
342
  candidates = [t.lower() for t in tokens if len(t) > 2 and t not in DRUG_STOPWORDS]
343
  return [DRUG_NAME_MAPPING.get(c, c) for c in candidates]
344
 
 
346
  if df is None or df.empty: return []
347
  drug_ids = set()
348
  for cand in candidates:
349
+ # 先查映射
350
+ if cand in DRUG_NAME_MAPPING:
351
+ drug_ids.add(DRUG_NAME_MAPPING[cand])
352
+ continue
353
  bucket = []
354
  for _, row in df.iterrows():
355
  name_joined = f"{(row.get('drug_name_zh') or '').lower()} {(row.get('drug_name_en') or '').lower()} {(row.get('drug_name_norm') or '').lower()}".strip()
 
360
  )
361
  else:
362
  raw = 100 if cand in name_joined else 0
363
+ if raw >= 85:
364
  score = raw * (2.0 if re.search(r'[a-zA-Z]', cand) else 1.5) * (1 + len(cand)/20)
365
+ # 劑型比對 (簡易,假設 row 有 'dosage_form' 欄,CSV無,需加邏輯或略)
366
+ # 假設無,扣分0
367
+ bucket.append((score, row["drug_id"], row.get("section"), name_joined))
368
+ if bucket:
369
+ top = sorted(bucket, reverse=True)[0]
370
+ drug_ids.add(top[1])
371
+ log.info(f"Drug candidates: {[(id, sec, score, terms) for score, id, sec, terms in bucket]}")
372
  return list(drug_ids)
373
 
374
  # ---------- 意圖偵測 ----------
 
381
 
382
  # ---------- 檢索 ----------
383
  def rerank_results(query: str, candidates: List[Tuple[int, float, float, float]], sentences: List[str], reranker: Optional[Any], top_k: int, threshold: float) -> List[Dict[str, Any]]:
384
+ try:
385
+ candidates = sorted(candidates, key=lambda x: -x[1])[:top_k * 2] # 限20
386
+ if not candidates:
387
+ return []
388
+ valid_indices = [i for (i, *_ ) in candidates if 0 <= i < len(sentences)]
389
+ if not valid_indices:
390
+ return []
391
+ if reranker is None:
392
+ return [{"idx": i, "score": fused} for i, fused, _, _ in candidates]
393
+ pairs = [[query, sentences[i]] for i in valid_indices]
394
+ if not pairs:
395
+ return []
396
+ scores = reranker.predict(pairs, show_progress_bar=False)
397
+ reranked = [{"idx": valid_indices[j], "score": float(scores[j])} for j in range(len(scores)) if float(scores[j]) >= threshold]
398
+ if not reranked:
399
+ reranked = [{"idx": i, "score": fused} for i, fused, _, _ in candidates]
400
+ return sorted(reranked, key=lambda x: -x["score"])[:top_k]
401
+ except Exception as e:
402
+ log.warning("Rerank failed: %s", e)
403
+ return [{"idx": i, "score": fused} for i, fused, _, _ in sorted(candidates, key=lambda x: -x[1])[:top_k]] # fallback
404
+
405
+ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]], bm25: Optional[Any], index: Optional[Any], emb_model: Optional[Any], reranker: Optional[Any], top_k: int = 10, drug_ids: List[str] = None) -> List[int]:
406
+ clean_query = query.strip().lower() # 清理
407
+ cache_key = clean_query + str(drug_ids) # per drug cache
408
+ if cache_key in STATE.query_cache and time.time() - STATE.query_cache[cache_key]['time'] < 180: # 3min
409
+ log.info("Cache hit for query: %s", query[:50])
410
+ return STATE.query_cache[cache_key]['idxs']
411
+ drug_ids = drug_ids or []
412
  log.info("Detected drug_ids: %s for query: %s", drug_ids, query[:50])
413
  if not drug_ids:
414
  log.warning("No drug_ids found; falling back to full corpus search.")
 
416
  bm_results = []
417
  if bm25:
418
  scores = bm25.get_scores(tokenized_query)
 
419
  scores_np = np.array(scores)
420
  if np.max(scores_np) > np.min(scores_np):
421
  scores_norm = (scores_np - np.min(scores_np)) / (np.max(scores_np) - np.min(scores_np))
 
451
  candidates = [(i, sc, 0.0, 0.0) for i, sc in candidates]
452
  reranked = rerank_results(query, candidates, sentences, reranker, top_k, RERANK_THRESHOLD)
453
  idxs = [r["idx"] for r in reranked]
454
+ STATE.query_cache[cache_key] = {'idxs': idxs, 'time': time.time()}
455
  return idxs
456
 
457
  def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, Any]]) -> str:
 
485
  ts_parts.append("優先藥袋醫囑(如每日1顆,早餐後)。範圍 [Sxxx]。特殊:病人使用須知。")
486
  trouble_shooting = " ".join(ts_parts)
487
  return (
488
+ f"請根據提供內容,回答使用者問題。\n"
489
+ f"限制:- 回覆 ≤100字。- 條列 2–4 點,語氣精簡。- 不要附加「了解嗎」。若無關鍵依據,回答:「查無充分資料,建議詢問醫師/藥師」(≤60字)。\n"
490
  f"問題:{query}\n"
491
  f"參考片段:\n{contexts}\n"
492
  )
 
545
 
546
  def make_clarify_message(drug_name_hint: str = "") -> str:
547
  msg = (
548
+ "請提供:\n"
549
+ "1. 藥名+劑型+劑量(例:普拿疼 500mg 錠)\n"
550
+ "2. 使用情境(症狀、時間、對象)\n"
551
+ "3. 你的問題(可否同用?多久可再吃?)\n"
 
 
 
 
552
  )
553
  if drug_name_hint:
554
+ msg = (
555
+ "目前無法識別特定藥名,我會先提供一般性建議。"
556
+ "請補充藥名、劑型與強度。\n" + msg
557
+ )
558
  return msg + DISCLAIMER
559
 
560
+ # ---------- 新增: 藥名處理 ----------
561
def process_drug_names(drug_ids: List[str]) -> List[str]:
    """Log whether zero, one or several drug ids were resolved and pass them on.

    Returns an empty list (after logging a warning) when *drug_ids* is empty;
    otherwise returns *drug_ids* unchanged.
    """
    if not drug_ids:
        log.warning("NO_DRUG_ID")
        return []

    # Single- and multi-drug cases differ only in the log line.
    message = f"單藥名模式: {drug_ids[0]}" if len(drug_ids) == 1 else f"多藥名模式: {drug_ids}"
    log.info(message)
    return drug_ids
572
+
573
+ # ---------- 新增: 回覆壓縮 ----------
574
def compress_reply(reply: str, max_len: int = 100) -> str:
    """Shorten *reply* so that it is never longer than *max_len* characters.

    Replies already within the limit are returned untouched. Longer replies
    have filler words removed and whitespace collapsed; if that is still not
    enough the text is cut and ends with ``...``, with the ellipsis counted
    inside the limit.

    Args:
        reply: The text to shorten.
        max_len: Maximum length of the returned string.

    Returns:
        The original or shortened reply, at most *max_len* characters long.
    """
    if len(reply) <= max_len:
        return reply

    # Remove filler words, then collapse runs of whitespace.
    reply = re.sub(r'(例如|比如|像是|可能會|有時候|通常|一般來說|另外|而且|因此|所以)', '', reply)
    reply = re.sub(r'\s+', ' ', reply).strip()
    # Normalise spacing after sentence punctuation; strip again because the
    # substitution leaves a trailing space after a final punctuation mark.
    reply = re.sub(r'(\.|\?|\!)\s*', r'\1 ', reply).strip()
    log.info("回覆超長,自動壓縮")

    if len(reply) <= max_len:
        return reply

    ellipsis = '...'
    if max_len <= len(ellipsis):
        # No room for an ellipsis; a hard cut is the best that can be done.
        return reply[:max(max_len, 0)]
    # Reserve room for the ellipsis so the result honours max_len.
    return reply[:max_len - len(ellipsis)] + ellipsis
585
+
586
+ # ---------- 新增: 異常處理 ----------
587
def handle_error(code: str) -> str:
    """Log a pipeline error *code* and return the generic fallback reply."""
    fallback_reply = "查無充分資料,建議詢問醫師/藥師"
    log.error(f"Pipeline error: {code}")
    return fallback_reply
590
+
591
+ # ---------- 新增: 全鏈路log ----------
592
def log_pipeline(user_id: str, query: str, parsed: Dict, candidates: List, drug_choices: List,
                 retrieval: Dict, sections: List, context: str, prompt: str, reply: str, error_code: str = None):
    """Write one numbered log line per pipeline stage for a single request.

    Stages logged: the (truncated) query, the parsed message, drug candidates,
    chosen drug ids, retrieval counters, injected sections, context/prompt
    sizes, and LLM timing/output size. An error line is added when
    *error_code* is truthy.
    """
    # NOTE: the query is only truncated to 50 characters here; no other
    # masking is applied despite the "(desensitized)" label in the message.
    shown_query = query[:50] + '...' if len(query)>50 else query
    log.info(f"1. user_id: {user_id}, query: {shown_query} (desensitized)")

    log.info(f"2. parsed={parsed}")
    log.info(f"3. candidates={candidates}")
    log.info(f"4. drug_id_pick: {drug_choices} (每個候選的分數、淘汰原因 in find_drug_ids)")

    bm_top = retrieval.get('bm_top', 0)
    sem_top = retrieval.get('sem_top', 0)
    fused_top = retrieval.get('fused_top10', [])
    log.info(f"5. retrieval: BM25_topN={bm_top}, SEM_topN={sem_top}, fused_top10={fused_top}")

    log.info(f"6. Injected sections: {sections}")

    # Token count is a rough estimate: prompt characters divided by four.
    context_chars = len(context)
    prompt_chars = len(prompt)
    log.info(f"7. contexts_chars: {context_chars}, prompt_chars: {prompt_chars}, tokens: ~{prompt_chars//4}")

    llm_seconds = retrieval.get('llm_time', 0)
    reply_chars = len(reply)
    log.info(f"8. LLM: model={LM_MODEL}, time={llm_seconds:.2f}s, truncated={reply_chars>200}, output_chars={reply_chars}")

    if error_code:
        log.error(f"error_code={error_code}")
604
+
605
  async def answer_pipeline(query: str, user_id: str) -> str:
606
+ log.info("Pipeline start for user_id: %s, query: %s", user_id, query[:50])
607
  if not query or not isinstance(query, str):
608
+ return handle_error("INVALID_QUERY")
609
  if not STATE.sentences:
610
+ return handle_error("NO_CORPUS")
611
  session = STATE.user_sessions.get(user_id, {})
612
  if "prev_query" in session and query.lower() in ["是", "否"]:
 
613
  if query.lower() == "是":
614
  return "太好了!若還有問題,請告訴我。" + DISCLAIMER
615
  else:
616
  return f"抱歉沒說明清楚。關於{session['prev_query']},請再說詳細點,或直接問醫師。" + DISCLAIMER
 
617
  parsed = parse_user_message(query)
618
  drug_name_hint = ""
619
  if "fentanyl" in query.lower():
620
  drug_name_hint = "你說的是 Fentanyl 經皮貼片(Duragesic)嗎?有寫幾 mcg/hr 嗎?"
621
+ candidates = extract_drug_candidates_from_query(query)
622
+ drug_ids = find_drug_ids_from_name(candidates, STATE.df_csv)
623
+ drug_choices = process_drug_names(drug_ids)
624
+ if not drug_choices:
625
+ log_pipeline(user_id, query, parsed, candidates, drug_choices, {}, [], "", "", "", "NO_DRUG_ID")
626
  return make_clarify_message(drug_name_hint)
 
627
  intents = detect_intent(query)
628
+ answers = []
629
+ retrieval = {'bm_top': 0, 'sem_top': 0, 'fused_top10': [], 'llm_time': 0}
630
+ sections = IMPORTANT_SECTIONS
631
+ context = ""
632
+ prompt = ""
633
+ reply = ""
634
+ try:
635
+ for did in drug_choices:
636
+ idxs = fuse_and_select(query, STATE.sentences, STATE.meta, STATE.bm25, STATE.faiss_index, STATE.emb_model, STATE.reranker_model, top_k=TOP_K_SENTENCES, drug_ids=[did])
637
+ if not idxs:
638
+ answers.append(handle_error("NO_CONTEXT"))
639
+ continue
640
+ context = build_context(idxs, STATE.sentences, STATE.meta)
641
+ prompt = build_prompt(query, context, intents)
642
+ t0 = time.time()
643
+ ans = call_llm(prompt)
644
+ retrieval['llm_time'] += time.time() - t0
645
+ if not ans:
646
+ answers.append(handle_error("LLM_ERROR"))
647
+ continue
648
+ ans = compress_reply(ans)
649
+ answers.append(ans)
650
+ reply = "\n".join(answers)
651
+ if len(drug_choices) > 1:
652
+ reply = "偵測到多個藥物,以下分別提供參考:\n" + reply
653
+ STATE.user_sessions[user_id] = {"prev_query": query}
654
+ except Exception as e:
655
+ log.warning("Pipeline 失敗:%s", e)
656
+ reply = handle_error("UNEXPECTED_ERROR")
657
+ log_pipeline(user_id, query, parsed, candidates, drug_choices, retrieval, sections, context, prompt, reply)
658
+ return reply
659
 
660
  # ---------- LINE 驗簽與回覆 ----------
661
  def verify_line_signature(body_bytes: bytes, signature: str) -> bool:
 
728
  STATE.reranker_model = load_reranker_model(RERANKER_MODEL_ID)
729
  STATE.faiss_index = ensure_faiss(FAISS_INDEX, STATE.sentences)
730
  STATE.bm25 = ensure_bm25(BM25_PKL, STATE.sentences)
731
+ for m in STATE.meta:
732
+ sec = m.get("section", "其他")
733
+ m["section"] = SECTION_NORMALIZE.get(sec, sec)
734
  if os.path.exists(CSV_PATH):
735
  STATE.df_csv = pd.read_csv(CSV_PATH, dtype=str)
736
  log.info("LLM via LiteLLM: base=%s model=%s", str(LITELLM_BASE_URL), str(LM_MODEL))