Spaces:
Sleeping
Sleeping
Song
committed on
Commit
·
f03b053
1
Parent(s):
9cbfa48
hi
Browse files
app.py
CHANGED
@@ -83,14 +83,14 @@ LM_MODEL = os.getenv("LM_MODEL")
|
|
83 |
|
84 |
# ---------- 檢索設定(固定常數) ----------
|
85 |
TOP_K_SENTENCES = 10
|
86 |
-
BM25_WEIGHT = 0.
|
87 |
-
SEM_WEIGHT = 0.
|
88 |
EMBEDDING_MODEL_ID= "DMetaSoul/Dmeta-embedding-zh"
|
89 |
-
RERANKER_MODEL_ID = "BAAI/bge-reranker-
|
90 |
USE_CPU = True # HF 預設 CPU
|
91 |
RERANK_THRESHOLD = 0.5
|
92 |
MAX_CONTEXT_CHARS = 8000
|
93 |
-
DISCLAIMER = "
|
94 |
|
95 |
# 藥名映射與停用詞(擴充)
|
96 |
DRUG_NAME_MAPPING = {
|
@@ -123,6 +123,15 @@ INTENT_KEYWORDS = {
|
|
123 |
}
|
124 |
|
125 |
# 章節權重
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
SECTION_WEIGHTS = {
|
127 |
"用法及用量": 1.0,
|
128 |
"病人使用須知": 1.0,
|
@@ -238,6 +247,7 @@ class State:
|
|
238 |
bm25: Optional[Any] = None
|
239 |
df_csv: Optional[pd.DataFrame] = None
|
240 |
user_sessions: Dict[str, Dict[str, Any]] = {} # 簡易 session 快取
|
|
|
241 |
|
242 |
STATE = State()
|
243 |
|
@@ -328,7 +338,7 @@ def ensure_bm25(pkl_path: str, sentences: List[str]) -> Optional[Any]:
|
|
328 |
def extract_drug_candidates_from_query(query: str) -> List[str]:
|
329 |
parts = re.split(r"[::]", query, maxsplit=1)
|
330 |
drug_part = parts[0].strip() if len(parts) > 1 else query.strip()
|
331 |
-
tokens = tokenize_zh(drug_part)
|
332 |
candidates = [t.lower() for t in tokens if len(t) > 2 and t not in DRUG_STOPWORDS]
|
333 |
return [DRUG_NAME_MAPPING.get(c, c) for c in candidates]
|
334 |
|
@@ -336,6 +346,10 @@ def find_drug_ids_from_name(candidates: List[str], df: pd.DataFrame) -> List[str
|
|
336 |
if df is None or df.empty: return []
|
337 |
drug_ids = set()
|
338 |
for cand in candidates:
|
|
|
|
|
|
|
|
|
339 |
bucket = []
|
340 |
for _, row in df.iterrows():
|
341 |
name_joined = f"{(row.get('drug_name_zh') or '').lower()} {(row.get('drug_name_en') or '').lower()} {(row.get('drug_name_norm') or '').lower()}".strip()
|
@@ -346,12 +360,15 @@ def find_drug_ids_from_name(candidates: List[str], df: pd.DataFrame) -> List[str
|
|
346 |
)
|
347 |
else:
|
348 |
raw = 100 if cand in name_joined else 0
|
349 |
-
if raw >=
|
350 |
score = raw * (2.0 if re.search(r'[a-zA-Z]', cand) else 1.5) * (1 + len(cand)/20)
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
|
|
|
|
|
|
355 |
return list(drug_ids)
|
356 |
|
357 |
# ---------- 意圖偵測 ----------
|
@@ -364,26 +381,34 @@ def detect_intent(query: str) -> List[str]:
|
|
364 |
|
365 |
# ---------- 檢索 ----------
|
366 |
def rerank_results(query: str, candidates: List[Tuple[int, float, float, float]], sentences: List[str], reranker: Optional[Any], top_k: int, threshold: float) -> List[Dict[str, Any]]:
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
387 |
log.info("Detected drug_ids: %s for query: %s", drug_ids, query[:50])
|
388 |
if not drug_ids:
|
389 |
log.warning("No drug_ids found; falling back to full corpus search.")
|
@@ -391,7 +416,6 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
|
|
391 |
bm_results = []
|
392 |
if bm25:
|
393 |
scores = bm25.get_scores(tokenized_query)
|
394 |
-
# Min-max normalize BM25 scores
|
395 |
scores_np = np.array(scores)
|
396 |
if np.max(scores_np) > np.min(scores_np):
|
397 |
scores_norm = (scores_np - np.min(scores_np)) / (np.max(scores_np) - np.min(scores_np))
|
@@ -427,6 +451,7 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
|
|
427 |
candidates = [(i, sc, 0.0, 0.0) for i, sc in candidates]
|
428 |
reranked = rerank_results(query, candidates, sentences, reranker, top_k, RERANK_THRESHOLD)
|
429 |
idxs = [r["idx"] for r in reranked]
|
|
|
430 |
return idxs
|
431 |
|
432 |
def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, Any]]) -> str:
|
@@ -460,8 +485,8 @@ def build_prompt(query: str, contexts: str, intents: List[str]) -> str:
|
|
460 |
ts_parts.append("優先藥袋醫囑(如每日1顆,早餐後)。範圍 [Sxxx]。特殊:病人使用須知。")
|
461 |
trouble_shooting = " ".join(ts_parts)
|
462 |
return (
|
463 |
-
f"
|
464 |
-
f"
|
465 |
f"問題:{query}\n"
|
466 |
f"參考片段:\n{contexts}\n"
|
467 |
)
|
@@ -520,53 +545,117 @@ def parse_user_message(query: str) -> Dict[str, str]:
|
|
520 |
|
521 |
def make_clarify_message(drug_name_hint: str = "") -> str:
|
522 |
msg = (
|
523 |
-
"
|
524 |
-
"
|
525 |
-
"
|
526 |
-
"
|
527 |
-
"對象(成人/兒童、孕哺、過敏/肝腎問題)\n"
|
528 |
-
"併用(其他藥/保健品/咖啡酒精/是否空腹)\n"
|
529 |
-
"你的問題(可否同用?多久可再吃?一天上限?)\n"
|
530 |
-
"例:普拿疼 500mg 錠,頭痛,今天 08:00 吃 1 顆,剛喝咖啡;想問現在可再吃嗎?一天上限多少?\n"
|
531 |
)
|
532 |
if drug_name_hint:
|
533 |
-
msg =
|
|
|
|
|
|
|
534 |
return msg + DISCLAIMER
|
535 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
536 |
async def answer_pipeline(query: str, user_id: str) -> str:
|
|
|
537 |
if not query or not isinstance(query, str):
|
538 |
-
return "
|
539 |
if not STATE.sentences:
|
540 |
-
return "
|
541 |
session = STATE.user_sessions.get(user_id, {})
|
542 |
if "prev_query" in session and query.lower() in ["是", "否"]:
|
543 |
-
# 簡易互動
|
544 |
if query.lower() == "是":
|
545 |
return "太好了!若還有問題,請告訴我。" + DISCLAIMER
|
546 |
else:
|
547 |
return f"抱歉沒說明清楚。關於{session['prev_query']},請再說詳細點,或直接問醫師。" + DISCLAIMER
|
548 |
-
# 新增: 解析與檢核
|
549 |
parsed = parse_user_message(query)
|
550 |
drug_name_hint = ""
|
551 |
if "fentanyl" in query.lower():
|
552 |
drug_name_hint = "你說的是 Fentanyl 經皮貼片(Duragesic)嗎?有寫幾 mcg/hr 嗎?"
|
553 |
-
|
554 |
-
|
|
|
|
|
|
|
555 |
return make_clarify_message(drug_name_hint)
|
556 |
-
# 繼續原流程
|
557 |
intents = detect_intent(query)
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
-
|
569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
570 |
|
571 |
# ---------- LINE 驗簽與回覆 ----------
|
572 |
def verify_line_signature(body_bytes: bytes, signature: str) -> bool:
|
@@ -639,6 +728,9 @@ async def _startup():
|
|
639 |
STATE.reranker_model = load_reranker_model(RERANKER_MODEL_ID)
|
640 |
STATE.faiss_index = ensure_faiss(FAISS_INDEX, STATE.sentences)
|
641 |
STATE.bm25 = ensure_bm25(BM25_PKL, STATE.sentences)
|
|
|
|
|
|
|
642 |
if os.path.exists(CSV_PATH):
|
643 |
STATE.df_csv = pd.read_csv(CSV_PATH, dtype=str)
|
644 |
log.info("LLM via LiteLLM: base=%s model=%s", str(LITELLM_BASE_URL), str(LM_MODEL))
|
|
|
83 |
|
84 |
# ---------- 檢索設定(固定常數) ----------
|
85 |
TOP_K_SENTENCES = 10
|
86 |
+
BM25_WEIGHT = 0.8
|
87 |
+
SEM_WEIGHT = 0.2
|
88 |
EMBEDDING_MODEL_ID= "DMetaSoul/Dmeta-embedding-zh"
|
89 |
+
RERANKER_MODEL_ID = "BAAI/bge-reranker-base"
|
90 |
USE_CPU = True # HF 預設 CPU
|
91 |
RERANK_THRESHOLD = 0.5
|
92 |
MAX_CONTEXT_CHARS = 8000
|
93 |
+
DISCLAIMER = "此回覆僅供參考,請遵循醫師/藥師指示。"
|
94 |
|
95 |
# 藥名映射與停用詞(擴充)
|
96 |
DRUG_NAME_MAPPING = {
|
|
|
123 |
}
|
124 |
|
125 |
# 章節權重
|
126 |
+
SECTION_NORMALIZE = {
|
127 |
+
"用法用量": "用法及用量",
|
128 |
+
"副作用不良反應": "不良反應",
|
129 |
+
"警語注意事項": "警語及注意事項",
|
130 |
+
"交互作用": "藥物交互作用",
|
131 |
+
"包裝及儲存": "儲存條件",
|
132 |
+
"儲存條件": "儲存條件"
|
133 |
+
}
|
134 |
+
|
135 |
SECTION_WEIGHTS = {
|
136 |
"用法及用量": 1.0,
|
137 |
"病人使用須知": 1.0,
|
|
|
247 |
bm25: Optional[Any] = None
|
248 |
df_csv: Optional[pd.DataFrame] = None
|
249 |
user_sessions: Dict[str, Dict[str, Any]] = {} # 簡易 session 快取
|
250 |
+
query_cache: Dict[str, Dict[str, Any]] = {}
|
251 |
|
252 |
STATE = State()
|
253 |
|
|
|
338 |
def extract_drug_candidates_from_query(query: str) -> List[str]:
    """Extract candidate drug-name tokens from a user query.

    If the query contains a colon, only the text before the first colon is
    tokenized; otherwise the whole (stripped) query is. Tokens of length <= 2
    and tokens listed in DRUG_STOPWORDS are dropped. The remaining tokens are
    lowercased and translated through DRUG_NAME_MAPPING (unmapped tokens are
    returned unchanged).

    Args:
        query: Raw user message.

    Returns:
        Lowercased (and possibly mapped) candidate names, in token order.
    """
    # Split on the ASCII colon or the full-width colon; the previous character
    # class listed the same character twice.
    parts = re.split(r"[::]", query, maxsplit=1)
    # Only the left-hand side is treated as the drug part.
    drug_part = parts[0].strip() if len(parts) > 1 else query.strip()

    candidates = []
    for token in tokenize_zh(drug_part):
        if len(token) <= 2:
            continue
        lowered = token.lower()
        # Candidates are emitted lowercased, so test the stopword list against
        # both spellings; otherwise a capitalised stopword would slip through.
        if token in DRUG_STOPWORDS or lowered in DRUG_STOPWORDS:
            continue
        candidates.append(lowered)
    return [DRUG_NAME_MAPPING.get(c, c) for c in candidates]
|
344 |
|
|
|
346 |
if df is None or df.empty: return []
|
347 |
drug_ids = set()
|
348 |
for cand in candidates:
|
349 |
+
# 先查映射
|
350 |
+
if cand in DRUG_NAME_MAPPING:
|
351 |
+
drug_ids.add(DRUG_NAME_MAPPING[cand])
|
352 |
+
continue
|
353 |
bucket = []
|
354 |
for _, row in df.iterrows():
|
355 |
name_joined = f"{(row.get('drug_name_zh') or '').lower()} {(row.get('drug_name_en') or '').lower()} {(row.get('drug_name_norm') or '').lower()}".strip()
|
|
|
360 |
)
|
361 |
else:
|
362 |
raw = 100 if cand in name_joined else 0
|
363 |
+
if raw >= 85:
|
364 |
score = raw * (2.0 if re.search(r'[a-zA-Z]', cand) else 1.5) * (1 + len(cand)/20)
|
365 |
+
# 劑型比對 (簡易,假設 row 有 'dosage_form' 欄,CSV無,需加邏輯或略)
|
366 |
+
# 假設無,扣分0
|
367 |
+
bucket.append((score, row["drug_id"], row.get("section"), name_joined))
|
368 |
+
if bucket:
|
369 |
+
top = sorted(bucket, reverse=True)[0]
|
370 |
+
drug_ids.add(top[1])
|
371 |
+
log.info(f"Drug candidates: {[(id, sec, score, terms) for score, id, sec, terms in bucket]}")
|
372 |
return list(drug_ids)
|
373 |
|
374 |
# ---------- 意圖偵測 ----------
|
|
|
381 |
|
382 |
# ---------- 檢索 ----------
|
383 |
def rerank_results(query: str, candidates: List[Tuple[int, float, float, float]], sentences: List[str], reranker: Optional[Any], top_k: int, threshold: float) -> List[Dict[str, Any]]:
    """Rerank fused retrieval candidates and return at most ``top_k`` of them.

    Args:
        query: User query paired with each candidate sentence.
        candidates: Tuples whose first item is an index into ``sentences`` and
            whose second item is the fused score used for the initial ordering.
        sentences: Corpus sentences the candidate indices refer to.
        reranker: Object exposing ``predict(pairs, show_progress_bar=False)``;
            ``None`` disables reranking.
        top_k: Maximum number of results returned.
        threshold: Minimum reranker score a candidate needs to be kept.

    Returns:
        Dicts with ``idx`` and ``score`` keys, best score first. The score is
        the reranker score when reranking succeeded and kept at least one
        candidate; otherwise it is the fused score.
    """
    # Drop candidates whose index does not point into `sentences` up front so
    # that no return path can hand an out-of-range index to the caller.
    ranked = sorted(
        (c for c in candidates if 0 <= c[0] < len(sentences)),
        key=lambda c: -c[1],
    )
    # Fused-score fallback, always limited to top_k (the no-reranker path used
    # to return up to 2 * top_k entries).
    fallback = [{"idx": c[0], "score": c[1]} for c in ranked[:top_k]]

    # Only the best 2 * top_k candidates are sent to the reranker.
    pool = ranked[:top_k * 2]
    if not pool or reranker is None:
        return fallback

    try:
        pairs = [[query, sentences[c[0]]] for c in pool]
        scores = reranker.predict(pairs, show_progress_bar=False)
        reranked = [
            {"idx": c[0], "score": float(s)}
            for c, s in zip(pool, scores)
            if float(s) >= threshold
        ]
    except Exception as e:
        # Best effort: a failing reranker must not break retrieval.
        log.warning("Rerank failed: %s", e)
        return fallback

    if not reranked:
        # Nothing passed the threshold; fall back to fused ordering.
        return fallback
    return sorted(reranked, key=lambda r: -r["score"])[:top_k]
|
404 |
+
|
405 |
+
def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]], bm25: Optional[Any], index: Optional[Any], emb_model: Optional[Any], reranker: Optional[Any], top_k: int = 10, drug_ids: List[str] = None) -> List[int]:
|
406 |
+
clean_query = query.strip().lower() # 清理
|
407 |
+
cache_key = clean_query + str(drug_ids) # per drug cache
|
408 |
+
if cache_key in STATE.query_cache and time.time() - STATE.query_cache[cache_key]['time'] < 180: # 3min
|
409 |
+
log.info("Cache hit for query: %s", query[:50])
|
410 |
+
return STATE.query_cache[cache_key]['idxs']
|
411 |
+
drug_ids = drug_ids or []
|
412 |
log.info("Detected drug_ids: %s for query: %s", drug_ids, query[:50])
|
413 |
if not drug_ids:
|
414 |
log.warning("No drug_ids found; falling back to full corpus search.")
|
|
|
416 |
bm_results = []
|
417 |
if bm25:
|
418 |
scores = bm25.get_scores(tokenized_query)
|
|
|
419 |
scores_np = np.array(scores)
|
420 |
if np.max(scores_np) > np.min(scores_np):
|
421 |
scores_norm = (scores_np - np.min(scores_np)) / (np.max(scores_np) - np.min(scores_np))
|
|
|
451 |
candidates = [(i, sc, 0.0, 0.0) for i, sc in candidates]
|
452 |
reranked = rerank_results(query, candidates, sentences, reranker, top_k, RERANK_THRESHOLD)
|
453 |
idxs = [r["idx"] for r in reranked]
|
454 |
+
STATE.query_cache[cache_key] = {'idxs': idxs, 'time': time.time()}
|
455 |
return idxs
|
456 |
|
457 |
def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, Any]]) -> str:
|
|
|
485 |
ts_parts.append("優先藥袋醫囑(如每日1顆,早餐後)。範圍 [Sxxx]。特殊:病人使用須知。")
|
486 |
trouble_shooting = " ".join(ts_parts)
|
487 |
return (
|
488 |
+
f"請根據提供內容,回答使用者問題。\n"
|
489 |
+
f"限制:- 回覆 ≤100字。- 條列 2–4 點,語氣精簡。- 不要附加「了解嗎」。若無關鍵依據,回答:「查無充分資料,建議詢問醫師/藥師」(≤60字)。\n"
|
490 |
f"問題:{query}\n"
|
491 |
f"參考片段:\n{contexts}\n"
|
492 |
)
|
|
|
545 |
|
546 |
def make_clarify_message(drug_name_hint: str = "") -> str:
    """Build the message asking the user for the details needed to answer.

    Args:
        drug_name_hint: Optional follow-up question about the drug the user
            may have meant. When non-empty, a preface and the hint itself are
            placed before the checklist.

    Returns:
        The clarification text followed by DISCLAIMER.
    """
    msg = (
        "請提供:\n"
        "1. 藥名+劑型+劑量(例:普拿疼 500mg 錠)\n"
        "2. 使用情境(症狀、時間、對象)\n"
        "3. 你的問題(可否同用?多久可再吃?)\n"
    )
    if drug_name_hint:
        # The hint text used to act only as a flag and was never shown, so the
        # caller's specific question was lost. Include it after the preface.
        msg = (
            "目前無法識別特定藥名,我會先提供一般性建議。"
            "請補充藥名、劑型與強度。\n"
            + drug_name_hint + "\n"
            + msg
        )
    return msg + DISCLAIMER
|
559 |
|
560 |
+
# ---------- 新增: 藥名處理 ----------
|
561 |
+
def process_drug_names(drug_ids: List[str]) -> List[str]:
    """Log whether zero, one, or several drug ids were resolved.

    Returns an empty list (after a "NO_DRUG_ID" warning) when ``drug_ids`` is
    empty; otherwise returns ``drug_ids`` itself, unchanged.
    """
    if drug_ids:
        if len(drug_ids) != 1:
            # Multi-drug mode.
            log.info(f"多藥名模式: {drug_ids}")
        else:
            # Single-drug mode.
            log.info(f"單藥名模式: {drug_ids[0]}")
        return drug_ids

    log.warning("NO_DRUG_ID")
    return []
|
572 |
+
|
573 |
+
# ---------- 新增: 回覆壓縮 ----------
|
574 |
+
def compress_reply(reply: str, max_len: int = 100) -> str:
    """Shorten a reply so that it is at most ``max_len`` characters long.

    A reply that already fits is returned untouched. Otherwise filler words
    are removed, whitespace runs are collapsed to single spaces, and the text
    is truncated with a trailing ``...`` if it is still too long.

    Args:
        reply: Text to shorten.
        max_len: Maximum length of the returned string, ellipsis included.

    Returns:
        The original reply, or a shortened version no longer than ``max_len``.
    """
    if len(reply) <= max_len:
        return reply

    # Remove filler words, then merge sentences.
    reply = re.sub(r'(例如|比如|像是|可能會|有時候|通常|一般來說|另外|而且|因此|所以)', '', reply)
    reply = re.sub(r'\s+', ' ', reply).strip()
    # Exactly one space after ASCII sentence punctuation. Strip again because
    # this leaves a trailing space when the text ends in punctuation.
    reply = re.sub(r'(\.|\?|\!)\s*', r'\1 ', reply).strip()
    log.info("回覆超長,自動壓縮")

    if len(reply) <= max_len:
        return reply

    # Reserve room for the ellipsis so the result respects max_len; it used
    # to be appended after slicing, giving max_len + 3 characters.
    ellipsis = '...'
    keep = max(max_len - len(ellipsis), 0)
    return (reply[:keep] + ellipsis)[:max(max_len, 0)]
|
585 |
+
|
586 |
+
# ---------- 新增: 異常處理 ----------
|
587 |
+
def handle_error(code: str) -> str:
    """Log ``code`` as a pipeline error and return the generic fallback reply."""
    fallback_reply = "查無充分資料,建議詢問醫師/藥師"
    log.error(f"Pipeline error: {code}")
    return fallback_reply
|
590 |
+
|
591 |
+
# ---------- 新增: 全鏈路log ----------
|
592 |
+
def log_pipeline(user_id: str, query: str, parsed: Dict, candidates: List, drug_choices: List,
                 retrieval: Dict, sections: List, context: str, prompt: str, reply: str, error_code: str = None):
    """Write one numbered log line per pipeline stage for a single request.

    Logs, in order: the user id and a query preview (first 50 characters), the
    parsed message, drug-name candidates, chosen drug ids, retrieval counters
    read from ``retrieval`` (``bm_top``, ``sem_top``, ``fused_top10``),
    injected sections, context/prompt sizes, and LLM stats (model,
    ``retrieval['llm_time']``, reply length). ``error_code``, when truthy, is
    logged at error level afterwards.
    """
    info = log.info
    info(f"1. user_id: {user_id}, query: {query[:50] + '...' if len(query)>50 else query} (desensitized)")
    info(f"2. parsed={parsed}")
    info(f"3. candidates={candidates}")
    info(f"4. drug_id_pick: {drug_choices} (每個候選的分數、淘汰原因 in find_drug_ids)")
    info(f"5. retrieval: BM25_topN={retrieval.get('bm_top', 0)}, SEM_topN={retrieval.get('sem_top', 0)}, fused_top10={retrieval.get('fused_top10', [])}")
    info(f"6. Injected sections: {sections}")
    # Token count is a rough estimate: prompt length divided by four.
    info(f"7. contexts_chars: {len(context)}, prompt_chars: {len(prompt)}, tokens: ~{len(prompt)//4}")
    info(f"8. LLM: model={LM_MODEL}, time={retrieval.get('llm_time', 0):.2f}s, truncated={len(reply)>200}, output_chars={len(reply)}")
    if not error_code:
        return
    log.error(f"error_code={error_code}")
|
604 |
+
|
605 |
async def answer_pipeline(query: str, user_id: str) -> str:
    """Answer one user message end to end.

    Steps: validate the input, handle a yes/no follow-up to the previous
    answer, resolve drug ids from the query, then for each drug retrieve
    sentences, build the context and prompt, call the LLM and compress the
    answer. Per-drug answers are joined with newlines.

    Args:
        query: Raw user message.
        user_id: Key into ``STATE.user_sessions``.

    Returns:
        The reply text. This is the generic fallback from ``handle_error`` on
        invalid input, a missing corpus or an unexpected failure, and the
        clarification message when no drug could be identified.
    """
    # Validate before touching `query`: slicing a non-string for the start log
    # would raise TypeError before this guard could return the error reply.
    if not query or not isinstance(query, str):
        return handle_error("INVALID_QUERY")
    log.info("Pipeline start for user_id: %s, query: %s", user_id, query[:50])
    if not STATE.sentences:
        return handle_error("NO_CORPUS")

    # Simple follow-up: a bare yes/no refers to the previously answered query.
    session = STATE.user_sessions.get(user_id, {})
    if "prev_query" in session and query.lower() in ["是", "否"]:
        if query.lower() == "是":
            return "太好了!若還有問題,請告訴我。" + DISCLAIMER
        else:
            return f"抱歉沒說明清楚。關於{session['prev_query']},請再說詳細點,或直接問醫師。" + DISCLAIMER

    # Parse the message and resolve which drug(s) it is about.
    parsed = parse_user_message(query)
    drug_name_hint = ""
    if "fentanyl" in query.lower():
        drug_name_hint = "你說的是 Fentanyl 經皮貼片(Duragesic)嗎?有寫幾 mcg/hr 嗎?"
    candidates = extract_drug_candidates_from_query(query)
    drug_ids = find_drug_ids_from_name(candidates, STATE.df_csv)
    drug_choices = process_drug_names(drug_ids)
    if not drug_choices:
        log_pipeline(user_id, query, parsed, candidates, drug_choices, {}, [], "", "", "", "NO_DRUG_ID")
        return make_clarify_message(drug_name_hint)

    intents = detect_intent(query)
    answers = []
    retrieval = {'bm_top': 0, 'sem_top': 0, 'fused_top10': [], 'llm_time': 0}
    sections = IMPORTANT_SECTIONS
    context = ""
    prompt = ""
    reply = ""
    try:
        for did in drug_choices:
            idxs = fuse_and_select(query, STATE.sentences, STATE.meta, STATE.bm25, STATE.faiss_index, STATE.emb_model, STATE.reranker_model, top_k=TOP_K_SENTENCES, drug_ids=[did])
            if not idxs:
                answers.append(handle_error("NO_CONTEXT"))
                continue
            context = build_context(idxs, STATE.sentences, STATE.meta)
            prompt = build_prompt(query, context, intents)
            t0 = time.time()
            ans = call_llm(prompt)
            # Accumulate LLM time across all drugs for the pipeline log.
            retrieval['llm_time'] += time.time() - t0
            if not ans:
                answers.append(handle_error("LLM_ERROR"))
                continue
            ans = compress_reply(ans)
            answers.append(ans)
        reply = "\n".join(answers)
        if len(drug_choices) > 1:
            reply = "偵測到多個藥物,以下分別提供參考:\n" + reply
        # Remember the query so a later yes/no message can refer back to it.
        STATE.user_sessions[user_id] = {"prev_query": query}
    except Exception as e:
        # Top-level boundary: keep the traceback in the log, reply generically.
        log.warning("Pipeline 失敗:%s", e, exc_info=True)
        reply = handle_error("UNEXPECTED_ERROR")
    log_pipeline(user_id, query, parsed, candidates, drug_choices, retrieval, sections, context, prompt, reply)
    return reply
|
659 |
|
660 |
# ---------- LINE 驗簽與回覆 ----------
|
661 |
def verify_line_signature(body_bytes: bytes, signature: str) -> bool:
|
|
|
728 |
STATE.reranker_model = load_reranker_model(RERANKER_MODEL_ID)
|
729 |
STATE.faiss_index = ensure_faiss(FAISS_INDEX, STATE.sentences)
|
730 |
STATE.bm25 = ensure_bm25(BM25_PKL, STATE.sentences)
|
731 |
+
for m in STATE.meta:
|
732 |
+
sec = m.get("section", "其他")
|
733 |
+
m["section"] = SECTION_NORMALIZE.get(sec, sec)
|
734 |
if os.path.exists(CSV_PATH):
|
735 |
STATE.df_csv = pd.read_csv(CSV_PATH, dtype=str)
|
736 |
log.info("LLM via LiteLLM: base=%s model=%s", str(LITELLM_BASE_URL), str(LM_MODEL))
|