Spaces:

pcreem
/

brown-cafe

Sleeping

App Files Files Community

Song committed 23 days ago

Commit

032fccb

1 Parent(s): 81f1ba7

hi

Browse files

Files changed (1) hide show

app.py +194 -277

app.py CHANGED Viewed

@@ -92,7 +92,7 @@ RERANK_THRESHOLD  = 0.5
 MAX_CONTEXT_CHARS = 8000
 DISCLAIMER = "此回覆僅供參考，請遵循醫師/藥師指示。"
-# 藥名映射與停用詞（擴充）
 DRUG_NAME_MAPPING = {
     "fentanyl patch": "fentanyl",
     "spiriva respimat": "spiriva",
@@ -108,6 +108,7 @@ DRUG_NAME_MAPPING = {
     "芬太尼貼片": "fentanyl",
     "透皮止痛貼片": "fentanyl",
 }
 DRUG_STOPWORDS = {"藥", "劑", "錠", "膠囊", "糖漿", "乳膏", "貼片", "含錠", "膜衣錠", "緩釋錠", "滴劑", "懸液", "注射液",
                   "吸入劑", "噴霧", "噴霧劑", "吸入器", "注射筆", "藥水", "小袋", "條", "包", "瓶", "外用", "口服"}
@@ -146,7 +147,7 @@ SECTION_WEIGHTS = {
 }
 IMPORTANT_SECTIONS = ["用法及用量", "病人使用須知", "包裝及儲存", "不良反應", "警語及注意事項"]
-DOSAGE_FORM_BOOST = 1.2 # NEW: 劑型匹配的權重提升
 # ---------- 路徑工具 ----------
 def pick_existing_or_tmp(candidates: List[str]) -> str:
@@ -321,7 +322,7 @@ def ensure_bm25(pkl_path: str, sentences: List[str]) -> Optional[Any]:
         try:
             with open(pkl_path, "rb") as f:
                 bm = pickle.load(f)
-            # MODIFIED: BM25 has corpus, not corpus_size attribute
             n_bm = len(bm.corpus) if hasattr(bm, 'corpus') else 0
             if n_bm == len(sentences):
                 log.info("Loaded BM25: %s (n=%d)", pkl_path, n_bm)
@@ -336,168 +337,216 @@ def ensure_bm25(pkl_path: str, sentences: List[str]) -> Optional[Any]:
     safe_pickle_dump(bm, pkl_path)
     return bm
-# ---------- 資訊解析與藥名處理 (MODIFIED & NEW) ----------
-def parse_user_message(query: str) -> Dict[str, Any]:
     """
-    NEW: 使用更強的正規表示式從使用者問題中提取結構化資訊
     """
-    parsed = {
-        "drug_name": "", "strength": "", "dosage_form": "", "question": query, "raw_query": query
     }
-    # Regex to find drug name, strength (e.g., 500mg, 25 mcg/hr), and dosage form
-    # It tries to find a noun-like part followed by numbers/units and another noun-like part
-    drug_pattern = re.compile(
-        r"([\u4e00-\u9fa5a-zA-Z\s\d\.\-]+?)"  # Drug name (non-greedy)
-        r"[:：\s]*?"                         # Optional separator
-        r"(\d+(?:\.\d+)?\s*(?:mg|mcg|µg|g|mcg/hr|mg/hr|iu|國際單位))?"  # Strength (optional)
-        r"[\s]*?"                            # Optional space
-        r"([\u4e00-\u9fa5a-zA-Z]+(?:劑|錠|膠囊|糖漿|乳膏|貼片|噴劑|噴霧|吸入劑))?" # Dosage form (optional)
-    , re.I)
-    match = drug_pattern.search(query)
-    question_part = query
-    if match:
-        drug_name = (match.group(1) or "").strip()
-        strength = (match.group(2) or "").strip()
-        dosage_form = (match.group(3) or "").strip()
-        # Clean up drug name from common question words if they are at the end
-        drug_name = re.sub(r"(怎麼用|怎麼吃|副作用|的用法)$", "", drug_name).strip()
-        parsed.update({"drug_name": drug_name, "strength": strength, "dosage_form": dosage_form})
-        # The rest of the query is the question
-        question_part = query[match.end():].strip()
-        if not question_part:
-             question_part = query # fallback if parsing consumes whole string
-    parsed["question"] = question_part
-    log.info("Parsed user message: %s", parsed)
-    return parsed
-def find_drug_candidates(parsed_info: Dict[str, Any], df: pd.DataFrame) -> List[Dict[str, Any]]:
-    """
-    MODIFIED: Find drug candidates based on parsed info and return a ranked list of dicts.
-    """
-    if df is None or df.empty or not parsed_info.get("drug_name"):
-        return []
-    query_drug_name = parsed_info["drug_name"].lower()
-    query_dosage_form = parsed_info["dosage_form"]
-    # Use jieba to get core drug name
-    tokens = tokenize_zh(query_drug_name)
-    candidates = [t.lower() for t in tokens if len(t) > 1 and t not in DRUG_STOPWORDS]
-    candidates.append(query_drug_name) # also search the raw name
-    candidates = list(set([DRUG_NAME_MAPPING.get(c, c) for c in candidates]))
-    drug_bucket = []
-    # Get all unique drug_ids and their names to avoid iterating the whole dataframe repeatedly
-    unique_drugs = df.drop_duplicates(subset=['drug_id'])
-    for cand in candidates:
-        for _, row in unique_drugs.iterrows():
-            name_joined = f"{(row.get('drug_name_zh') or '').lower()} {(row.get('drug_name_en') or '').lower()} {(row.get('drug_name_norm') or '').lower()}".strip()
-            if not fuzz:
-                raw_score = 100 if cand in name_joined else 0
-            else:
-                raw_score = max(
-                    fuzz.token_set_ratio(cand, name_joined),
-                    fuzz.partial_ratio(cand, name_joined)
-                )
-            if raw_score >= 85:
-                # Boost score for English name match, length, and dosage form match
-                score = raw_score
-                if re.search(r'[a-zA-Z]', cand):
-                    score *= 1.2
-                if query_dosage_form and query_dosage_form in name_joined:
-                    score *= 1.1
-                drug_bucket.append({
-                    "score": score,
-                    "drug_id": row["drug_id"],
-                    "drug_name_zh": row.get('drug_name_zh'),
-                    "drug_name_en": row.get('drug_name_en'),
-                    "matched_term": cand
-                })
-    if not drug_bucket:
-        return []
-    # Sort and deduplicate results
-    sorted_bucket = sorted(drug_bucket, key=lambda x: x['score'], reverse=True)
-    seen_ids = set()
-    unique_top = []
-    for item in sorted_bucket:
-        if item['drug_id'] not in seen_ids:
-            unique_top.append(item)
-            seen_ids.add(item['drug_id'])
-    log.info(f"Found drug candidates: {unique_top[:5]}")
-    return unique_top[:5] # Return top 5 candidates
-def select_best_drug_candidate(candidates: List[Dict[str, Any]]) -> Union[Dict[str, Any], str, None]:
     """
-    NEW: Logic to decide if we have a clear winner or need clarification.
     """
-    if not candidates:
-        return None
-    # Case 1: The top candidate has a very high score
-    if candidates[0]['score'] >= 95:
-        # Check if score difference is big enough or if it's the only one.
-        if len(candidates) == 1 or (candidates[0]['score'] - candidates[1]['score'] > 10):
-            return candidates[0]
-    # Case 2: One candidate is clearly the best, but score isn't super high
-    if len(candidates) > 1 and (candidates[0]['score'] - candidates[1]['score']) > 10:
-        return candidates[0]
-    # Case 3: Multiple candidates are close in score, ask for clarification
-    options = [f"「{c.get('drug_name_zh') or c.get('drug_name_en')}」" for c in candidates[:3]]
-    return f"請問您指的是以下哪一種藥物？\n- " + "\n- ".join(options)
-# ---------- 意圖偵測 ----------
-def detect_intent(query: str) -> List[str]:
-    intents = []
-    for cat, kws in INTENT_KEYWORDS.items():
-        if any(kw in query for kw in kws):
-            intents.append(cat)
-    return intents or ["其他"]
-# ---------- 檢索 ----------
-def rerank_results(query: str, candidates: List[Tuple[int, float, float, float]], sentences: List[str], reranker: Optional[Any], top_k: int, threshold: float) -> List[Dict[str, Any]]:
     try:
-        candidates = sorted(candidates, key=lambda x: -x[1])[:top_k * 2]  # 限20
-        if not candidates:
-            return []
-        valid_indices = [i for (i, *_ ) in candidates if 0 <= i < len(sentences)]
-        if not valid_indices:
-            return []
-        if reranker is None:
-            return [{"idx": i, "score": fused} for i, fused, _, _ in candidates]
-        pairs = [[query, sentences[i]] for i in valid_indices]
-        if not pairs:
-            return []
-        scores = reranker.predict(pairs, show_progress_bar=False)
-        reranked = [{"idx": valid_indices[j], "score": float(scores[j])} for j in range(len(scores)) if float(scores[j]) >= threshold]
-        if not reranked:
-            reranked = [{"idx": i, "score": fused} for i, fused, _, _ in candidates]
-        return sorted(reranked, key=lambda x: -x["score"])[:top_k]
     except Exception as e:
-        log.warning("Rerank failed: %s", e)
-        return [{"idx": i, "score": fused} for i, fused, _, _ in sorted(candidates, key=lambda x: -x[1])[:top_k]]  # fallback
-# MODIFIED: fuse_and_select now accepts parsed_info to boost scores based on dosage form
-def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]], bm25: Optional[Any], index: Optional[Any], emb_model: Optional[Any], reranker: Optional[Any], top_k: int = 10, drug_id: str = None, parsed_info: Dict[str, Any] = None) -> List[int]:
-    clean_query = parsed_info.get("question", query).strip().lower()
     cache_key = clean_query + str(drug_id)
     if cache_key in STATE.query_cache and time.time() - STATE.query_cache[cache_key]['time'] < 180:
         log.info("Cache hit for query: %s", clean_query[:50])
@@ -532,19 +581,14 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
                 scores[i] = scores.get(i, 0.0) + SEM_WEIGHT * (1.0 / (1 + rank))
     # Apply boosts
-    query_dosage_form = parsed_info.get("dosage_form") if parsed_info else ""
     for i in list(scores.keys()): # Iterate over a copy of keys
         meta_item = meta[i]
         # Section weight boost
         sec = meta_item.get("section", "其他")
         scores[i] *= SECTION_WEIGHTS.get(sec, 1.0)
-        # NEW: Dosage form boost
-        if query_dosage_form and query_dosage_form in sentences[i]:
-            scores[i] *= DOSAGE_FORM_BOOST
-    # NEW: Boost based on detected intent
     detected_intents = detect_intent(clean_query)
     for i in list(scores.keys()):
         meta_item = meta[i]
@@ -573,7 +617,6 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
     STATE.query_cache[cache_key] = {'idxs': idxs, 'time': time.time()}
     return idxs
 def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, Any]]) -> str:
     ctx_lines, total_len, seen = [], 0, set()
     for i in idxs:
@@ -582,139 +625,13 @@ def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, An
         if text in seen: continue
         chunk_id = meta[i].get("chunk_id", "None")
         section = meta[i].get("section", "未知章節")
-        line = f"[{section}]: {text}" # MODIFIED: Show section name for better context
         if total_len + len(line) > MAX_CONTEXT_CHARS: break
         ctx_lines.append(line)
         total_len += len(line) + 1
         seen.add(text)
     return "\n".join(ctx_lines) or "[未知章節]: 沒有找到相關資料，請諮詢醫師或藥師。"
-# MODIFIED: Prompt now includes structured patient info
-def build_prompt(parsed_info: Dict[str, Any], contexts: str, drug_choice: Dict[str, Any]) -> str:
-    patient_context_parts = []
-    if parsed_info.get('strength'):
-        patient_context_parts.append(f"劑量: {parsed_info['strength']}")
-    if parsed_info.get('dosage_form'):
-        patient_context_parts.append(f"劑型: {parsed_info['dosage_form']}")
-    patient_context_str = " ".join(patient_context_parts)
-    if not patient_context_str:
-        patient_context_str = "未提供"
-    return (
-        "你是一位專業、有同理心的藥師。請根據提供的「參考片段」，並考量「病患已知資訊」，簡潔地回答使用者的「問題」。\n"
-        "---限制---\n"
-        "- 絕對忠於「參考片段」，不可捏造或過度推論。你的知識僅限於提供的片段。\n"
-        "- 回覆少於 120 字，並使用繁體中文條列式 2-4 點說明。\n"
-        "- 語氣親切、精簡、專業。\n"
-        "- 若片段中無足夠資訊回答，必須回覆：「根據提供的資料，我無法找到關於您問題的明確答案，建議您諮詢醫師或藥師。」\n"
-        "---輸入資訊---\n"
-        f"藥物名稱: {drug_choice.get('drug_name_zh') or drug_choice.get('drug_name_en')}\n"
-        f"病患已知資訊: {patient_context_str}\n"
-        f"問題: {parsed_info.get('raw_query')}\n\n"
-        f"參考片段:\n{contexts}\n"
-        "---你的回答---"
-    )
-def call_llm(prompt: str, max_tokens: int = 2048) -> Optional[str]:
-    try:
-        from openai import OpenAI
-    except Exception as e:
-        log.warning("openai client 不可用：%s", e)
-        return None
-    if not (LITELLM_API_KEY and LM_MODEL and LITELLM_BASE_URL):
-        log.warning("LLM 未完整設定；略過生成。")
-        return None
-    client = OpenAI(base_url=LITELLM_BASE_URL, api_key=LITELLM_API_KEY)
-    try:
-        t0 = time.time()
-        resp = client.chat.completions.create(
-            model=LM_MODEL,
-            messages=[{"role": "user", "content": prompt}], # MODIFIED: Simplified to user role only, as system prompt is now part of the main prompt
-            temperature=0.1, # MODIFIED: slightly lower temperature for more deterministic answers
-            timeout=15, # MODIFIED: slightly longer timeout
-            max_tokens=max_tokens,
-        )
-        used = time.time() - t0
-        log.info("LLM ok (%.2fs)", used)
-        return (resp.choices[0].message.content or "").strip()
-    except Exception as e:
-        log.warning("LLM 失敗：%s", e)
-        return None
-def make_clarify_message() -> str:
-    # MODIFIED: More generic clarification message
-    msg = (
-        "我需要更多資訊才能準確回答，請您提供：\n"
-        "1. 完整的藥物名稱\n"
-        "2. 劑量和劑型（例如：普拿疼 500mg 錠劑）\n"
-        "3. 您的具體問題\n\n"
-        f"{DISCLAIMER}"
-    )
-    return msg
-def handle_error(code: str) -> str:
-    log.error(f"Pipeline error: {code}")
-    return f"抱歉，系統暫時無法回覆 ({code})。請諮詢醫師或藥師。{DISCLAIMER}"
-# ---------- 主流程 (MODIFIED) ----------
-async def answer_pipeline(query: str, user_id: str) -> str:
-    log.info("Pipeline start for user_id: %s, query: %s", user_id, query[:50])
-    if not query or not isinstance(query, str):
-        return handle_error("INVALID_QUERY")
-    if not STATE.sentences:
-        return handle_error("NO_CORPUS")
-    # 1. 解析使用者輸入
-    parsed_info = parse_user_message(query)
-    # 2. 尋找藥物候選
-    drug_candidates = find_drug_candidates(parsed_info, STATE.df_csv)
-    # 3. 選擇最佳藥物或要求澄清
-    drug_choice_or_clarification = select_best_drug_candidate(drug_candidates)
-    if drug_choice_or_clarification is None:
-        log.warning("No confident drug match found.")
-        return make_clarify_message()
-    if isinstance(drug_choice_or_clarification, str): # It's a clarification message
-        log.info("Requesting clarification from user.")
-        return drug_choice_or_clarification + f"\n\n{DISCLAIMER}"
-    drug_choice = drug_choice_or_clarification
-    log.info("Selected drug: %s", drug_choice)
-    # 4. 檢索相關內文
-    idxs = fuse_and_select(
-        query=parsed_info["raw_query"],
-        sentences=STATE.sentences,
-        meta=STATE.meta,
-        bm25=STATE.bm25,
-        index=STATE.faiss_index,
-        emb_model=STATE.emb_model,
-        reranker=STATE.reranker_model,
-        top_k=TOP_K_SENTENCES,
-        drug_id=drug_choice['drug_id'],
-        parsed_info=parsed_info
-    )
-    if not idxs:
-        return handle_error("NO_CONTEXT")
-    # 5. 建立上下文和 Prompt
-    context = build_context(idxs, STATE.sentences, STATE.meta)
-    prompt = build_prompt(parsed_info, context, drug_choice)
-    log.info("Generated Prompt:\n%s", prompt)
-    # 6. 呼叫 LLM 生成答案
-    answer = call_llm(prompt)
-    if not answer:
-        return handle_error("LLM_ERROR")
-    return f"{answer}\n\n{DISCLAIMER}"
 # ---------- LINE 驗簽與回覆 ----------
 def verify_line_signature(body_bytes: bytes, signature: str) -> bool:
     if not CHANNEL_SECRET:
@@ -801,4 +718,4 @@ async def health():
 if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))
-    uvicorn.run("app:app", host="0.0.0.0", port=port, log_level=LOG_LEVEL.lower(), reload=False)

 MAX_CONTEXT_CHARS = 8000
 DISCLAIMER = "此回覆僅供參考，請遵循醫師/藥師指示。"
+# 藥名映射與停用詞
 DRUG_NAME_MAPPING = {
     "fentanyl patch": "fentanyl",
     "spiriva respimat": "spiriva",
     "芬太尼貼片": "fentanyl",
     "透皮止痛貼片": "fentanyl",
 }
 DRUG_STOPWORDS = {"藥", "劑", "錠", "膠囊", "糖漿", "乳膏", "貼片", "含錠", "膜衣錠", "緩釋錠", "滴劑", "懸液", "注射液",
                   "吸入劑", "噴霧", "噴霧劑", "吸入器", "注射筆", "藥水", "小袋", "條", "包", "瓶", "外用", "口服"}
 }
 IMPORTANT_SECTIONS = ["用法及用量", "病人使用須知", "包裝及儲存", "不良反應", "警語及注意事項"]
+# 移除 DOSAGE_FORM_BOOST
 # ---------- 路徑工具 ----------
 def pick_existing_or_tmp(candidates: List[str]) -> str:
         try:
             with open(pkl_path, "rb") as f:
                 bm = pickle.load(f)
+            # BM25 has corpus, not corpus_size attribute
             n_bm = len(bm.corpus) if hasattr(bm, 'corpus') else 0
             if n_bm == len(sentences):
                 log.info("Loaded BM25: %s (n=%d)", pkl_path, n_bm)
     safe_pickle_dump(bm, pkl_path)
     return bm
+# ---------- 資訊解析與藥名處理 (簡化) ----------
+# 1. parse_user_message: 簡化為只比對藥名
def parse_user_message(query: str, df: pd.DataFrame) -> Dict[str, Any]:
    """Find the drug in ``df`` whose ``drug_name_norm`` best matches ``query``.

    An exact (case-insensitive) match on ``drug_name_norm`` wins outright;
    otherwise every unique ``drug_id`` row is scored with
    ``fuzz.token_set_ratio`` and the highest-scoring row is kept.

    Args:
        query: Raw user message.
        df: Drug table; must provide ``drug_id`` and ``drug_name_norm`` columns.

    Returns:
        A dict with keys ``drug_name``, ``drug_id`` and ``question``.
        ``drug_name`` and ``drug_id`` are ``None`` when fuzzy matching is
        unavailable, the inputs are empty, or the best score is below 80.
    """
    def _no_match() -> Dict[str, Any]:
        # Fresh dict per call so callers can mutate the result safely.
        return {"drug_name": None, "drug_id": None, "question": query}

    if not fuzz:
        log.warning("fuzzywuzzy not available; skipping fuzzy match.")
        return _no_match()
    if df is None or df.empty or not isinstance(query, str) or not query.strip():
        return _no_match()

    best_drug = None
    best_row = None
    max_score = 0

    # One row per drug; the table may hold many rows (sentences) per drug_id.
    unique_drugs = df.drop_duplicates(subset=["drug_id"])
    query_lower = query.lower().strip()

    # Check for a direct match first.
    direct_match = unique_drugs[unique_drugs["drug_name_norm"].str.lower() == query_lower]
    if not direct_match.empty:
        best_row = direct_match.iloc[0]
        best_drug = best_row["drug_name_norm"]
        # An exact match is the most confident result possible. Without this
        # the score stays 0 and the threshold check below discards the match.
        max_score = 100
        log.info("Direct match found: %s", best_drug)
    else:
        for _, row in unique_drugs.iterrows():
            raw_name = row.get("drug_name_norm")
            # Missing values load as NaN (a truthy float), so `or ""` does not
            # protect `.lower()`; skip anything that is not a usable string.
            if not isinstance(raw_name, str) or not raw_name.strip():
                continue
            drug_norm = raw_name.lower()
            score = fuzz.token_set_ratio(query_lower, drug_norm)
            if score > max_score:
                max_score = score
                best_drug = drug_norm
                best_row = row

    # Threshold guards against unrelated matches.
    if best_drug is None or max_score < 80:
        log.warning("No confident drug match found (score: %s)", max_score)
        return _no_match()

    log.info("Parsed user message (best match): %s, score: %s", best_drug, max_score)
    return {
        "drug_name": best_drug,
        "drug_id": best_row["drug_id"],
        "question": query,
    }
+# 2. find_drug_candidates: 簡化為單純 fuzzy 比對
def find_drug_candidates(parsed_info: Dict[str, Any], df: pd.DataFrame, top_k: int = 5) -> List[Dict[str, Any]]:
    """Rank drugs by fuzzy similarity between the question and ``drug_name_norm``.

    Args:
        parsed_info: Dict whose ``question`` entry holds the user's text.
        df: Drug table; must provide ``drug_id`` and ``drug_name_norm`` columns.
        top_k: Maximum number of candidates to return.

    Returns:
        Up to ``top_k`` dicts with keys ``drug_id``, ``drug_name`` (lowercased)
        and ``score``, sorted by descending score. Empty when the table or
        question is empty or fuzzy matching is unavailable.
    """
    # `or ""` also covers an explicit None stored under "question".
    query_text = (parsed_info.get("question") or "").lower()
    if df is None or df.empty or not query_text:
        return []
    if not fuzz:
        return []

    candidates_list = []
    unique_drugs = df.drop_duplicates(subset=["drug_id"])
    for _, row in unique_drugs.iterrows():
        raw_name = row.get("drug_name_norm")
        # NaN is a truthy float, so `value or ""` would still crash on
        # `.lower()`. Blank names are dropped too: they would only surface as
        # empty options in a clarification prompt.
        if not isinstance(raw_name, str) or not raw_name.strip():
            continue
        drug_norm = raw_name.lower()
        candidates_list.append({
            "drug_id": row["drug_id"],
            "drug_name": drug_norm,
            "score": fuzz.token_set_ratio(query_text, drug_norm),
        })

    # Sort by score and keep the top_k best; the sort is stable, so ties keep
    # table order.
    ranked = sorted(candidates_list, key=lambda x: x["score"], reverse=True)[:top_k]
    log.info("Found drug candidates: %s", ranked)
    return ranked
+# 3. answer_pipeline: 簡化流程
async def answer_pipeline(query: str, user_id: str) -> str:
    """Answer a user's drug question end to end.

    Steps: validate input, resolve the drug with ``parse_user_message``,
    rank alternatives with ``find_drug_candidates``, retrieve sentences with
    ``fuse_and_select``, build the prompt, and call the LLM.

    Args:
        query: Raw user message.
        user_id: Identifier of the requesting user (used for logging only).

    Returns:
        The LLM answer followed by the disclaimer, a clarification request,
        or an error message produced by ``handle_error``.
    """
    # Validate before logging: slicing a non-string query would raise.
    if not query or not isinstance(query, str):
        return handle_error("INVALID_QUERY")
    log.info("Pipeline start for user_id: %s, query: %s", user_id, query[:50])

    # `not df` raises ValueError on a DataFrame (ambiguous truth value), so
    # test for None / emptiness explicitly.
    df = STATE.df_csv
    if not STATE.sentences or df is None or df.empty:
        return handle_error("NO_CORPUS")

    # 1. Parse the user input and find the best-matching drug.
    best_drug_info = parse_user_message(query, df)
    if not best_drug_info.get("drug_id"):
        log.warning("No confident drug match found.")
        return make_clarify_message()

    # 2. Build the ranked candidate list.
    drug_candidates = find_drug_candidates(best_drug_info, df)
    if not drug_candidates:
        # Nothing to offer as options; ask for a fuller description instead
        # of sending a clarification prompt with an empty list.
        return make_clarify_message()

    # 3. Accept the match when the top score is >= 95 or it leads the
    #    runner-up by more than 10 points; otherwise ask the user to choose.
    top_score = drug_candidates[0]["score"]
    second_score = drug_candidates[1]["score"] if len(drug_candidates) > 1 else 0
    if top_score >= 95 or (top_score - second_score) > 10:
        log.info("Confidently selected drug: %s", best_drug_info["drug_name"])
        drug_choice = best_drug_info
    else:
        log.info("Scores are too close, requesting clarification.")
        options = [f"「{c.get('drug_name')}」" for c in drug_candidates[:3]]
        return "請問您指的是以下哪一種藥物？\n- " + "\n- ".join(options) + f"\n\n{DISCLAIMER}"

    # 4. Retrieve relevant passages for the selected drug.
    idxs = fuse_and_select(
        query=best_drug_info["question"],
        sentences=STATE.sentences,
        meta=STATE.meta,
        bm25=STATE.bm25,
        index=STATE.faiss_index,
        emb_model=STATE.emb_model,
        reranker=STATE.reranker_model,
        top_k=TOP_K_SENTENCES,
        drug_id=drug_choice["drug_id"],
    )
    if not idxs:
        return handle_error("NO_CONTEXT")

    # 5. Build the context and the prompt.
    context = build_context(idxs, STATE.sentences, STATE.meta)
    prompt = build_prompt(best_drug_info, context, drug_choice)
    log.info("Generated Prompt:\n%s", prompt)

    # 6. Ask the LLM for the answer.
    # NOTE(review): call_llm is a synchronous call inside this coroutine and
    # will block the event loop while it waits; consider offloading it.
    answer = call_llm(prompt)
    if not answer:
        return handle_error("LLM_ERROR")
    return f"{answer}\n\n{DISCLAIMER}"
+# 4. build_prompt: 簡化提示詞
def build_prompt(parsed_info: Dict[str, Any], contexts: str, drug_choice: Dict[str, Any]) -> str:
    """Assemble the LLM prompt from the drug name, user question and passages.

    Args:
        parsed_info: Dict whose ``question`` entry is the user's question.
        contexts: Reference passages, already formatted as text.
        drug_choice: Dict whose ``drug_name`` entry is the selected drug.

    Returns:
        The complete prompt: role instruction, constraints, inputs, and the
        answer marker, separated by newlines.
    """
    medicine = drug_choice.get('drug_name')
    asked = parsed_info.get('question')
    # One entry per output line; the empty entry yields the blank line that
    # separates the question from the reference passages.
    prompt_lines = [
        "你是一位專業、有同理心的藥師。請根據提供的「參考片段」，簡潔地回答使用者的「問題」。",
        "---限制---",
        "- 絕對忠於「參考片段」，不可捏造或過度推論。你的知識僅限於提供的片段。",
        "- 回覆少於 120 字，並使用繁體中文條列式 2-4 點說明。",
        "- 語氣親切、精簡、專業。",
        "- 若片段中無足夠資訊回答，必須回覆：「根據提供的資料，我無法找到關於您問題的明確答案，建議您諮詢醫師或藥師。」",
        "---輸入資訊---",
        f"藥物名稱: {medicine}",
        f"問題: {asked}",
        "",
        "參考片段:",
        f"{contexts}",
        "---你的回答---",
    ]
    return "\n".join(prompt_lines)
def call_llm(prompt: str, max_tokens: int = 2048) -> Optional[str]:
    """Send ``prompt`` as a single user message to the configured chat model.

    Args:
        prompt: Full prompt text.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The stripped response text, or ``None`` when the ``openai`` package
        cannot be imported, the LLM settings are incomplete, or the request
        raises.
    """
    # Imported lazily so a missing client library degrades to "no answer".
    try:
        from openai import OpenAI
    except Exception as import_err:
        log.warning("openai client 不可用：%s", import_err)
        return None

    fully_configured = all((LITELLM_API_KEY, LM_MODEL, LITELLM_BASE_URL))
    if not fully_configured:
        log.warning("LLM 未完整設定；略過生成。")
        return None

    llm = OpenAI(base_url=LITELLM_BASE_URL, api_key=LITELLM_API_KEY)
    request = dict(
        model=LM_MODEL,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
        timeout=15,
        max_tokens=max_tokens,
    )
    try:
        started = time.time()
        reply = llm.chat.completions.create(**request)
        elapsed = time.time() - started
        log.info("LLM ok (%.2fs)", elapsed)
        text = reply.choices[0].message.content or ""
        return text.strip()
    except Exception as call_err:
        log.warning("LLM 失敗：%s", call_err)
        return None
def make_clarify_message() -> str:
    """Return the fixed reply asking for the drug name and the question.

    The text ends with the module-level ``DISCLAIMER``.
    """
    pieces = [
        "我需要更多資訊才能準確回答，請您提供：\n",
        "1. 完整的藥物名稱\n",
        "2. 您的具體問題\n\n",
        f"{DISCLAIMER}",
    ]
    return "".join(pieces)
def handle_error(code: str) -> str:
    """Log a pipeline failure and return the user-facing apology for it.

    Args:
        code: Short error code; it is logged and embedded in the reply.

    Returns:
        The apology text containing ``code``, followed by ``DISCLAIMER``.
    """
    log_line = f"Pipeline error: {code}"
    log.error(log_line)
    reply = f"抱歉，系統暫時無法回覆 ({code})。請諮詢醫師或藥師。{DISCLAIMER}"
    return reply
+# 5. fuse_and_select: 移除劑型加權
+def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]], bm25: Optional[Any], index: Optional[Any], emb_model: Optional[Any], reranker: Optional[Any], top_k: int = 10, drug_id: str = None) -> List[int]:
+    """
+    MODIFIED: 移除劑型加權。只保留 BM25/FAISS 融合 + 章節加權 + 意圖加權。
+    """
+    clean_query = query.strip().lower()
     cache_key = clean_query + str(drug_id)
     if cache_key in STATE.query_cache and time.time() - STATE.query_cache[cache_key]['time'] < 180:
         log.info("Cache hit for query: %s", clean_query[:50])
                 scores[i] = scores.get(i, 0.0) + SEM_WEIGHT * (1.0 / (1 + rank))
     # Apply boosts
     for i in list(scores.keys()): # Iterate over a copy of keys
         meta_item = meta[i]
         # Section weight boost
         sec = meta_item.get("section", "其他")
         scores[i] *= SECTION_WEIGHTS.get(sec, 1.0)
+    # Boost based on detected intent
     detected_intents = detect_intent(clean_query)
     for i in list(scores.keys()):
         meta_item = meta[i]
     STATE.query_cache[cache_key] = {'idxs': idxs, 'time': time.time()}
     return idxs
 def build_context(idxs: List[int], sentences: List[str], meta: List[Dict[str, Any]]) -> str:
     ctx_lines, total_len, seen = [], 0, set()
     for i in idxs:
         if text in seen: continue
         chunk_id = meta[i].get("chunk_id", "None")
         section = meta[i].get("section", "未知章節")
+        line = f"[{section}]: {text}"
         if total_len + len(line) > MAX_CONTEXT_CHARS: break
         ctx_lines.append(line)
         total_len += len(line) + 1
         seen.add(text)
     return "\n".join(ctx_lines) or "[未知章節]: 沒有找到相關資料，請諮詢醫師或藥師。"
 # ---------- LINE 驗簽與回覆 ----------
 def verify_line_signature(body_bytes: bytes, signature: str) -> bool:
     if not CHANNEL_SECRET:
 if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))
+    uvicorn.run("app:app", host="0.0.0.0", port=port, log_level=LOG_LEVEL.lower(), reload=False)