Spaces:
Sleeping
Sleeping
Song
committed on
Commit
·
b20c534
1
Parent(s):
032fccb
hi
Browse files
app.py
CHANGED
@@ -114,13 +114,12 @@ DRUG_STOPWORDS = {"藥", "劑", "錠", "膠囊", "糖漿", "乳膏", "貼片", "
|
|
114 |
|
115 |
# 意圖分類(改用字典提升匹配率)
|
116 |
INTENT_KEYWORDS = {
|
117 |
-
"
|
118 |
-
"保存/攜帶 (Storage & Handling)": ["保存", "儲存", "攜帶", "冷藏", "室溫", "潮濕", "保冰袋"],
|
119 |
-
"副作用/異常 (Side Effects / Issues)": ["副作用", "異常", "拉肚子", "
|
120 |
-
"
|
121 |
-
"
|
122 |
-
"
|
123 |
-
"禁忌症/適應症 (Contraindications/Indications)": ["禁忌", "適應症", "不能用", "適合"],
|
124 |
}
|
125 |
|
126 |
# 章節權重
|
@@ -240,6 +239,15 @@ def tokenize_zh(s: str) -> List[str]:
|
|
240 |
if jieba is None: return s.strip().split()
|
241 |
return [t for t in jieba.lcut(s) if t.strip() and t not in DRUG_STOPWORDS]
|
242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
243 |
class State:
|
244 |
sentences: List[str] = []
|
245 |
meta: List[Dict[str, Any]] = []
|
@@ -594,13 +602,12 @@ def fuse_and_select(query: str, sentences: List[str], meta: List[Dict[str, Any]]
|
|
594 |
meta_item = meta[i]
|
595 |
sec = meta_item.get("section", "其他")
|
596 |
|
597 |
-
for intent in
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
603 |
-
scores[i] *= 1.5
|
604 |
|
605 |
# Inject important sections if they are missing
|
606 |
for sec in IMPORTANT_SECTIONS:
|
|
|
114 |
|
115 |
# 意圖分類(改用字典提升匹配率)
|
116 |
# Intent classification table: maps each intent label to the substrings whose
# presence in a user query signals that intent. The labels are compared as
# exact strings by the score-boosting code, so they must not be altered.
INTENT_KEYWORDS = {
    "如何用藥 (Administration)": ["操作", "使用", "怎麼用", "怎麼吃", "怎麼貼", "怎麼喝", "怎麼注射", "服用", "組裝", "安裝", "用藥方式"],
    "保存/攜帶 (Storage & Handling)": ["保存", "儲存", "攜帶", "冷藏", "室溫", "潮濕", "保冰袋", "旅遊"],
    "副作用/異常 (Side Effects / Issues)": ["副作用", "異常", "不良反應", "頭暈", "拉肚子", "噁心", "想吐", "過敏", "問題"],
    "劑量調整 (Dosage Adjustment)": ["劑量", "幾顆", "調整", "忘記吃", "上限", "幾次", "劑量多少"],
    "用藥時間 (Timing)": ["時間", "多久", "間隔", "飯前", "飯後", "隨餐", "睡前", "什麼時候"],
    "禁忌症/適應症 (Contraindications/Indications)": ["禁忌", "適應症", "不能用", "不適合", "誰不能吃", "適合"],
}
|
124 |
|
125 |
# 章節權重
|
|
|
239 |
if jieba is None: return s.strip().split()
|
240 |
return [t for t in jieba.lcut(s) if t.strip() and t not in DRUG_STOPWORDS]
|
241 |
|
242 |
+
def detect_intent(query: str) -> List[str]:
    """Return the intent labels whose keywords occur in *query*.

    The query is lower-cased and stripped of all whitespace before matching,
    so a keyword is still found when the user separates its characters with
    spaces. Matching is plain substring containment against the keyword
    lists in ``INTENT_KEYWORDS``.

    Args:
        query: Raw user question. An empty or ``None`` query yields no intents.

    Returns:
        The matching keys of ``INTENT_KEYWORDS``, in dictionary order, one
        entry per intent that has at least one keyword present in the query.
    """
    if not query:
        return []
    # str.split() without a separator splits on every Unicode whitespace
    # character, so full-width spaces (U+3000), tabs and newlines are removed
    # as well; replace(" ", "") only handled the ASCII space.
    normalized = "".join(query.lower().split())
    return [
        intent
        for intent, keywords in INTENT_KEYWORDS.items()
        if any(keyword in normalized for keyword in keywords)
    ]
|
250 |
+
|
251 |
class State:
|
252 |
sentences: List[str] = []
|
253 |
meta: List[Dict[str, Any]] = []
|
|
|
602 |
meta_item = meta[i]
|
603 |
sec = meta_item.get("section", "其他")
|
604 |
|
605 |
+
if any(intent in detected_intents for intent in ["如何用藥 (Administration)", "用藥時間 (Timing)", "劑量調整 (Dosage Adjustment)"]) and sec in ["用法及用量", "病人使用須知"]:
|
606 |
+
scores[i] *= 1.5
|
607 |
+
elif any(intent in detected_intents for intent in ["保存/攜帶 (Storage & Handling)"]) and sec in ["儲存條件", "包裝及儲存"]:
|
608 |
+
scores[i] *= 1.5
|
609 |
+
elif any(intent in detected_intents for intent in ["副作用/異常 (Side Effects / Issues)"]) and sec in ["不良反應", "警語及注意事項"]:
|
610 |
+
scores[i] *= 1.5
|
|
|
611 |
|
612 |
# Inject important sections if they are missing
|
613 |
for sec in IMPORTANT_SECTIONS:
|