Spaces:

HungBB
/

egov-bot-backend

Running

App Files Files Community

khoaaaaa commited on 14 days ago

Commit

4bdaf32

verified ·

1 Parent(s): 465670d

update llm_classifier_follow_up

Browse files

Files changed (1) hide show

app.py +14 -30

app.py CHANGED Viewed

@@ -27,6 +27,7 @@ from rank_bm25 import BM25Okapi
 import google.generativeai as genai
 from cachetools import TTLCache
 from huggingface_hub import login, hf_hub_download
 # --- Login ---
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
@@ -130,37 +131,20 @@ def minmax_scale(arr):
         return np.zeros_like(arr)
     return (arr - np.min(arr)) / (np.max(arr) - np.min(arr))
 def classify_followup(text: str):
-    text = text.lower().strip()
-    score = 0
-    strong_followup_keywords = [
-        r"\b(nó|cái (này|đó|ấy)|thủ tục (này|đó|ấy))\b",
-        r"\b(vừa (nói|hỏi)|trước đó|ở trên|phía trên)\b",
-        r"\b(tiếp theo|tiếp|còn nữa|ngoài ra)\b",
-        r"\b(thế (thì|à)|vậy (thì|à)|như vậy)\b"
-    ]
-    # SỬA LỖI: Thêm "lệ phí" và "chuẩn bị" vào đây
-    detail_questions = [
-        r"\b(mất bao lâu|thời gian|bao nhiêu tiền|chi phí|phí|lệ phí)\b",
-        r"\b(ở đâu|tại đâu|chỗ nào|địa chỉ)\b",
-        r"\b(cần (gì|những gì)|yêu cầu|điều kiện|chuẩn bị)\b"
-    ]
-    specific_services = [
-        r"\b(làm|cấp|gia hạn|đổi|đăng ký)\s+(căn cước|cmnd|cccd)\b",
-        r"\b(làm|cấp|gia hạn|đổi)\s+hộ chiếu\b",
-        r"\b(đăng ký)\s+(kết hôn|sinh|tử|hộ khẩu)\b"
-    ]
-    if any(re.search(p, text) for p in strong_followup_keywords):
-        score -= 5 # Tăng điểm phạt
-    if any(re.search(p, text) for p in detail_questions):
-        score -= 4 # Tăng điểm phạt
-    if any(re.search(p, text) for p in specific_services):
-        score += 1 # Giảm điểm cộng
-    if len(text.split()) <= 3: # Giảm ngưỡng độ dài
-        score -= 1
-    return 0 if score < 0 else 1
 def retrieve(query: str, top_k=TOP_K):
     print("Retrieving using FAISS -> BM25 Rerank method on CHUNKS...")

 import google.generativeai as genai
 from cachetools import TTLCache
 from huggingface_hub import login, hf_hub_download
+from transformers import pipeline
 # --- Login ---
 HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or os.environ.get("HUGGINGFACE_HUB_TOKEN")
         return np.zeros_like(arr)
     return (arr - np.min(arr)) / (np.max(arr) - np.min(arr))
+classifier = pipeline(
+    "text-classification",
+    model="Qwen/Qwen2-0.5B-Instruct",
+    device_map="auto"
+)
 def classify_followup(text: str):
+    prompt = f"""
+    Xác định xem câu sau có phải là follow-up (câu hỏi tiếp nối từ ngữ cảnh trước đó) hay không.
+    Trả lời duy nhất: 0 (không) hoặc 1 (có).
+    Câu: "{text}"
+    """
+    result = classifier(prompt, truncation=True)[0]["label"]
+    return 1 if "1" in result else 0
 def retrieve(query: str, top_k=TOP_K):
     print("Retrieving using FAISS -> BM25 Rerank method on CHUNKS...")