BasilTh
commited on
Commit
·
ae5323d
1
Parent(s):
938032f
Deploy updated SLM customer-support chatbot
Browse files- SLM_CService.py +28 -32
SLM_CService.py
CHANGED
@@ -66,50 +66,42 @@ chat_pipe = pipeline(
|
|
66 |
tokenizer=tokenizer,
|
67 |
trust_remote_code=True,
|
68 |
return_full_text=False,
|
69 |
-
)
|
70 |
|
71 |
# ──────────────────────────────────────────────────────────────────────────────
|
72 |
# Moderation & blocking (strict)
|
73 |
from transformers import TextClassificationPipeline
|
74 |
|
75 |
-
# Heuristic list (not exhaustive; we also add classifier + bad_words_ids)
|
76 |
SEXUAL_TERMS = [
|
77 |
-
# single words
|
78 |
"sex","sexual","porn","nsfw","fetish","kink","bdsm","nude","naked","anal",
|
79 |
"blowjob","handjob","cum","breast","boobs","vagina","penis","semen","ejaculate",
|
80 |
"doggy","missionary","cowgirl","69","kamasutra","dominatrix","submissive","spank",
|
81 |
-
# common phrases
|
82 |
"sex position","have sex","make love","how to flirt","dominant in bed",
|
83 |
]
|
84 |
|
85 |
def _bad_words_ids(tokenizer, terms: List[str]) -> List[List[int]]:
|
86 |
-
"""Build bad_words_ids for generate(); add with/without leading space.
|
87 |
-
|
88 |
-
HF docs: pass bad_words_ids to prevent generating those tokens. :contentReference[oaicite:4]{index=4}
|
89 |
-
"""
|
90 |
ids = set()
|
91 |
for t in terms:
|
92 |
-
for
|
93 |
-
toks = tokenizer(
|
94 |
if toks:
|
95 |
ids.add(tuple(toks))
|
96 |
return [list(t) for t in ids]
|
97 |
|
98 |
BAD_WORD_IDS = _bad_words_ids(tokenizer, SEXUAL_TERMS)
|
99 |
|
100 |
-
# Optional classifiers for extra safety (small, fast)
|
101 |
nsfw_cls: TextClassificationPipeline = pipeline(
|
102 |
"text-classification",
|
103 |
model="eliasalbouzidi/distilbert-nsfw-text-classifier",
|
104 |
truncation=True,
|
105 |
-
)
|
106 |
|
107 |
toxicity_cls: TextClassificationPipeline = pipeline(
|
108 |
"text-classification",
|
109 |
model="unitary/toxic-bert",
|
110 |
truncation=True,
|
111 |
return_all_scores=True,
|
112 |
-
)
|
113 |
|
114 |
def is_sexual_or_toxic(text: str) -> bool:
|
115 |
t = (text or "").lower()
|
@@ -117,14 +109,12 @@ def is_sexual_or_toxic(text: str) -> bool:
|
|
117 |
return True
|
118 |
try:
|
119 |
res = nsfw_cls(t)[0]
|
120 |
-
|
121 |
-
score = float(res.get("score") or 0)
|
122 |
-
if label == "nsfw" and score > 0.60:
|
123 |
return True
|
124 |
except Exception:
|
125 |
pass
|
126 |
try:
|
127 |
-
scores = toxicity_cls(t)[0]
|
128 |
if any(item["score"] > 0.60 and item["label"].lower() in
|
129 |
{"toxic","severe_toxic","obscene","threat","insult","identity_hate"} for item in scores):
|
130 |
return True
|
@@ -136,7 +126,7 @@ REFUSAL = ("Sorry, I can’t help with that. I’m only for store support "
|
|
136 |
"(orders, shipping, ETA, tracking, returns, warranty, account).")
|
137 |
|
138 |
# ──────────────────────────────────────────────────────────────────────────────
|
139 |
-
# Memory (
|
140 |
memory = ConversationBufferMemory(return_messages=True)
|
141 |
|
142 |
# System prompt = domain guardrails
|
@@ -179,14 +169,13 @@ def _lc_to_messages() -> List[Dict[str,str]]:
|
|
179 |
return msgs
|
180 |
|
181 |
def _generate_reply(user_input: str) -> str:
|
182 |
-
# format as chat (HF chat templating guide) :contentReference[oaicite:8]{index=8}
|
183 |
messages = _lc_to_messages() + [{"role": "user", "content": user_input}]
|
184 |
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
185 |
out = chat_pipe(
|
186 |
prompt,
|
187 |
eos_token_id=tokenizer.eos_token_id,
|
188 |
pad_token_id=tokenizer.pad_token_id,
|
189 |
-
bad_words_ids=BAD_WORD_IDS,
|
190 |
**GEN_KW,
|
191 |
)[0]["generated_text"]
|
192 |
return out.strip()
|
@@ -198,20 +187,15 @@ def chat_with_memory(user_input: str) -> str:
|
|
198 |
if not ui:
|
199 |
return "How can I help with your order today?"
|
200 |
|
201 |
-
# 1)
|
202 |
if is_sexual_or_toxic(ui):
|
203 |
reply = REFUSAL
|
204 |
memory.save_context({"input": ui}, {"output": reply})
|
205 |
return reply
|
206 |
|
207 |
-
# 2) Support-only guard (light small-talk allowed)
|
208 |
low = ui.lower()
|
209 |
-
if not any(k in low for k in ALLOWED_KEYWORDS) and not any(k in low for k in ("hi","hello","hey","thanks","thank you")):
|
210 |
-
reply = "Iβm for store support only (orders, shipping, returns, warranty, account). How can I help with those?"
|
211 |
-
memory.save_context({"input": ui}, {"output": reply})
|
212 |
-
return reply
|
213 |
|
214 |
-
#
|
215 |
if any(tok in low for tok in ["thank you","thanks","thx"]):
|
216 |
reply = handle_gratitude()
|
217 |
memory.save_context({"input": ui}, {"output": reply})
|
@@ -221,15 +205,26 @@ def chat_with_memory(user_input: str) -> str:
|
|
221 |
memory.save_context({"input": ui}, {"output": reply})
|
222 |
return reply
|
223 |
|
224 |
-
#
|
225 |
new_o = extract_order(ui)
|
226 |
if new_o:
|
227 |
stored_order = new_o
|
228 |
if pending_intent in ("status","eta","track","link"):
|
229 |
fn = {"status": handle_status,"eta": handle_eta,"track": handle_track,"link": handle_link}[pending_intent]
|
230 |
-
reply = fn(stored_order)
|
231 |
-
|
232 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
if any(k in low for k in ["status","where is my order","check status"]):
|
234 |
intent = "status"
|
235 |
elif any(k in low for k in ["how long","eta","delivery time"]):
|
@@ -241,6 +236,7 @@ def chat_with_memory(user_input: str) -> str:
|
|
241 |
else:
|
242 |
intent = "fallback"
|
243 |
|
|
|
244 |
if intent in ("status","eta","track","link"):
|
245 |
if not stored_order:
|
246 |
pending_intent = intent
|
@@ -251,7 +247,7 @@ def chat_with_memory(user_input: str) -> str:
|
|
251 |
memory.save_context({"input": ui}, {"output": reply})
|
252 |
return reply
|
253 |
|
254 |
-
#
|
255 |
reply = _generate_reply(ui)
|
256 |
if is_sexual_or_toxic(reply):
|
257 |
reply = REFUSAL
|
|
|
66 |
tokenizer=tokenizer,
|
67 |
trust_remote_code=True,
|
68 |
return_full_text=False,
|
69 |
+
)
|
70 |
|
71 |
# ──────────────────────────────────────────────────────────────────────────────
|
72 |
# Moderation & blocking (strict)
|
73 |
from transformers import TextClassificationPipeline
|
74 |
|
|
|
75 |
SEXUAL_TERMS = [
|
|
|
76 |
"sex","sexual","porn","nsfw","fetish","kink","bdsm","nude","naked","anal",
|
77 |
"blowjob","handjob","cum","breast","boobs","vagina","penis","semen","ejaculate",
|
78 |
"doggy","missionary","cowgirl","69","kamasutra","dominatrix","submissive","spank",
|
|
|
79 |
"sex position","have sex","make love","how to flirt","dominant in bed",
|
80 |
]
|
81 |
|
82 |
def _bad_words_ids(tokenizer, terms: List[str]) -> List[List[int]]:
|
|
|
|
|
|
|
|
|
83 |
ids = set()
|
84 |
for t in terms:
|
85 |
+
for v in (t, " " + t):
|
86 |
+
toks = tokenizer(v, add_special_tokens=False).input_ids
|
87 |
if toks:
|
88 |
ids.add(tuple(toks))
|
89 |
return [list(t) for t in ids]
|
90 |
|
91 |
BAD_WORD_IDS = _bad_words_ids(tokenizer, SEXUAL_TERMS)
|
92 |
|
|
|
93 |
nsfw_cls: TextClassificationPipeline = pipeline(
|
94 |
"text-classification",
|
95 |
model="eliasalbouzidi/distilbert-nsfw-text-classifier",
|
96 |
truncation=True,
|
97 |
+
)
|
98 |
|
99 |
toxicity_cls: TextClassificationPipeline = pipeline(
|
100 |
"text-classification",
|
101 |
model="unitary/toxic-bert",
|
102 |
truncation=True,
|
103 |
return_all_scores=True,
|
104 |
+
)
|
105 |
|
106 |
def is_sexual_or_toxic(text: str) -> bool:
|
107 |
t = (text or "").lower()
|
|
|
109 |
return True
|
110 |
try:
|
111 |
res = nsfw_cls(t)[0]
|
112 |
+
if (res.get("label","").lower() == "nsfw") and float(res.get("score",0)) > 0.60:
|
|
|
|
|
113 |
return True
|
114 |
except Exception:
|
115 |
pass
|
116 |
try:
|
117 |
+
scores = toxicity_cls(t)[0]
|
118 |
if any(item["score"] > 0.60 and item["label"].lower() in
|
119 |
{"toxic","severe_toxic","obscene","threat","insult","identity_hate"} for item in scores):
|
120 |
return True
|
|
|
126 |
"(orders, shipping, ETA, tracking, returns, warranty, account).")
|
127 |
|
128 |
# ──────────────────────────────────────────────────────────────────────────────
|
129 |
+
# Memory (kept simple)
|
130 |
memory = ConversationBufferMemory(return_messages=True)
|
131 |
|
132 |
# System prompt = domain guardrails
|
|
|
169 |
return msgs
|
170 |
|
171 |
def _generate_reply(user_input: str) -> str:
|
|
|
172 |
messages = _lc_to_messages() + [{"role": "user", "content": user_input}]
|
173 |
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
174 |
out = chat_pipe(
|
175 |
prompt,
|
176 |
eos_token_id=tokenizer.eos_token_id,
|
177 |
pad_token_id=tokenizer.pad_token_id,
|
178 |
+
bad_words_ids=BAD_WORD_IDS,
|
179 |
**GEN_KW,
|
180 |
)[0]["generated_text"]
|
181 |
return out.strip()
|
|
|
187 |
if not ui:
|
188 |
return "How can I help with your order today?"
|
189 |
|
190 |
+
# 1) Safety first
|
191 |
if is_sexual_or_toxic(ui):
|
192 |
reply = REFUSAL
|
193 |
memory.save_context({"input": ui}, {"output": reply})
|
194 |
return reply
|
195 |
|
|
|
196 |
low = ui.lower()
|
|
|
|
|
|
|
|
|
197 |
|
198 |
+
# 2) Quick intents that don't depend on domain keywords
|
199 |
if any(tok in low for tok in ["thank you","thanks","thx"]):
|
200 |
reply = handle_gratitude()
|
201 |
memory.save_context({"input": ui}, {"output": reply})
|
|
|
205 |
memory.save_context({"input": ui}, {"output": reply})
|
206 |
return reply
|
207 |
|
208 |
+
# 3) *** ORDER NUMBER FIRST *** (so follow-ups like "It's #4567" work)
|
209 |
new_o = extract_order(ui)
|
210 |
if new_o:
|
211 |
stored_order = new_o
|
212 |
if pending_intent in ("status","eta","track","link"):
|
213 |
fn = {"status": handle_status,"eta": handle_eta,"track": handle_track,"link": handle_link}[pending_intent]
|
214 |
+
reply = fn(stored_order)
|
215 |
+
pending_intent = None
|
216 |
+
memory.save_context({"input": ui}, {"output": reply})
|
217 |
+
return reply
|
218 |
+
# No pending intent β fall through to classify what they want next.
|
219 |
+
|
220 |
+
# 4) Support-only guard (but SKIP if we just saw an order number or have a pending intent)
|
221 |
+
if pending_intent is None and new_o is None:
|
222 |
+
if not any(k in low for k in ALLOWED_KEYWORDS) and not any(k in low for k in ("hi","hello","hey")):
|
223 |
+
reply = "Iβm for store support only (orders, shipping, returns, warranty, account). How can I help with those?"
|
224 |
+
memory.save_context({"input": ui}, {"output": reply})
|
225 |
+
return reply
|
226 |
+
|
227 |
+
# 5) Intent classification
|
228 |
if any(k in low for k in ["status","where is my order","check status"]):
|
229 |
intent = "status"
|
230 |
elif any(k in low for k in ["how long","eta","delivery time"]):
|
|
|
236 |
else:
|
237 |
intent = "fallback"
|
238 |
|
239 |
+
# 6) Handle core intents
|
240 |
if intent in ("status","eta","track","link"):
|
241 |
if not stored_order:
|
242 |
pending_intent = intent
|
|
|
247 |
memory.save_context({"input": ui}, {"output": reply})
|
248 |
return reply
|
249 |
|
250 |
+
# 7) LLM fallback (still on-topic) + post-check for safety
|
251 |
reply = _generate_reply(ui)
|
252 |
if is_sexual_or_toxic(reply):
|
253 |
reply = REFUSAL
|