Spaces:

ThomasBasil
/

slm-customer-support-chatbot

Paused

File size: 11,946 Bytes

93d3bfa
7ceb07f
f3b040f
93d3bfa
 
938032f
77b14f6
7d9bb79
 
 
938032f
 
93d3bfa
f3b040f
 
77b14f6
f3b040f
93d3bfa
da2916f
 
938032f
 
7ceb07f
 
 
 
 
 
938032f
 
f3b040f
7ceb07f
 
 
 
85c8b2b
77b14f6
938032f
a58eed0
 
 
 
 
93d3bfa
7ceb07f
8eb6be4
7ceb07f
 
 
 
 
f3b040f
8eb6be4
7ceb07f
 
a58eed0
93d3bfa
77b14f6
7ceb07f
85c8b2b
7ceb07f
 
 
 
 
ae5323d
816e617
93d3bfa
7ceb07f
938032f
7ceb07f
938032f
 
 
 
 
 
da2916f
 
938032f
da2916f
 
 
938032f
 
 
 
da2916f
ae5323d
938032f
da2916f
ae5323d
938032f
 
da2916f
938032f
 
da2916f
 
938032f
ae5323d
da2916f
 
938032f
da2916f
938032f
7ceb07f
938032f
 
77b14f6
93d3bfa
da2916f
7ceb07f
 
938032f
da2916f
 
938032f
 
 
7ceb07f
938032f
 
7ceb07f
 
 
 
 
 
cd7eb0b
7ceb07f
938032f
da2916f
cd7eb0b
 
 
da2916f
f3b040f
 
85c8b2b
f3b040f
 
938032f
77b14f6
7ceb07f
 
cd7eb0b
7ceb07f
da2916f
 
 
938032f
 
7ceb07f
816e617
f3b040f
 
77b14f6
da2916f
 
 
 
 
 
 
 
938032f
 
 
da2916f
 
938032f
 
 
77b14f6
938032f
 
 
 
 
 
 
ae5323d
938032f
 
 
 
 
816e617
 
93d3bfa
938032f
 
 
7ceb07f
da2916f
 
 
 
 
 
938032f
 
 
 
 
77b14f6
f3b040f
da2916f
938032f
77b14f6
 
 
 
cd7eb0b
77b14f6
cd7eb0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816e617
 
7ceb07f
 
 
ae5323d
cd7eb0b
 
 
 
 
ae5323d
cd7eb0b
938032f
77b14f6
938032f
77b14f6
938032f
77b14f6
938032f
77b14f6
7ceb07f
da2916f
7ceb07f
 
 
 
816e617
77b14f6
f3b040f
cd7eb0b
da2916f
f3b040f
 
 
816e617
da2916f
 
f3b040f
cd7eb0b
 
7ceb07f
 
 
cd7eb0b
 
7ceb07f
 
 
cd7eb0b
 
85c8b2b
cd7eb0b
938032f
da2916f
77b14f6
816e617

# ── SLM_CService.py ───────────────────────────────────────────────────────────
# Customer-support-only chatbot with strict NSFW blocking + robust FSM.

import os
import re
from typing import List, Dict

os.environ["OMP_NUM_THREADS"] = "1"
os.environ.pop("HF_HUB_OFFLINE", None)

# Unsloth must come before transformers/peft
import unsloth  # noqa: E402
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel
from langchain.memory import ConversationBufferMemory

# ──────────────────────────────────────────────────────────────────────────────
REPO = "ThomasBasil/bitext-qlora-tinyllama"
BASE = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

GEN_KW = dict(
    max_new_tokens=160,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
    repetition_penalty=1.1,
    no_repeat_ngram_size=4,
)

bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# ---- Tokenizer & model -------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(REPO, use_fast=False)
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"
tokenizer.truncation_side = "right"

# Unsloth returns (model, tokenizer) → unpack
model, _ = unsloth.FastLanguageModel.from_pretrained(
    model_name=BASE,
    load_in_4bit=True,
    quantization_config=bnb_cfg,
    device_map="auto",
    trust_remote_code=True,
)
unsloth.FastLanguageModel.for_inference(model)

# Apply your PEFT adapter from repo root
model = PeftModel.from_pretrained(model, REPO)
model.eval()

# Text-generation pipeline (pass gen params at call time)
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    return_full_text=False,
)

# ──────────────────────────────────────────────────────────────────────────────
# Moderation & blocking (strict)
from transformers import TextClassificationPipeline

SEXUAL_TERMS = [
    "sex","sexual","porn","nsfw","fetish","kink","bdsm","nude","naked","anal",
    "blowjob","handjob","cum","breast","boobs","vagina","penis","semen","ejaculate",
    "doggy","missionary","cowgirl","69","kamasutra","dominatrix","submissive","spank",
    "sex position","have sex","make love","how to flirt","dominant in bed",
]
def _bad_words_ids(tok, terms: List[str]) -> List[List[int]]:
    ids=set()
    for t in terms:
        for v in (t, " "+t):
            toks = tok(v, add_special_tokens=False).input_ids
            if toks: ids.add(tuple(toks))
    return [list(t) for t in ids]
BAD_WORD_IDS = _bad_words_ids(tokenizer, SEXUAL_TERMS)

nsfw_cls: TextClassificationPipeline = pipeline(
    "text-classification", model="eliasalbouzidi/distilbert-nsfw-text-classifier", truncation=True,
)
toxicity_cls: TextClassificationPipeline = pipeline(
    "text-classification", model="unitary/toxic-bert", truncation=True, return_all_scores=True,
)
def is_sexual_or_toxic(text: str) -> bool:
    t = (text or "").lower()
    if any(k in t for k in SEXUAL_TERMS): return True
    try:
        res = nsfw_cls(t)[0]
        if (res.get("label","").lower()=="nsfw") and float(res.get("score",0))>0.60: return True
    except Exception: pass
    try:
        scores = toxicity_cls(t)[0]
        if any(s["score"]>0.60 and s["label"].lower() in
               {"toxic","severe_toxic","obscene","threat","insult","identity_hate"} for s in scores):
            return True
    except Exception: pass
    return False

REFUSAL = ("Sorry, I can’t help with that. I’m only for store support "
           "(orders, shipping, ETA, tracking, returns, warranty, account).")

# ──────────────────────────────────────────────────────────────────────────────
# Memory + globals
memory = ConversationBufferMemory(return_messages=True)

SYSTEM_PROMPT = (
    "You are a customer-support assistant for our store. Only handle account, "
    "orders, shipping, delivery ETA, tracking links, returns/refunds, warranty, and store policy. "
    "If a request is out of scope or sexual/NSFW, refuse briefly and offer support options. "
    "Be concise and professional."
)

ALLOWED_KEYWORDS = (
    "order","track","status","delivery","shipping","ship","eta","arrive",
    "refund","return","exchange","warranty","guarantee","policy","account","billing",
    "address","cancel","help","support","agent","human"
)

# Robust order detection:
ORDER_RX = re.compile(
    r"(?:#\s*([\d]{3,12})|order(?:\s*(?:no\.?|number|id))?\s*#?\s*([\d]{3,12}))",
    flags=re.I,
)
def extract_order(text: str):
    if not text: return None
    m = ORDER_RX.search(text)
    return (m.group(1) or m.group(2)) if m else None

def handle_status(o): return f"Order #{o} is in transit and should arrive in 3–5 business days."
def handle_eta(o):    return f"Delivery for order #{o} typically takes 3–5 days; you can track it at https://track.example.com/{o}"
def handle_track(o):  return f"Track order #{o} here: https://track.example.com/{o}"
def handle_link(o):   return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}"
def handle_return_policy(_=None):
    return ("Our return policy allows returns of unused items in original packaging within 30 days of receipt. "
            "Would you like me to connect you with a human agent?")
def handle_warranty_policy(_=None):
    return ("We provide a 1-year limited warranty against manufacturing defects. "
            "Within 30 days you can return or exchange; afterwards, warranty service applies. "
            "Need help starting a claim?")
def handle_cancel(o=None):
    return (f"I’ve submitted a cancellation request for order #{o}. If it has already shipped, "
            "we’ll process a return/refund once it’s back. You’ll receive a confirmation email shortly.")
def handle_gratitude(_=None): return "You’re welcome! Anything else I can help with?"
def handle_escalation(_=None): return "I can connect you with a human agent. Would you like me to do that?"
def handle_ask_action(o): return (f"I’ve saved order #{o}. What would you like to do — status, ETA, tracking link, or cancel?")

stored_order   = None
pending_intent = None

def reset_state():
    global stored_order, pending_intent
    stored_order = None
    pending_intent = None
    try: memory.clear()
    except Exception: pass
    return True

# ---- chat templating ---------------------------------------------------------
def _lc_to_messages() -> List[Dict[str,str]]:
    msgs = [{"role": "system", "content": SYSTEM_PROMPT}]
    hist = memory.load_memory_variables({}).get("chat_history", []) or []
    for m in hist:
        role = "user" if getattr(m, "type", "") == "human" else "assistant"
        msgs.append({"role": role, "content": getattr(m, "content", "")})
    return msgs

def _generate_reply(user_input: str) -> str:
    messages = _lc_to_messages() + [{"role": "user", "content": user_input}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    out = chat_pipe(
        prompt,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        bad_words_ids=BAD_WORD_IDS,
        **GEN_KW,
    )[0]["generated_text"]
    return out.strip()

# ---- main entry --------------------------------------------------------------
def chat_with_memory(user_input: str) -> str:
    global stored_order, pending_intent
    ui = (user_input or "").strip()
    if not ui:
        return "How can I help with your order today?"

    # Fresh session guard
    hist = memory.load_memory_variables({}).get("chat_history", []) or []
    if len(hist) == 0:
        stored_order = None
        pending_intent = None

    # 1) Safety
    if is_sexual_or_toxic(ui):
        reply = REFUSAL
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    low = ui.lower()

    # 2) Quick intents
    if any(tok in low for tok in ["thank you","thanks","thx"]):
        reply = handle_gratitude()
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 3) PENDING-INTENT SHORT-CIRCUIT (fix for "It's #26790" case)
    new_o = extract_order(ui)
    if pending_intent:
        if new_o:
            stored_order = new_o
            fn = {"status": handle_status, "eta": handle_eta,
                  "track": handle_track, "link": handle_link,
                  "cancel": handle_cancel}[pending_intent]
            reply = fn(stored_order)
            pending_intent = None
            memory.save_context({"input": ui}, {"output": reply})
            return reply
        # still waiting for an order number
        reply = "Got it—please share your order number (e.g., #12345)."
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 4) If message provides an order number (no pending intent yet), save it & ask action
    if new_o:
        stored_order = new_o
        reply = handle_ask_action(stored_order)
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 5) Support-only guard (message must be support-ish)
    if not any(k in low for k in ALLOWED_KEYWORDS) and not any(k in low for k in ("hi","hello","hey")):
        reply = "I’m for store support only (orders, shipping, returns, warranty, account). How can I help with those?"
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 6) Intent classification
    if any(k in low for k in ["status","where is my order","check status"]):
        intent = "status"
    elif any(k in low for k in ["how long","eta","delivery time"]):
        intent = "eta"
    elif any(k in low for k in ["how can i track","track my order","where is my package","tracking"]):
        intent = "track"
    elif "tracking link" in low or "resend" in low or "link" in low:
        intent = "link"
    elif any(k in low for k in ["cancel","cancellation","abort order"]):
        intent = "cancel"
    elif any(k in low for k in ["warranty","guarantee","policy"]):
        intent = "warranty_policy"
    elif "return" in low:
        intent = "return_policy"
    else:
        intent = "fallback"

    # 7) Handle intents
    if intent in ("status","eta","track","link","cancel"):
        if not stored_order:
            pending_intent = intent
            reply = "Sure—what’s your order number (e.g., #12345)?"
        else:
            fn = {"status": handle_status,"eta": handle_eta,"track": handle_track,
                  "link": handle_link,"cancel": handle_cancel}[intent]
            reply = fn(stored_order)
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    if intent == "warranty_policy":
        reply = handle_warranty_policy()
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    if intent == "return_policy":
        reply = handle_return_policy()
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 8) LLM fallback (on-topic) + post-check
    reply = _generate_reply(ui)
    if is_sexual_or_toxic(reply): reply = REFUSAL
    memory.save_context({"input": ui}, {"output": reply})
    return reply