# ── SLM_CService.py ───────────────────────────────────────────────────────────
# Customer-support-only chatbot with strict NSFW blocking + proper Reset.

import os
import re
from typing import List, Dict

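# Limit OpenMP threads and make sure the HF Hub isn't pinned offline for downloads.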
os.environ["OMP_NUM_THREADS"] = "1"
os.environ.pop("HF_HUB_OFFLINE", None)

# Unsloth must come before transformers/peft
import unsloth  # noqa: E402
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel
from langchain.memory import ConversationBufferMemory

# ──────────────────────────────────────────────────────────────────────────────
REPO = "ThomasBasil/bitext-qlora-tinyllama"
BASE = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

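# Decoding defaults shared by every generation call; sampling keeps replies varied.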
GEN_KW = dict(
    max_new_tokens=160, do_sample=True, top_p=0.9, temperature=0.7,
    repetition_penalty=1.1, no_repeat_ngram_size=4,
)

bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.float16,
)

# ---- Tokenizer & model -------------------------------------------------------
tokenizer = AutoTokenizer.from_pretrained(REPO, use_fast=False)
if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"
tokenizer.truncation_side = "right"

model, _ = unsloth.FastLanguageModel.from_pretrained(
    model_name=BASE, load_in_4bit=True, quantization_config=bnb_cfg,
    device_map="auto", trust_remote_code=True,
)
model = PeftModel.from_pretrained(model, REPO)   # attach the QLoRA adapter
unsloth.FastLanguageModel.for_inference(model)   # enable fast inference once the adapter is attached
model.eval()

chat_pipe = pipeline(
    "text-generation", model=model, tokenizer=tokenizer,
    trust_remote_code=True, return_full_text=False,
)

# ──────────────────────────────────────────────────────────────────────────────
# Moderation: keyword blocklist + NSFW / toxicity classifier pipelines
from transformers import TextClassificationPipeline
SEXUAL_TERMS = [
    "sex","sexual","porn","nsfw","fetish","kink","bdsm","nude","naked","anal",
    "blowjob","handjob","cum","breast","boobs","vagina","penis","semen","ejaculate",
    "doggy","missionary","cowgirl","69","kamasutra","dominatrix","submissive","spank",
    "sex position","have sex","make love","how to flirt","dominant in bed",
]
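# Build token-id sequences for HF generate()'s `bad_words_ids`, so decoding
# itself is blocked from emitting the terms above.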
def _bad_words_ids(tok, terms: List[str]) -> List[List[int]]:
    ids=set()
    for t in terms:
        for v in (t, " "+t):
            toks = tok(v, add_special_tokens=False).input_ids
            if toks: ids.add(tuple(toks))
    return [list(t) for t in ids]
BAD_WORD_IDS = _bad_words_ids(tokenizer, SEXUAL_TERMS)

nsfw_cls: TextClassificationPipeline = pipeline(
    "text-classification", model="eliasalbouzidi/distilbert-nsfw-text-classifier", truncation=True,
)
toxicity_cls: TextClassificationPipeline = pipeline(
    "text-classification", model="unitary/toxic-bert", truncation=True, return_all_scores=True,
)
# Word-boundary matching avoids substring false positives
# (e.g. "cum" inside "document", or "69" inside an order number like #6910).
SEXUAL_RE = re.compile(r"\b(?:" + "|".join(re.escape(t) for t in SEXUAL_TERMS) + r")\b")

def is_sexual_or_toxic(text: str) -> bool:
    t = (text or "").lower()
    if SEXUAL_RE.search(t): return True
    try:
        res = nsfw_cls(t)[0]
        if (res.get("label","").lower()=="nsfw") and float(res.get("score",0))>0.60: return True
    except Exception: pass
    try:
        scores = toxicity_cls(t)[0]
        if any(s["score"]>0.60 and s["label"].lower() in
               {"toxic","severe_toxic","obscene","threat","insult","identity_hate"} for s in scores):
            return True
    except Exception: pass
    return False
REFUSAL = ("Sorry, I can’t help with that. I’m only for store support "
           "(orders, shipping, ETA, tracking, returns, warranty, account).")

# ──────────────────────────────────────────────────────────────────────────────
# Memory + globals
# memory_key must match the "chat_history" lookups below (the default is "history").
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
SYSTEM_PROMPT = (
    "You are a customer-support assistant for our store. Only handle account, "
    "orders, shipping, delivery ETA, tracking links, returns/refunds, warranty, and store policy. "
    "If a request is out of scope or sexual/NSFW, refuse briefly and offer support options. "
    "Be concise and professional."
)
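# In-scope keyword substrings consulted by the support-only guard below.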
ALLOWED_KEYWORDS = (
    "order","track","status","delivery","shipping","ship","eta","arrive",
    "refund","return","exchange","warranty","guarantee","account","billing",
    "address","cancel","policy","help","support","agent","human"
)

order_re = re.compile(r"#(\d{1,10})")
def extract_order(text: str):
    m = order_re.search(text); return m.group(1) if m else None

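# Canned replies for the rule-based intents; track.example.com is a placeholder tracker URL.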
def handle_status(o): return f"Order #{o} is in transit and should arrive in 3–5 business days."
def handle_eta(o):    return f"Delivery for order #{o} typically takes 3–5 days; you can track it at https://track.example.com/{o}"
def handle_track(o):  return f"Track order #{o} here: https://track.example.com/{o}"
def handle_link(o):   return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}"
def handle_return_policy(_=None):
    return ("Our return policy allows returns of unused items in original packaging within 30 days of receipt. "
            "Would you like me to connect you with a human agent?")
def handle_cancel(o=None):
    return (f"I’ve submitted a cancellation request for order #{o}. If it has already shipped, "
            "we’ll process a return/refund once it’s back. You’ll receive a confirmation email shortly.")
def handle_gratitude(_=None): return "You’re welcome! Anything else I can help with?"
def handle_escalation(_=None): return "I can connect you with a human agent. Would you like me to do that?"

# >>> state that must reset <<<
stored_order   = None
pending_intent = None

# public reset hook (called from app.py)
def reset_state():
    global stored_order, pending_intent
    stored_order = None
    pending_intent = None
    # clear the LangChain conversation buffer
    try: memory.clear()
    except Exception: pass
    return True

# ---- chat templating ---------------------------------------------------------
def _lc_to_messages() -> List[Dict[str,str]]:
    msgs = [{"role": "system", "content": SYSTEM_PROMPT}]
    hist = memory.load_memory_variables({}).get("chat_history", []) or []
    for m in hist:
        role = "user" if getattr(m, "type", "") == "human" else "assistant"
        msgs.append({"role": role, "content": getattr(m, "content", "")})
    return msgs

def _generate_reply(user_input: str) -> str:
    messages = _lc_to_messages() + [{"role": "user", "content": user_input}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    out = chat_pipe(
        prompt,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        bad_words_ids=BAD_WORD_IDS,
        **GEN_KW,
    )[0]["generated_text"]
    return out.strip()

# ---- main entry --------------------------------------------------------------
def chat_with_memory(user_input: str) -> str:
    global stored_order, pending_intent
    ui = (user_input or "").strip()
    if not ui:
        return "How can I help with your order today?"

    # If memory is empty, start clean (fresh session)
    hist = memory.load_memory_variables({}).get("chat_history", []) or []
    if len(hist) == 0:
        stored_order = None
        pending_intent = None

    # 1) Safety
    if is_sexual_or_toxic(ui):
        reply = REFUSAL
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    low = ui.lower()

    # 2) Quick intents
    if any(tok in low for tok in ["thank you","thanks","thx"]):
        reply = handle_gratitude()
        memory.save_context({"input": ui}, {"output": reply})
        return reply
    if "return" in low:
        reply = handle_return_policy()
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 3) Order number FIRST
    new_o = extract_order(ui)
    if new_o:
        stored_order = new_o
        if pending_intent in ("status","eta","track","link","cancel"):
            fn = {"status": handle_status,"eta": handle_eta,"track": handle_track,
                  "link": handle_link,"cancel": handle_cancel}[pending_intent]
            reply = fn(stored_order); pending_intent = None
            memory.save_context({"input": ui}, {"output": reply}); return reply

    # 4) Support-only guard (skip if pending intent or new order number)
    if pending_intent is None and new_o is None:
        if not any(k in low for k in ALLOWED_KEYWORDS) and not re.search(r"\b(hi|hello|hey)\b", low):
            reply = "I’m for store support only (orders, shipping, returns, warranty, account). How can I help with those?"
            memory.save_context({"input": ui}, {"output": reply})
            return reply

    # 5) Intents (added 'cancel')
    if any(k in low for k in ["status","where is my order","check status"]):
        intent = "status"
    elif any(k in low for k in ["how long","eta","delivery time"]):
        intent = "eta"
    elif any(k in low for k in ["how can i track","track my order","where is my package","tracking"]):
        intent = "track"
    elif "tracking link" in low or "resend" in low or "link" in low:
        intent = "link"
    elif "cancel" in low:
        intent = "cancel"
    else:
        intent = "fallback"

    if intent in ("status","eta","track","link","cancel"):
        if not stored_order:
            pending_intent = intent
            reply = "Sureβ€”what’s your order number (e.g., #12345)?"
        else:
            fn = {"status": handle_status,"eta": handle_eta,"track": handle_track,
                  "link": handle_link,"cancel": handle_cancel}[intent]
            reply = fn(stored_order)
        memory.save_context({"input": ui}, {"output": reply})
        return reply

    # 6) LLM fallback (on-topic) + post-check
    reply = _generate_reply(ui)
    if is_sexual_or_toxic(reply): reply = REFUSAL
    memory.save_context({"input": ui}, {"output": reply})
    return reply
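
# ---- quick smoke test --------------------------------------------------------
# Illustrative usage only (assumes the checkpoints above download successfully):
# one pending-intent flow through chat_with_memory(), then a session reset.
if __name__ == "__main__":
    for turn in ("hi, where is my order?", "#12345", "thanks!"):
        print(f"> {turn}")
        print(chat_with_memory(turn))
    reset_state()  # clears stored_order/pending_intent and the memory buffer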