slm-customer-support-chatbot / SLM_CService.py
BasilTh
Deploy updated SLM customer-support chatbot
bbe7b0d
raw
history blame
4.84 kB
# ─── SLM_CService.py ─────────────────────────────────────────────────────────
import os
# Fix for libgomp warning in Spaces
os.environ["OMP_NUM_THREADS"] = "1"
# 1) Unsloth must come first
import unsloth
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel
# 2) Simple in-memory convo buffer
# we keep alternating (user, assistant) tuples
conversation_history = []
# 3) Model + adapter path in your repo (copied into the Space repo root)
MODEL_DIR = "ThomasBasil/bitext-qlora-tinyllama"
# 4) Load tokenizer from local dir
tokenizer = AutoTokenizer.from_pretrained(
"ThomasBasil/bitext-qlora-tinyllama", use_fast=False
)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"
tokenizer.truncation_side = "right"
# 5) QLoRA + Unsloth load in 4-bit
bnb_cfg = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype=torch.bfloat16
)
# 5a) Base model
model = unsloth.FastLanguageModel.from_pretrained(
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
load_in_4bit=True,
quantization_config=bnb_cfg,
device_map="auto",
trust_remote_code=True
)
# 5b) Attach your LoRA adapter
model = PeftModel.from_pretrained(model, "ThomasBasil/bitext-qlora-tinyllama")
# 6) HF text-gen pipeline
chat_pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
trust_remote_code=True,
return_full_text=False,
generate_kwargs={"max_new_tokens":128, "do_sample":True, "top_p":0.9, "temperature":0.7}
)
# 7) FSM helpers (your existing code unmodified)
import re
order_re = re.compile(r"#(\d{1,10})")
def extract_order(text: str):
m = order_re.search(text)
return m.group(1) if m else None
def handle_status(o): return f"Order #{o} is in transit and should arrive in 3–5 business days."
def handle_eta(o): return f"Delivery for order #{o} typically takes 3–5 days; you can track it at https://track.example.com/{o}"
def handle_track(o): return f"Track order #{o} here: https://track.example.com/{o}"
def handle_link(o): return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}"
def handle_return_policy(_=None):
return ("Our return policy allows returns of unused items in their original packaging "
"within 30 days of receipt. Would you like me to connect you with a human agent?")
def handle_gratitude(_=None):
return "You’re welcome! Is there anything else I can help with?"
def handle_escalation(_=None):
return "I’m sorry, I don’t have that information. Would you like me to connect you with a human agent?"
# 8) Core chat fn
stored_order = None
pending_intent = None
def chat_with_memory(user_input: str) -> str:
global stored_order, pending_intent
# A) Save into history
conversation_history.append(("User", user_input))
# B) New order?
new_o = extract_order(user_input)
if new_o:
stored_order = new_o
if pending_intent in ("status","eta","track","link"):
fn = {"status":handle_status,"eta":handle_eta,"track":handle_track,"link":handle_link}[pending_intent]
reply = fn(stored_order)
pending_intent = None
conversation_history.append(("Assistant", reply))
return reply
ui = user_input.lower().strip()
# C) Gratitude
if any(tok in ui for tok in ["thank you","thanks","thx"]):
reply = handle_gratitude()
conversation_history.append(("Assistant", reply))
return reply
# D) Return policy
if "return" in ui:
reply = handle_return_policy()
conversation_history.append(("Assistant", reply))
return reply
# E) Classify intent
if any(k in ui for k in ["status","where is my order","check status"]):
intent="status"
elif any(k in ui for k in ["how long","eta","delivery time"]):
intent="eta"
elif any(k in ui for k in ["how can i track","track my order","where is my package"]):
intent="track"
elif "tracking link" in ui or "resend" in ui:
intent="link"
else:
intent="fallback"
# F) Fulfill or ask order #
if intent in ("status","eta","track","link"):
if not stored_order:
pending_intent = intent
reply = "Sureβ€”what’s your order number (e.g., #12345)?"
else:
fn = {"status":handle_status,"eta":handle_eta,"track":handle_track,"link":handle_link}[intent]
reply = fn(stored_order)
else:
reply = handle_escalation()
# G) Save & done
conversation_history.append(("Assistant", reply))
return reply