# Provenance (from the repository page header): last commit a621f88 by BasilTh,
# "Deploy latest SLM customer-support chatbot"; file size 4.7 kB.
import re
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from langchain.memory import ConversationBufferMemory
import torch, unsloth, triton
# ─── LOAD MODEL & TOKENIZER ─────────────────────────────────────────────
# Location of the fine-tuned checkpoint. Point this at another local
# directory or a Hugging Face repo id as needed.
FINETUNED_DIR = "/content/drive/MyDrive/bitext-qlora-tinyllama"

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Non-fast tokenizer. The EOS token id is reused as the padding token id;
# padding is applied on the left and truncation on the right.
tokenizer = AutoTokenizer.from_pretrained(FINETUNED_DIR, use_fast=False)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"
tokenizer.truncation_side = "right"

# Quantized model; device placement is delegated to device_map="auto".
# NOTE(review): trust_remote_code=True permits running custom code shipped
# with the checkpoint — confirm the checkpoint source is trusted.
model = AutoModelForCausalLM.from_pretrained(
    FINETUNED_DIR,
    quantization_config=bnb_cfg,
    device_map="auto",
    trust_remote_code=True,
)
# ─── MEMORY & PIPELINE ─────────────────────────────────────────────────
# Conversation buffer. History is exposed under the "user_lines" key as a
# single string (return_messages=False) using the "User"/"Assistant" prefixes.
memory = ConversationBufferMemory(
    memory_key="user_lines",
    human_prefix="User",
    ai_prefix="Assistant",
    return_messages=False,
)

# Module-level dialogue state, updated by chat_with_memory():
#   stored_order   – last order number seen in a user message, or None
#   pending_intent – order intent waiting for an order number, or None
stored_order = None
pending_intent = None

# Text-generation pipeline over the loaded model; return_full_text=False
# makes it return only the generated continuation, not the prompt.
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    return_full_text=False,
)
# ─── HELPERS & HANDLERS ────────────────────────────────────────────────
# Matches an order reference such as "#12345": a hash sign followed by 1–10
# digits, with the digits captured in group 1. In a raw string the digit class
# must be written with a single backslash (\d); doubling it would match a
# literal backslash followed by the letter "d" instead of digits.
order_re = re.compile(r"#(\d{1,10})")


def extract_order(text: str) -> "str | None":
    """Return the first order number mentioned in *text*.

    Args:
        text: Raw user message. Empty or ``None`` input is treated as
            containing no order number.

    Returns:
        The digits following the first ``#`` marker (without the ``#``),
        or ``None`` when no order reference is present.
    """
    if not text:
        return None
    m = order_re.search(text)
    return m.group(1) if m else None
def handle_status(o: str) -> str:
    """Return the canned shipping-status reply for order number *o*.

    Args:
        o: Order number without the leading ``#``.

    Returns:
        A fixed status sentence mentioning the order number.
    """
    return f"Order #{o} is in transit and should arrive in 3–5 business days."
def handle_eta(o: str) -> str:
    """Return the canned delivery-time reply for order number *o*.

    Args:
        o: Order number without the leading ``#``.

    Returns:
        A fixed delivery estimate followed by the order's tracking URL.
    """
    return (
        f"Delivery for order #{o} typically takes 3–5 days; "
        f"you can track it at https://track.example.com/{o}"
    )
def handle_track(o: str) -> str:
    """Return the reply that points the user at the tracking page for *o*.

    Args:
        o: Order number without the leading ``#``.

    Returns:
        A sentence containing the order number and its tracking URL.
    """
    return f"Track order #{o} here: https://track.example.com/{o}"
def handle_link(o: str) -> str:
    """Return the reply that re-sends the tracking link for order *o*.

    Args:
        o: Order number without the leading ``#``.

    Returns:
        A sentence containing the order number and its tracking URL.
    """
    return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}"
def handle_return_policy(_: object = None) -> str:
    """Return the canned return-policy reply.

    Args:
        _: Ignored; accepted so the function can be called with or without
            an argument.

    Returns:
        The fixed return-policy text, ending with an offer to connect the
        user to a human agent.
    """
    return (
        "Our return policy allows returns of unused items in their original packaging "
        "within 30 days of receipt. Would you like me to connect you with a human agent?"
    )
def handle_gratitude(_: object = None) -> str:
    """Return the canned reply to a thank-you message.

    Args:
        _: Ignored; accepted so the function can be called with or without
            an argument.

    Returns:
        A fixed acknowledgement asking whether further help is needed.
    """
    return "You’re welcome! Is there anything else I can help with?"
def handle_escalation(_: object = None) -> str:
    """Return the fallback reply offering a hand-off to a human agent.

    Args:
        _: Ignored; accepted so the function can be called with or without
            an argument.

    Returns:
        A fixed apology followed by an offer to connect a human agent.
    """
    return "I’m sorry, I don’t have that information. Would you like me to connect you with a human agent?"
# ─── MAIN CHAT FUNCTION ────────────────────────────────────────────────
# Keyword table for order-related intents. Rows are checked top to bottom and
# the first row with a keyword occurring in the lower-cased message wins.
_ORDER_INTENT_KEYWORDS = (
    ("status", ("status", "where is my order", "check status")),
    ("eta", ("how long", "eta", "delivery time")),
    ("track", ("how can i track", "track my order", "where is my package")),
    ("link", ("tracking link", "resend")),
)


def _detect_order_intent(ui):
    """Return the first order intent whose keywords occur in *ui*, else "fallback"."""
    for intent, keywords in _ORDER_INTENT_KEYWORDS:
        if any(k in ui for k in keywords):
            return intent
    return "fallback"


def chat_with_memory(user_input: str) -> str:
    """Produce the bot's reply to one user message and record the exchange.

    The function keeps two pieces of module-level state: ``stored_order``
    (the last order number seen) and ``pending_intent`` (an order-related
    request that is waiting for an order number).

    Routing, in priority order:
      1. If the message contains an order number and an intent is pending,
         answer that pending intent immediately.
      2. Thank-you messages get the gratitude reply.
      3. Messages mentioning "return" get the return-policy reply.
      4. Order intents (status / eta / track / link) are answered with the
         stored order number, or the user is asked for one.
      5. Anything else gets the escalation reply.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The reply text. Each call stores exactly one (input, reply) pair
        in ``memory``.
    """
    global stored_order, pending_intent

    order_handlers = {
        "status": handle_status,
        "eta": handle_eta,
        "track": handle_track,
        "link": handle_link,
    }

    # Remember any order number in this message; if an earlier request was
    # waiting for it, answer that request now.
    new_o = extract_order(user_input)
    if new_o:
        stored_order = new_o
        if pending_intent in order_handlers:
            reply = order_handlers[pending_intent](stored_order)
            pending_intent = None
            memory.save_context({"input": user_input}, {"output": reply})
            return reply

    ui = user_input.lower().strip()
    if any(tok in ui for tok in ("thank you", "thanks", "thx")):
        reply = handle_gratitude()
    elif "return" in ui:
        reply = handle_return_policy()
    else:
        # Intent detection lives only in this branch so the gratitude and
        # return replies above are neither overwritten nor dependent on an
        # intent value they never set.
        intent = _detect_order_intent(ui)
        if intent in order_handlers:
            if not stored_order:
                # No order number yet: park the request until one arrives.
                pending_intent = intent
                reply = "Sure—what’s your order number (e.g., #12345)?"
            else:
                reply = order_handlers[intent](stored_order)
        else:
            reply = handle_escalation()

    # Save the turn once, together with its final reply, so the user
    # message is not duplicated in the conversation history.
    memory.save_context({"input": user_input}, {"output": reply})
    return reply