Spaces:

ThomasBasil
/

slm-customer-support-chatbot

Paused

slm-customer-support-chatbot / SLM_CService.py

BasilTh

Deploy updated SLM customer-support chatbot

bbe7b0d about 1 month ago

4.84 kB

	# ─── SLM_CService.py ─────────────────────────────────────────────────────────
	import os
	# Fix for libgomp warning in Spaces
	os.environ["OMP_NUM_THREADS"] = "1"

	# 1) Unsloth must come first
	import unsloth
	import torch

	from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
	from peft import PeftModel

	# 2) Simple in-memory convo buffer
	# we keep alternating (user, assistant) tuples
	conversation_history = []

	# 3) Model + adapter path in your repo (copied into the Space repo root)
	MODEL_DIR = "ThomasBasil/bitext-qlora-tinyllama"

	# 4) Load tokenizer from local dir
	tokenizer = AutoTokenizer.from_pretrained(
	"ThomasBasil/bitext-qlora-tinyllama", use_fast=False
	)
	tokenizer.pad_token_id = tokenizer.eos_token_id
	tokenizer.padding_side = "left"
	tokenizer.truncation_side = "right"

	# 5) QLoRA + Unsloth load in 4-bit
	bnb_cfg = BitsAndBytesConfig(
	load_in_4bit=True,
	bnb_4bit_quant_type="nf4",
	bnb_4bit_use_double_quant=True,
	bnb_4bit_compute_dtype=torch.bfloat16
	)
	# 5a) Base model
	model = unsloth.FastLanguageModel.from_pretrained(
	"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
	load_in_4bit=True,
	quantization_config=bnb_cfg,
	device_map="auto",
	trust_remote_code=True
	)
	# 5b) Attach your LoRA adapter
	model = PeftModel.from_pretrained(model, "ThomasBasil/bitext-qlora-tinyllama")

	# 6) HF text-gen pipeline
	chat_pipe = pipeline(
	"text-generation",
	model=model,
	tokenizer=tokenizer,
	trust_remote_code=True,
	return_full_text=False,
	generate_kwargs={"max_new_tokens":128, "do_sample":True, "top_p":0.9, "temperature":0.7}
	)

	# 7) FSM helpers (your existing code unmodified)
	import re
	order_re = re.compile(r"#(\d{1,10})")
	def extract_order(text: str):
	m = order_re.search(text)
	return m.group(1) if m else None

	def handle_status(o): return f"Order #{o} is in transit and should arrive in 3–5 business days."
	def handle_eta(o): return f"Delivery for order #{o} typically takes 3–5 days; you can track it at https://track.example.com/{o}"
	def handle_track(o): return f"Track order #{o} here: https://track.example.com/{o}"
	def handle_link(o): return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}"
	def handle_return_policy(_=None):
	return ("Our return policy allows returns of unused items in their original packaging "
	"within 30 days of receipt. Would you like me to connect you with a human agent?")
	def handle_gratitude(_=None):
	return "You’re welcome! Is there anything else I can help with?"
	def handle_escalation(_=None):
	return "I’m sorry, I don’t have that information. Would you like me to connect you with a human agent?"

	# 8) Core chat fn
	stored_order = None
	pending_intent = None
	def chat_with_memory(user_input: str) -> str:
	global stored_order, pending_intent

	# A) Save into history
	conversation_history.append(("User", user_input))

	# B) New order?
	new_o = extract_order(user_input)
	if new_o:
	stored_order = new_o
	if pending_intent in ("status","eta","track","link"):
	fn = {"status":handle_status,"eta":handle_eta,"track":handle_track,"link":handle_link}[pending_intent]
	reply = fn(stored_order)
	pending_intent = None
	conversation_history.append(("Assistant", reply))
	return reply

	ui = user_input.lower().strip()

	# C) Gratitude
	if any(tok in ui for tok in ["thank you","thanks","thx"]):
	reply = handle_gratitude()
	conversation_history.append(("Assistant", reply))
	return reply

	# D) Return policy
	if "return" in ui:
	reply = handle_return_policy()
	conversation_history.append(("Assistant", reply))
	return reply

	# E) Classify intent
	if any(k in ui for k in ["status","where is my order","check status"]):
	intent="status"
	elif any(k in ui for k in ["how long","eta","delivery time"]):
	intent="eta"
	elif any(k in ui for k in ["how can i track","track my order","where is my package"]):
	intent="track"
	elif "tracking link" in ui or "resend" in ui:
	intent="link"
	else:
	intent="fallback"

	# F) Fulfill or ask order #
	if intent in ("status","eta","track","link"):
	if not stored_order:
	pending_intent = intent
	reply = "Sure—what’s your order number (e.g., #12345)?"
	else:
	fn = {"status":handle_status,"eta":handle_eta,"track":handle_track,"link":handle_link}[intent]
	reply = fn(stored_order)
	else:
	reply = handle_escalation()

	# G) Save & done
	conversation_history.append(("Assistant", reply))
	return reply