# ─── SLM_CService.py ──────────────────────────────────────────────────────────
import os
# Fix for libgomp warning in Spaces
os.environ.pop("OMP_NUM_THREADS", None)
# 1) Unsloth must come first
import unsloth
import triton
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel
# 2) Simple in-memory convo buffer
# we keep alternating (user, assistant) tuples
conversation_history = []
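# e.g. after one exchange: [("User", "Where is my order #12345?"), ("Assistant", "Order #12345 is in transit ...")]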
# 3) Model + adapter path in your repo (copied into the Space repo root)
MODEL_DIR = "bitext-qlora-tinyllama"
# 4) Load tokenizer from local dir
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_DIR,
    use_fast=False,
    local_files_only=True,
)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"
tokenizer.truncation_side = "right"
# 5) QLoRA + Unsloth load in 4-bit
# bnb_cfg documents the intended 4-bit settings; with load_in_4bit=True below,
# Unsloth builds an equivalent NF4 double-quant config internally.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)
# 5a) Base model (FastLanguageModel.from_pretrained returns a (model, tokenizer)
# pair; we keep the tokenizer loaded above and discard Unsloth's copy)
model, _ = unsloth.FastLanguageModel.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    load_in_4bit=True,   # NF4 + double quantization applied internally by Unsloth
    device_map="auto",
    trust_remote_code=True,
)
# 5b) Attach your LoRA adapter
model = PeftModel.from_pretrained(model, MODEL_DIR, local_files_only=True)
# 6) HF text-gen pipeline
# Generation defaults are passed as plain kwargs; the text-generation pipeline
# forwards them to model.generate() (a nested generation_kwargs dict is not picked up).
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    return_full_text=False,
    max_new_tokens=128,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
)
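# (Illustrative sketch, not called by the FSM below.) One way to get a free-form
# reply from chat_pipe, assuming the TinyLlama tokenizer ships a chat template:
def llm_reply(user_input: str) -> str:
    prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": user_input}],
        tokenize=False,
        add_generation_prompt=True,
    )
    return chat_pipe(prompt)[0]["generated_text"]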
# 7) FSM helpers (your existing code unmodified)
import re
order_re = re.compile(r"#(\d{1,10})")
def extract_order(text: str):
    m = order_re.search(text)
    return m.group(1) if m else None
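# Example (illustrative): extract_order("Where is my order #12345?") -> "12345";
# returns None when no "#<digits>" pattern is present.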
def handle_status(o): return f"Order #{o} is in transit and should arrive in 3–5 business days."
def handle_eta(o): return f"Delivery for order #{o} typically takes 3–5 days; you can track it at https://track.example.com/{o}"
def handle_track(o): return f"Track order #{o} here: https://track.example.com/{o}"
def handle_link(o): return f"Here's the latest tracking link for order #{o}: https://track.example.com/{o}"
def handle_return_policy(_=None):
    return ("Our return policy allows returns of unused items in their original packaging "
            "within 30 days of receipt. Would you like me to connect you with a human agent?")
def handle_gratitude(_=None):
    return "You're welcome! Is there anything else I can help with?"
def handle_escalation(_=None):
    return "I'm sorry, I don't have that information. Would you like me to connect you with a human agent?"
# 8) Core chat fn
stored_order = None
pending_intent = None
def chat_with_memory(user_input: str) -> str:
    global stored_order, pending_intent
    # A) Save into history
    conversation_history.append(("User", user_input))
    # B) New order?
    new_o = extract_order(user_input)
    if new_o:
        stored_order = new_o
        if pending_intent in ("status","eta","track","link"):
            fn = {"status":handle_status,"eta":handle_eta,"track":handle_track,"link":handle_link}[pending_intent]
            reply = fn(stored_order)
            pending_intent = None
            conversation_history.append(("Assistant", reply))
            return reply
    ui = user_input.lower().strip()
    # C) Gratitude
    if any(tok in ui for tok in ["thank you","thanks","thx"]):
        reply = handle_gratitude()
        conversation_history.append(("Assistant", reply))
        return reply
    # D) Return policy
    if "return" in ui:
        reply = handle_return_policy()
        conversation_history.append(("Assistant", reply))
        return reply
    # E) Classify intent
    if any(k in ui for k in ["status","where is my order","check status"]):
        intent = "status"
    elif any(k in ui for k in ["how long","eta","delivery time"]):
        intent = "eta"
    elif any(k in ui for k in ["how can i track","track my order","where is my package"]):
        intent = "track"
    elif "tracking link" in ui or "resend" in ui:
        intent = "link"
    else:
        intent = "fallback"
    # F) Fulfill or ask order #
    if intent in ("status","eta","track","link"):
        if not stored_order:
            pending_intent = intent
            reply = "Sure, what's your order number (e.g., #12345)?"
        else:
            fn = {"status":handle_status,"eta":handle_eta,"track":handle_track,"link":handle_link}[intent]
            reply = fn(stored_order)
    else:
        reply = handle_escalation()
    # G) Save & done
    conversation_history.append(("Assistant", reply))
    return reply
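# (Illustrative sketch, not part of the original service.) A minimal console loop
# for smoke-testing chat_with_memory locally; in a Space this would typically be
# wired to a UI instead.
if __name__ == "__main__":
    print("Type 'quit' to exit.")
    while True:
        msg = input("You: ")
        if msg.strip().lower() in ("quit", "exit"):
            break
        print("Bot:", chat_with_memory(msg))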