# ─── SLM_CService.py ─────────────────────────────────────────────────────────
import os
import re

# Fix for libgomp warning in Spaces
os.environ.pop("OMP_NUM_THREADS", None)

# 1) Unsloth must come first
import unsloth
import triton
import torch
from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel

# 2) Simple in-memory convo buffer:
# we keep alternating ("User", text) / ("Assistant", text) tuples
conversation_history = []

# 3) Model + adapter path in your repo (copied into the Space repo root)
MODEL_DIR = "bitext-qlora-tinyllama"

# 4) Load tokenizer from local dir
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_DIR,
    use_fast=False,
    local_files_only=True
)
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "left"
tokenizer.truncation_side = "right"

# 5) QLoRA 4-bit settings. Unsloth's load_in_4bit=True applies an equivalent
# NF4 double-quantization config internally, so bnb_cfg documents the setup
# rather than being passed to from_pretrained below.
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# 5a) Base model. FastLanguageModel.from_pretrained returns a (model, tokenizer)
# tuple; we keep the tokenizer from step 4 and discard the second element.
# (quant_type is not a valid argument here; load_in_4bit=True is sufficient.)
model, _ = unsloth.FastLanguageModel.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    load_in_4bit=True,
    device_map="auto",
    trust_remote_code=True
)

# 5b) Attach your LoRA adapter
model = PeftModel.from_pretrained(model, MODEL_DIR, local_files_only=True)

# 6) HF text-gen pipeline. Generation settings are passed as direct keyword
# arguments; the pipeline constructor has no generation_kwargs parameter.
chat_pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    return_full_text=False,
    max_new_tokens=128,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
)

# 7) FSM helpers (your existing rule-based code)
order_re = re.compile(r"#(\d{1,10})")

def extract_order(text: str):
    m = order_re.search(text)
    return m.group(1) if m else None

def handle_status(o):
    return f"Order #{o} is in transit and should arrive in 3–5 business days."

def handle_eta(o):
    return (f"Delivery for order #{o} typically takes 3–5 days; "
            f"you can track it at https://track.example.com/{o}")

def handle_track(o):
    return f"Track order #{o} here: https://track.example.com/{o}"

def handle_link(o):
    return f"Here’s the latest tracking link for order #{o}: https://track.example.com/{o}"

def handle_return_policy(_=None):
    return ("Our return policy allows returns of unused items in their original packaging "
            "within 30 days of receipt. Would you like me to connect you with a human agent?")

def handle_gratitude(_=None):
    return "You’re welcome! Is there anything else I can help with?"

def handle_escalation(_=None):
    return "I’m sorry, I don’t have that information. Would you like me to connect you with a human agent?"
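# 7b) Optional model-backed fallback: a minimal sketch, not wired in by default.
# Step F below routes unknown intents to handle_escalation(); if you would
# rather let the fine-tuned model answer them, something like this could work.
# Assumptions (not part of the original design): the name llm_fallback, the
# 6-turn history window, and that the tokenizer in MODEL_DIR carries a chat
# template (TinyLlama-Chat ships a Zephyr-style one, if saved with the adapter).
def llm_fallback() -> str:
    # chat_with_memory appends the current user message to conversation_history
    # before falling back, so the last few turns already include it.
    messages = [
        {"role": "user" if speaker == "User" else "assistant", "content": text}
        for speaker, text in conversation_history[-6:]
    ]
    # Render the turns with the model's chat template and generate a reply;
    # return_full_text=False means the pipeline returns only the new text.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    return chat_pipe(prompt)[0]["generated_text"].strip()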
# 8) Core chat fn
stored_order = None
pending_intent = None

# Map each trackable intent to its handler once, instead of repeating the
# dict literal at every call site.
INTENT_HANDLERS = {
    "status": handle_status,
    "eta": handle_eta,
    "track": handle_track,
    "link": handle_link,
}

def chat_with_memory(user_input: str) -> str:
    global stored_order, pending_intent

    # A) Save into history
    conversation_history.append(("User", user_input))

    # B) New order number? If an intent is already pending, fulfill it now.
    new_o = extract_order(user_input)
    if new_o:
        stored_order = new_o
        if pending_intent in INTENT_HANDLERS:
            reply = INTENT_HANDLERS[pending_intent](stored_order)
            pending_intent = None
            conversation_history.append(("Assistant", reply))
            return reply

    ui = user_input.lower().strip()

    # C) Gratitude
    if any(tok in ui for tok in ["thank you", "thanks", "thx"]):
        reply = handle_gratitude()
        conversation_history.append(("Assistant", reply))
        return reply

    # D) Return policy
    if "return" in ui:
        reply = handle_return_policy()
        conversation_history.append(("Assistant", reply))
        return reply

    # E) Classify intent by keyword
    if any(k in ui for k in ["status", "where is my order", "check status"]):
        intent = "status"
    elif any(k in ui for k in ["how long", "eta", "delivery time"]):
        intent = "eta"
    elif any(k in ui for k in ["how can i track", "track my order", "where is my package"]):
        intent = "track"
    elif "tracking link" in ui or "resend" in ui:
        intent = "link"
    else:
        intent = "fallback"

    # F) Fulfill the intent, or ask for the order number first
    if intent in INTENT_HANDLERS:
        if not stored_order:
            pending_intent = intent
            reply = "Sure—what’s your order number (e.g., #12345)?"
        else:
            reply = INTENT_HANDLERS[intent](stored_order)
    else:
        reply = handle_escalation()  # or llm_fallback(); see the sketch in 7b

    # G) Save & done
    conversation_history.append(("Assistant", reply))
    return reply
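# 9) Quick smoke test when the file is run directly (illustrative only: the
# Space UI calls chat_with_memory itself, and these example turns are made up).
if __name__ == "__main__":
    for turn in [
        "Where is my order?",
        "It's #48211",
        "Can you resend the tracking link?",
        "Thanks!",
    ]:
        print(f"User: {turn}")
        print(f"Bot:  {chat_with_memory(turn)}")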