BasilTh committed on
Commit
97664b5
·
1 Parent(s): aa36139

Deploy updated SLM customer-support chatbot

Browse files
Files changed (1) hide show
  1. SLM_CService.py +3 -4
SLM_CService.py CHANGED
@@ -1,11 +1,10 @@
1
  # ─── SLM_CService.py ─────────────────────────────────────────────────────────
2
  import os
3
  # Fix for libgomp warning in Spaces
4
- os.environ.pop("OMP_NUM_THREADS", None)
5
 
6
  # 1) Unsloth must come first
7
  import unsloth
8
- import triton
9
  import torch
10
 
11
  from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
@@ -39,7 +38,7 @@ bnb_cfg = BitsAndBytesConfig(
39
  model = unsloth.FastLanguageModel.from_pretrained(
40
  "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
41
  load_in_4bit=True,
42
- quant_type="nf4",
43
  device_map="auto",
44
  trust_remote_code=True
45
  )
@@ -53,7 +52,7 @@ chat_pipe = pipeline(
53
  tokenizer=tokenizer,
54
  trust_remote_code=True,
55
  return_full_text=False,
56
- generation_kwargs={"max_new_tokens":128, "do_sample":True, "top_p":0.9, "temperature":0.7}
57
  )
58
 
59
  # 7) FSM helpers (your existing code unmodified)
 
1
  # ─── SLM_CService.py ─────────────────────────────────────────────────────────
2
  import os
3
  # Fix for libgomp warning in Spaces
4
+ os.environ["OMP_NUM_THREADS"] = "1"
5
 
6
  # 1) Unsloth must come first
7
  import unsloth
 
8
  import torch
9
 
10
  from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
 
38
  model = unsloth.FastLanguageModel.from_pretrained(
39
  "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
40
  load_in_4bit=True,
41
+ quantization_config=bnb_cfg,
42
  device_map="auto",
43
  trust_remote_code=True
44
  )
 
52
  tokenizer=tokenizer,
53
  trust_remote_code=True,
54
  return_full_text=False,
55
+ generate_kwargs={"max_new_tokens":128, "do_sample":True, "top_p":0.9, "temperature":0.7}
56
  )
57
 
58
  # 7) FSM helpers (your existing code unmodified)