BasilTh committed on
Commit
97664b5
·
1 Parent(s): aa36139

Deploy updated SLM customer-support chatbot

Browse files
Files changed (1) hide show
  1. SLM_CService.py +3 -4
SLM_CService.py CHANGED
@@ -1,11 +1,10 @@
1
  # ─── SLM_CService.py ─────────────────────────────────────────────────────────
2
  import os
3
  # Fix for libgomp warning in Spaces
4
- os.environ.pop("OMP_NUM_THREADS", None)
5
 
6
  # 1) Unsloth must come first
7
  import unsloth
8
- import triton
9
  import torch
10
 
11
  from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
@@ -39,7 +38,7 @@ bnb_cfg = BitsAndBytesConfig(
39
  model = unsloth.FastLanguageModel.from_pretrained(
40
  "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
41
  load_in_4bit=True,
42
- quant_type="nf4",
43
  device_map="auto",
44
  trust_remote_code=True
45
  )
@@ -53,7 +52,7 @@ chat_pipe = pipeline(
53
  tokenizer=tokenizer,
54
  trust_remote_code=True,
55
  return_full_text=False,
56
- generation_kwargs={"max_new_tokens":128, "do_sample":True, "top_p":0.9, "temperature":0.7}
57
  )
58
 
59
  # 7) FSM helpers (your existing code unmodified)
 
1
  # ─── SLM_CService.py ─────────────────────────────────────────────────────────
2
  import os
3
  # Fix for libgomp warning in Spaces
4
+ os.environ["OMP_NUM_THREADS"] = "1"
5
 
6
  # 1) Unsloth must come first
7
  import unsloth
 
8
  import torch
9
 
10
  from transformers import AutoTokenizer, BitsAndBytesConfig, pipeline
 
38
  model = unsloth.FastLanguageModel.from_pretrained(
39
  "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
40
  load_in_4bit=True,
41
+ quantization_config=bnb_cfg,
42
  device_map="auto",
43
  trust_remote_code=True
44
  )
 
52
  tokenizer=tokenizer,
53
  trust_remote_code=True,
54
  return_full_text=False,
55
+ generate_kwargs={"max_new_tokens":128, "do_sample":True, "top_p":0.9, "temperature":0.7}
56
  )
57
 
58
  # 7) FSM helpers (your existing code unmodified)