phanerozoic committed · verified
Commit d0ad708 · 1 Parent(s): 0ea4bc5

Update app.py

Files changed (1)
  1. app.py +65 -69
app.py CHANGED
@@ -1,19 +1,10 @@
-"""
-SchoolSpirit AI chatbot Space – Granite-3.3-2B
-• Streams tokens to Gradio UI
-• 5-per-minute rate-limit per IP
-• Founder + email hand-off in system prompt
-"""
-
+# app.py • SchoolSpirit AI chatbot Space
+# Granite-3.3-2B-Instruct | Streaming + rate-limit + hallucination guard
 import os, re, time, datetime, threading, traceback, torch, gradio as gr
-from transformers import (
-    AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
-)
+from transformers import (AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer)
 from transformers.utils import logging as hf_logging
 
-# ───────────────────────────────────────────────────────────────────────────────
-# 0. Logging helper
-# ───────────────────────────────────────────────────────────────────────────────
+# ───────────────────────────────── Log helper ────────────────────────────────
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
 def log(msg: str):
@@ -26,21 +17,19 @@ def log(msg: str):
     except FileNotFoundError:
         pass
 
-# ───────────────────────────────────────────────────────────────────────────────
-# 1. Config
-# ───────────────────────────────────────────────────────────────────────────────
+# ─────────────────────────────── Configuration ───────────────────────────────
 MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
 CTX_TOKENS = 1800
 MAX_NEW_TOKENS = 64
 TEMP = 0.6
 MAX_INPUT_CH = 300
-RATE_N, RATE_SEC = 5, 60  # 5 messages / 60 s
+RATE_N, RATE_SEC = 5, 60  # 5 msgs / 60 s per IP
 
 SYSTEM_MSG = (
     "You are **SchoolSpirit AI**, the friendly digital mascot of "
-    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. The company "
-    "installs on-prem AI chat mascots, fine-tunes language models, and ships "
-    "turnkey GPU servers to K-12 schools.\n\n"
+    "SchoolSpirit AI LLC, founded by Charles Norton in 2025. "
+    "The company installs on-prem AI chat mascots, fine-tunes language models, "
+    "and ships turnkey GPU servers to K-12 schools.\n\n"
     "RULES:\n"
     "• Reply in ≤ 4 sentences unless asked for detail.\n"
    "• No personal-data collection; no medical/legal/financial advice.\n"
@@ -48,54 +37,55 @@ SYSTEM_MSG = (
     "• If you can’t answer, politely direct the user to [email protected].\n"
     "• Keep language age-appropriate; avoid profanity, politics, mature themes."
 )
-WELCOME = "Hi there! I’m SchoolSpirit AI. How can I help?"
+WELCOME = "Hi there! I’m SchoolSpirit AI. Ask me about our chat mascots, fine-tuning, or GPU servers."
 
 strip = lambda s: re.sub(r"\s+", " ", s.strip())
 
-# ───────────────────────────────────────────────────────────────────────────────
-# 2. Load model
-# ───────────────────────────────────────────────────────────────────────────────
+# ─────────────────────── Load tokenizer & model ──────────────────────────────
 hf_logging.set_verbosity_error()
 try:
-    log("Loading tokenizer / model …")
+    log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
 
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        device_map="auto" if torch.cuda.is_available() else "cpu",
-        torch_dtype=torch.float16 if torch.cuda.is_available() else "auto",
-        low_cpu_mem_usage=True,
-    )
+    if torch.cuda.is_available():
+        log("GPU detected → loading model in FP-16")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            device_map="auto",
+            torch_dtype=torch.float16,
+        )
+    else:
+        log("No GPU → loading model on CPU (this is slower)")
+        model = AutoModelForCausalLM.from_pretrained(
+            MODEL_ID,
+            device_map="cpu",
+            torch_dtype="auto",
+            low_cpu_mem_usage=True,
+        )
+
     MODEL_ERR = None
     log("Model loaded ✔")
 except Exception as exc:
     MODEL_ERR = f"Model load error: {exc}"
-    log(MODEL_ERR)
+    log("❌ " + MODEL_ERR + "\n" + traceback.format_exc())
 
-# ───────────────────────────────────────────────────────────────────────────────
-# 3. Simple in-memory rate-limiter {ip: [timestamps]}
-# ───────────────────────────────────────────────────────────────────────────────
+# ────────────────────────── Per-IP rate limiter ──────────────────────────────
 VISITS: dict[str, list[float]] = {}
 def allowed(ip: str) -> bool:
     now = time.time()
-    times = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
-    VISITS[ip] = times
-    if len(times) >= RATE_N:
+    VISITS[ip] = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
+    if len(VISITS[ip]) >= RATE_N:
        return False
     VISITS[ip].append(now)
     return True
 
-# ───────────────────────────────────────────────────────────────────────────────
-# 4. Prompt builder (trims old turns to fit context)
-# ───────────────────────────────────────────────────────────────────────────────
+# ─────────────────────── Prompt builder (token budget) ───────────────────────
 def build_prompt(raw: list[dict]) -> str:
     def render(m):
-        role = m["role"]
-        if role == "system":
+        if m["role"] == "system":
             return m["content"]
-        prefix = "User:" if role == "user" else "AI:"
+        prefix = "User:" if m["role"] == "user" else "AI:"
         return f"{prefix} {m['content']}"
-
     system, convo = raw[0], raw[1:]
     while True:
         parts = [system["content"]] + [render(m) for m in convo] + ["AI:"]
@@ -103,27 +93,25 @@ def build_prompt(raw: list[dict]) -> str:
             return "\n".join(parts)
         convo = convo[2:]  # drop oldest user+assistant pair
 
-# ───────────────────────────────────────────────────────────────────────────────
-# 5. Streaming chat callback
-# ───────────────────────────────────────────────────────────────────────────────
-def chat_fn(user_msg, chat_history, state, request: gr.Request):
+# ───────────────────────── Streaming chat callback ───────────────────────────
+def chat_fn(user_msg, chat_hist, state, request: gr.Request):
     ip = request.client.host if request else "anon"
     if not allowed(ip):
-        chat_history.append((user_msg, "Rate limit exceeded — please wait a minute."))
-        return chat_history, state
+        chat_hist.append((user_msg, "Rate limit exceeded — please wait a minute."))
+        return chat_hist, state
 
     user_msg = strip(user_msg or "")
     if not user_msg:
-        return chat_history, state
+        return chat_hist, state
     if len(user_msg) > MAX_INPUT_CH:
-        chat_history.append((user_msg, f"Input >{MAX_INPUT_CH} chars."))
-        return chat_history, state
+        chat_hist.append((user_msg, f"Input >{MAX_INPUT_CH} chars."))
+        return chat_hist, state
     if MODEL_ERR:
-        chat_history.append((user_msg, MODEL_ERR))
-        return chat_history, state
+        chat_hist.append((user_msg, MODEL_ERR))
+        return chat_hist, state
 
-    # Append user line and placeholder for assistant
-    chat_history.append((user_msg, ""))
+    # append user turn & empty assistant slot
+    chat_hist.append((user_msg, ""))
     state["raw"].append({"role": "user", "content": user_msg})
 
     prompt = build_prompt(state["raw"])
@@ -141,17 +129,25 @@ def chat_fn(user_msg, chat_history, state, request: gr.Request):
     ).start()
 
     partial = ""
-    for token in streamer:
-        partial += token
-        chat_history[-1] = (user_msg, partial)  # update last message
-        yield chat_history, state  # stream to UI
-
-    state["raw"].append({"role": "assistant", "content": strip(partial)})
-    yield chat_history, state  # final update
-
-# ───────────────────────────────────────────────────────────────────────────────
-# 6. Gradio UI
-# ───────────────────────────────────────────────────────────────────────────────
+    try:
+        for token in streamer:
+            partial += token
+            # hallucination guard: stop if model starts new speaker tag
+            if "User:" in partial or "\nAI:" in partial:
+                partial = re.split(r"(?:\n?User:|\n?AI:)", partial)[0].strip()
+                break
+            chat_hist[-1] = (user_msg, partial)
+            yield chat_hist, state
+    except Exception as exc:
+        log("❌ Stream error:\n" + traceback.format_exc())
+        partial = "Apologies—internal error. Please try again."
+
+    reply = strip(partial)
+    chat_hist[-1] = (user_msg, reply)
+    state["raw"].append({"role": "assistant", "content": reply})
+    yield chat_hist, state  # final
+
+# ─────────────────────────── Gradio Blocks UI ────────────────────────────────
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("### SchoolSpirit AI Chat")
     bot = gr.Chatbot(value=[("", WELCOME)], height=480, label="SchoolSpirit AI")
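
The rewritten allowed() is a sliding-window limiter: each call first evicts that IP's timestamps older than RATE_SEC, then rejects the call if RATE_N timestamps remain. A minimal standalone sanity check of that behavior (the constants and function body are copied from this commit; the assertions and the fake IP are illustrative only):

import time

RATE_N, RATE_SEC = 5, 60
VISITS: dict[str, list[float]] = {}

def allowed(ip: str) -> bool:
    now = time.time()
    # keep only timestamps still inside the window, then count them
    VISITS[ip] = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
    if len(VISITS[ip]) >= RATE_N:
        return False
    VISITS[ip].append(now)
    return True

assert all(allowed("1.2.3.4") for _ in range(RATE_N))      # first 5 calls pass
assert not allowed("1.2.3.4")                              # 6th call in the window is blocked
VISITS["1.2.3.4"] = [time.time() - RATE_SEC - 1] * RATE_N  # simulate the window expiring
assert allowed("1.2.3.4")                                  # stale timestamps age out

Note the VISITS dict is process-local, so the limit resets whenever the Space restarts and is not shared across replicas.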
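
The new streaming loop also adds a hallucination guard: generation is cut the moment the model starts writing the next speaker turn itself, since the prompt format uses bare "User:" / "AI:" tags. A small sketch of the same regex, factored into a hypothetical helper (truncate_at_speaker_tag does not exist in the commit) to show what survives the cut:

import re

def truncate_at_speaker_tag(partial: str) -> str:
    # Mirrors the guard in chat_fn: keep only the text before the first
    # "User:" or "AI:" tag the model emits on its own.
    if "User:" in partial or "\nAI:" in partial:
        return re.split(r"(?:\n?User:|\n?AI:)", partial)[0].strip()
    return partial

print(truncate_at_speaker_tag("We ship turnkey GPU servers.\nUser: how much?"))
# -> "We ship turnkey GPU servers."
print(truncate_at_speaker_tag("No speaker tags here."))
# -> unchanged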