rajeshlion committed on
Commit
cf59443
·
verified ·
1 Parent(s): 885733a

Update app.py

Files changed (1):
  1. app.py +272 -132
app.py CHANGED
@@ -1,54 +1,207 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import spaces
-
-
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-@spaces.GPU
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a Chatbot who only answers spiritual questions based on Indian scriptures and declines answering other questions.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
@@ -59,69 +212,57 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)",
         ),
     ],
-
 )
 
-
 if __name__ == "__main__":
     demo.launch()
 
 # import os
 # import gradio as gr
 # from huggingface_hub import InferenceClient
-# from huggingface_hub.utils import HfHubHTTPError
-
-# MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
-# HF_TOKEN = os.getenv("HF_TOKEN")  # ⚠️ set this in Spaces → Settings → Secrets
 
-# client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
 
 
 
-# def _build_zephyr_prompt(system_message: str, history, user_msg: str) -> str:
 #     parts = []
 #     if system_message:
-#         parts.append(f"<|system|>\n{system_message}\n</s>")
 #     for u, a in (history or []):
 #         if u:
-#             parts.append(f"<|user|>\n{u}\n</s>")
 #         if a:
-#             parts.append(f"<|assistant|>\n{a}\n</s>")
-#     parts.append(f"<|user|>\n{user_msg}\n</s>\n<|assistant|>\n")
 #     return "\n".join(parts)
 
-
 # def respond(message, history, system_message, max_tokens, temperature, top_p):
-#     # Early guardrails for missing token
-#     if not HF_TOKEN:
-#         yield (
-#             "⚠️ Missing HF_TOKEN.\n\n"
-#             "Set a Hugging Face access token in your Space:\n"
-#             "Settings → Repository secrets → Add secret → Name: HF_TOKEN, Value: <your token>\n"
-#             "Token needs at least 'read' scope."
-#         )
-#         return
-
-#     # Try OpenAI-like chat completion first
 #     try:
 #         response_text = ""
 #         for chunk in client.chat_completion(
-#             messages=(
-#                 [{"role": "system", "content": system_message}] if system_message else []
-#             )
-#             + [
-#                 msg
-#                 for pair in (history or [])
-#                 for msg in (
-#                     [{"role": "user", "content": pair[0]}] if pair and pair[0] else []
-#                 )
-#                 + (
-#                     [{"role": "assistant", "content": pair[1]}]
-#                     if pair and len(pair) > 1 and pair[1]
-#                     else []
-#                 )
-#             ]
-#             + [{"role": "user", "content": message}],
 #             max_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
@@ -133,91 +274,90 @@ if __name__ == "__main__":
 #             yield response_text
 #             return
 #     except HfHubHTTPError as e:
-#         # Handle 401 explicitly with helpful guidance
-#         try:
-#             status = e.response.status_code
-#         except Exception:
-#             status = None
 #         if status == 401:
 #             yield (
-#                 "❌ 401 Unauthorized from Hugging Face Inference API.\n\n"
-#                 "Fix:\n"
-#                 "1) Create a token at https://huggingface.co/settings/tokens with at least 'read' scope.\n"
-#                 "2) In your Space, go to Settings → Repository secrets → Add secret\n"
-#                 "   Name: HF_TOKEN, Value: <your token>\n"
-#                 "3) Restart the Space.\n"
 #             )
 #             return
-#         # Otherwise drop to fallback
 #     except Exception:
-#         pass
 
-#     # Fallback: raw text_generation with Zephyr chat format
-#     zephyr_prompt = _build_zephyr_prompt(system_message, history, message)
 #     try:
 #         response_text = ""
-#         # for tok in client.text_generation(
-#         #     zephyr_prompt,
-#         #     max_new_tokens=max_tokens,
-#         #     temperature=temperature,
-#         #     top_p=top_p,
-#         #     stream=True,
-#         #     stop=["</s>", "<|user|>", "<|assistant|>", "<|system|>"],
-#         # ):
-
 #         for tok in client.text_generation(
-#             zephyr_prompt,
 #             max_new_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
 #             stream=True,
 #         ):
-
 #             if tok:
 #                 response_text += tok
 #                 yield response_text
 #     except HfHubHTTPError as e:
-#         try:
-#             status = e.response.status_code
-#         except Exception:
-#             status = None
 #         if status == 401:
 #             yield (
 #                 "❌ 401 Unauthorized (text_generation fallback).\n\n"
-#                 "Set HF_TOKEN in Space secrets (Settings → Repository secrets)."
 #             )
 #         else:
 #             yield f"[Inference error] {e}"
 #     except Exception as e:
 #         yield f"[Runtime error] {e}"
 
-
 # demo = gr.ChatInterface(
 #     respond,
 #     additional_inputs=[
 #         gr.Textbox(
-#             value=(
-#                 "You are a Chatbot who only answers spiritual questions based "
-#                 "on Indian scriptures and declines answering other questions."
-#             ),
 #             label="System message",
 #         ),
 #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#         gr.Slider(
-#             minimum=0.1,
-#             maximum=1.0,
-#             value=0.95,
-#             step=0.05,
-#             label="Top-p (nucleus sampling)",
-#         ),
 #     ],
 #     )
 
 # if __name__ == "__main__":
-#     demo.launch()
-
-
 
 
 
+# import gradio as gr
+# from huggingface_hub import InferenceClient
+# import spaces
+
+
+
+# """
+# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+# """
+# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+# @spaces.GPU
+# def respond(
+#     message,
+#     history: list[tuple[str, str]],
+#     system_message,
+#     max_tokens,
+#     temperature,
+#     top_p,
+# ):
+#     messages = [{"role": "system", "content": system_message}]
+
+#     for val in history:
+#         if val[0]:
+#             messages.append({"role": "user", "content": val[0]})
+#         if val[1]:
+#             messages.append({"role": "assistant", "content": val[1]})
+
+#     messages.append({"role": "user", "content": message})
+
+#     response = ""
+
+#     for message in client.chat_completion(
+#         messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = message.choices[0].delta.content
+
+#         response += token
+#         yield response
+
+# """
+# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+# """
+# demo = gr.ChatInterface(
+#     respond,
+#     additional_inputs=[
+#         gr.Textbox(value="You are a Chatbot who only answers spiritual questions based on Indian scriptures and declines answering other questions.", label="System message"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(
+#             minimum=0.1,
+#             maximum=1.0,
+#             value=0.95,
+#             step=0.05,
+#             label="Top-p (nucleus sampling)",
+#         ),
+#     ],
+
+# )
+
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+import os
 import gradio as gr
 from huggingface_hub import InferenceClient
+from huggingface_hub.utils import HfHubHTTPError
+
+MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
+HF_TOKEN = os.getenv("HF_TOKEN")  # ⚠️ set this in Spaces → Settings → Secrets
+
+client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+
+
+def _build_zephyr_prompt(system_message: str, history, user_msg: str) -> str:
+    parts = []
+    if system_message:
+        parts.append(f"<|system|>\n{system_message}\n</s>")
+    for u, a in (history or []):
+        if u:
+            parts.append(f"<|user|>\n{u}\n</s>")
+        if a:
+            parts.append(f"<|assistant|>\n{a}\n</s>")
+    parts.append(f"<|user|>\n{user_msg}\n</s>\n<|assistant|>\n")
+    return "\n".join(parts)
+
+
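+# (added note, illustrative) For system_message="S", history=[("hi", "ho")] and
+# user_msg="Q", _build_zephyr_prompt returns Zephyr's chat-template blocks,
+# joined by newlines:
+#   <|system|>\nS\n</s>
+#   <|user|>\nhi\n</s>
+#   <|assistant|>\nho\n</s>
+#   <|user|>\nQ\n</s>\n<|assistant|>\n
+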
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Early guardrails for missing token
+    if not HF_TOKEN:
+        yield (
+            "⚠️ Missing HF_TOKEN.\n\n"
+            "Set a Hugging Face access token in your Space:\n"
+            "Settings → Repository secrets → Add secret → Name: HF_TOKEN, Value: <your token>\n"
+            "Token needs at least 'read' scope."
+        )
+        return
+
+    # Try OpenAI-like chat completion first
+    try:
+        response_text = ""
+        for chunk in client.chat_completion(
+            messages=(
+                [{"role": "system", "content": system_message}] if system_message else []
+            )
+            + [
+                msg
+                for pair in (history or [])
+                for msg in (
+                    [{"role": "user", "content": pair[0]}] if pair and pair[0] else []
+                )
+                + (
+                    [{"role": "assistant", "content": pair[1]}]
+                    if pair and len(pair) > 1 and pair[1]
+                    else []
+                )
+            ]
+            + [{"role": "user", "content": message}],
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stream=True,
+        ):
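+            # (added comment) Some stream chunks, e.g. the initial role header or the
+            # final empty delta, carry no content; getattr(..., None) guards against that.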
+            token = getattr(chunk.choices[0].delta, "content", None)
+            if token:
+                response_text += token
+                yield response_text
+        return
+    except HfHubHTTPError as e:
+        # Handle 401 explicitly with helpful guidance
+        try:
+            status = e.response.status_code
+        except Exception:
+            status = None
+        if status == 401:
+            yield (
+                "❌ 401 Unauthorized from Hugging Face Inference API.\n\n"
+                "Fix:\n"
+                "1) Create a token at https://huggingface.co/settings/tokens with at least 'read' scope.\n"
+                "2) In your Space, go to Settings → Repository secrets → Add secret\n"
+                "   Name: HF_TOKEN, Value: <your token>\n"
+                "3) Restart the Space.\n"
+            )
+            return
+        # Otherwise drop to fallback
+    except Exception:
+        pass
+
+    # Fallback: raw text_generation with Zephyr chat format
+    zephyr_prompt = _build_zephyr_prompt(system_message, history, message)
+    try:
+        response_text = ""
+        # for tok in client.text_generation(
+        #     zephyr_prompt,
+        #     max_new_tokens=max_tokens,
+        #     temperature=temperature,
+        #     top_p=top_p,
+        #     stream=True,
+        #     stop=["</s>", "<|user|>", "<|assistant|>", "<|system|>"],
+        # ):
+
+        for tok in client.text_generation(
+            zephyr_prompt,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stream=True,
+        ):
+
+            if tok:
+                response_text += tok
+                yield response_text
+    except HfHubHTTPError as e:
+        try:
+            status = e.response.status_code
+        except Exception:
+            status = None
+        if status == 401:
+            yield (
+                "❌ 401 Unauthorized (text_generation fallback).\n\n"
+                "Set HF_TOKEN in Space secrets (Settings → Repository secrets)."
+            )
+        else:
+            yield f"[Inference error] {e}"
+    except Exception as e:
+        yield f"[Runtime error] {e}"
+
+
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(
+            value=(
+                "You are a Chatbot who only answers spiritual questions based "
+                "on Indian scriptures and declines answering other questions."
+            ),
+            label="System message",
+        ),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
     ],
 )
 
 if __name__ == "__main__":
     demo.launch()
 
+
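+# (added sketch, not part of the Space runtime) Quick local smoke test for respond()
+# without Gradio; assumes HF_TOKEN is set and the Inference API is reachable. The
+# SMOKE_TEST environment-variable guard is hypothetical, only to keep it opt-in.
+#
+# if os.getenv("SMOKE_TEST"):
+#     last = None
+#     for last in respond(
+#         message="What does the Bhagavad Gita say about duty?",
+#         history=[],
+#         system_message="You answer only from Indian scriptures.",
+#         max_tokens=64,
+#         temperature=0.7,
+#         top_p=0.95,
+#     ):
+#         pass
+#     print(last)  # respond() yields growing partials; the last one is the full reply
+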
 # import os
 # import gradio as gr
 # from huggingface_hub import InferenceClient
+# from huggingface_hub.utils import HfHubHTTPError  # correct import for 0.22.2
 
+# # ✅ You can override this in the Space secrets: MODEL_ID=google/gemma-2-2b-it (or Qwen/Qwen2...)
+# MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
 
+# # Accept either token name (matches your other Spaces)
+# HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+# client = InferenceClient(model=MODEL_ID, token=HF_TOKEN) if HF_TOKEN else InferenceClient(model=MODEL_ID)
 
+# def _build_generic_prompt(system_message: str, history, user_msg: str) -> str:
+#     """
+#     Generic chat-style prompt that works across most instruct-tuned models.
+#     """
 #     parts = []
 #     if system_message:
+#         parts.append(f"System: {system_message}")
 #     for u, a in (history or []):
 #         if u:
+#             parts.append(f"User: {u}")
 #         if a:
+#             parts.append(f"Assistant: {a}")
+#     parts.append(f"User: {user_msg}")
+#     parts.append("Assistant:")
 #     return "\n".join(parts)
 
 # def respond(message, history, system_message, max_tokens, temperature, top_p):
+#     # Try chat-completions first (if backend supports it)
 #     try:
 #         response_text = ""
+#         msgs = (
+#             [{"role": "system", "content": system_message}] if system_message else []
+#         )
+#         for u, a in (history or []):
+#             if u:
+#                 msgs.append({"role": "user", "content": u})
+#             if a:
+#                 msgs.append({"role": "assistant", "content": a})
+#         msgs.append({"role": "user", "content": message})
+
 #         for chunk in client.chat_completion(
+#             messages=msgs,
 #             max_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
 
 #             yield response_text
 #             return
 #     except HfHubHTTPError as e:
+#         status = getattr(e, "response", None).status_code if getattr(e, "response", None) else None
 #         if status == 401:
+#             which = "HF_TOKEN or HUGGINGFACEHUB_API_TOKEN"
 #             yield (
+#                 "❌ 401 Unauthorized from HF Inference API.\n\n"
+#                 f"Add a read-scoped token as **{which}** and restart."
 #             )
 #             return
+#         if status == 403:
+#             yield (
+#                 "❌ 403 Forbidden from HF Inference API.\n\n"
+#                 "Model may require Inference Providers permissions & billing. "
+#                 "Either enable those for your token or switch MODEL_ID to a free hosted model."
+#             )
+#             return
+#         if status == 404:
+#             yield (
+#                 f"❌ 404 Not Found for model `{MODEL_ID}` via chat-completions.\n\n"
+#                 "The serverless endpoint is likely unavailable. "
+#                 "Set MODEL_ID to a hosted model (e.g., microsoft/Phi-3-mini-4k-instruct, "
+#                 "google/gemma-2-2b-it, Qwen/Qwen2-1.5B-Instruct) in Space secrets and restart."
+#             )
+#         # fall through to fallback
 #     except Exception:
+#         pass  # fall through to fallback
 
+#     # Fallback: plain text_generation with a generic prompt
+#     prompt = _build_generic_prompt(system_message, history, message)
 #     try:
 #         response_text = ""
 #         for tok in client.text_generation(
+#             prompt,
 #             max_new_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
 #             stream=True,
 #         ):
+#             # Manual stop filtering (since 0.22.2 lacks 'stop' kwarg)
+#             if any(s in tok for s in ["</s>", "<|user|>", "<|assistant|>", "<|system|>"]):
+#                 break
 #             if tok:
 #                 response_text += tok
 #                 yield response_text
 #     except HfHubHTTPError as e:
+#         status = getattr(e, "response", None).status_code if getattr(e, "response", None) else None
 #         if status == 401:
+#             which = "HF_TOKEN or HUGGINGFACEHUB_API_TOKEN"
 #             yield (
 #                 "❌ 401 Unauthorized (text_generation fallback).\n\n"
+#                 f"Set **{which}** in Space secrets and restart."
+#             )
+#         elif status == 403:
+#             yield (
+#                 "❌ 403 Forbidden (text_generation fallback).\n\n"
+#                 "Your token lacks 'Use Inference API/Providers' or billing is not enabled. "
+#                 "Either grant those permissions & restart, or set MODEL_ID to a free hosted model."
+#             )
+#         elif status == 404:
+#             yield (
+#                 f"❌ 404 Not Found for model `{MODEL_ID}` via text-generation.\n\n"
+#                 "Switch MODEL_ID to a hosted model (e.g., microsoft/Phi-3-mini-4k-instruct, "
+#                 "google/gemma-2-2b-it, Qwen/Qwen2-1.5B-Instruct) and restart."
 #             )
 #         else:
 #             yield f"[Inference error] {e}"
 #     except Exception as e:
 #         yield f"[Runtime error] {e}"
 
 # demo = gr.ChatInterface(
 #     respond,
 #     additional_inputs=[
 #         gr.Textbox(
+#             value=("You are a Chatbot who only answers spiritual questions based on Indian scriptures "
+#                    "and declines answering other questions."),
 #             label="System message",
 #         ),
 #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 #     ],
 # )
 
 # if __name__ == "__main__":
+#     demo.launch(share=True)