rajeshlion committed on
Commit
cf59443
·
verified ·
1 Parent(s): 885733a

Update app.py

Files changed (1):
  1. app.py +272 -132
app.py CHANGED
@@ -1,54 +1,207 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-import spaces
-
-
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-@spaces.GPU
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a Chatbot who only answers spiritual questions based on Indian scriptures and declines answering other questions.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
@@ -59,69 +212,57 @@ demo = gr.ChatInterface(
             label="Top-p (nucleus sampling)",
         ),
     ],
-
 )
 
-
 if __name__ == "__main__":
     demo.launch()
 
 # import os
 # import gradio as gr
 # from huggingface_hub import InferenceClient
-# from huggingface_hub.utils import HfHubHTTPError
-
-# MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
-# HF_TOKEN = os.getenv("HF_TOKEN")  # ⚠️ set this in Spaces → Settings → Secrets
 
-# client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
 
 
 
-# def _build_zephyr_prompt(system_message: str, history, user_msg: str) -> str:
 #     parts = []
 #     if system_message:
-#         parts.append(f"<|system|>\n{system_message}\n</s>")
 #     for u, a in (history or []):
 #         if u:
-#             parts.append(f"<|user|>\n{u}\n</s>")
 #         if a:
-#             parts.append(f"<|assistant|>\n{a}\n</s>")
-#     parts.append(f"<|user|>\n{user_msg}\n</s>\n<|assistant|>\n")
 #     return "\n".join(parts)
 
-
 # def respond(message, history, system_message, max_tokens, temperature, top_p):
-#     # Early guardrails for missing token
-#     if not HF_TOKEN:
-#         yield (
-#             "⚠️ Missing HF_TOKEN.\n\n"
-#             "Set a Hugging Face access token in your Space:\n"
-#             "Settings → Repository secrets → Add secret → Name: HF_TOKEN, Value: <your token>\n"
-#             "Token needs at least 'read' scope."
-#         )
-#         return
-
-#     # Try OpenAI-like chat completion first
 #     try:
 #         response_text = ""
 #         for chunk in client.chat_completion(
-#             messages=(
-#                 [{"role": "system", "content": system_message}] if system_message else []
-#             )
-#             + [
-#                 msg
-#                 for pair in (history or [])
-#                 for msg in (
-#                     [{"role": "user", "content": pair[0]}] if pair and pair[0] else []
-#                 )
-#                 + (
-#                     [{"role": "assistant", "content": pair[1]}]
-#                     if pair and len(pair) > 1 and pair[1]
-#                     else []
-#                 )
-#             ]
-#             + [{"role": "user", "content": message}],
 #             max_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
@@ -133,91 +274,90 @@ if __name__ == "__main__":
 #             yield response_text
 #             return
 #     except HfHubHTTPError as e:
-#         # Handle 401 explicitly with helpful guidance
-#         try:
-#             status = e.response.status_code
-#         except Exception:
-#             status = None
 #         if status == 401:
 #             yield (
-#                 "❌ 401 Unauthorized from Hugging Face Inference API.\n\n"
-#                 "Fix:\n"
-#                 "1) Create a token at https://huggingface.co/settings/tokens with at least 'read' scope.\n"
-#                 "2) In your Space, go to Settings → Repository secrets → Add secret\n"
-#                 "   Name: HF_TOKEN, Value: <your token>\n"
-#                 "3) Restart the Space.\n"
 #             )
 #             return
-#         # Otherwise drop to fallback
 #     except Exception:
-#         pass
 
-#     # Fallback: raw text_generation with Zephyr chat format
-#     zephyr_prompt = _build_zephyr_prompt(system_message, history, message)
 #     try:
 #         response_text = ""
-#         # for tok in client.text_generation(
-#         #     zephyr_prompt,
-#         #     max_new_tokens=max_tokens,
-#         #     temperature=temperature,
-#         #     top_p=top_p,
-#         #     stream=True,
-#         #     stop=["</s>", "<|user|>", "<|assistant|>", "<|system|>"],
-#         # ):
-
 #         for tok in client.text_generation(
-#             zephyr_prompt,
 #             max_new_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
 #             stream=True,
 #         ):
-
 #             if tok:
 #                 response_text += tok
 #                 yield response_text
 #     except HfHubHTTPError as e:
-#         try:
-#             status = e.response.status_code
-#         except Exception:
-#             status = None
 #         if status == 401:
 #             yield (
 #                 "❌ 401 Unauthorized (text_generation fallback).\n\n"
-#                 "Set HF_TOKEN in Space secrets (Settings → Repository secrets)."
 #             )
 #         else:
 #             yield f"[Inference error] {e}"
 #     except Exception as e:
 #         yield f"[Runtime error] {e}"
 
-
 # demo = gr.ChatInterface(
 #     respond,
 #     additional_inputs=[
 #         gr.Textbox(
-#             value=(
-#                 "You are a Chatbot who only answers spiritual questions based "
-#                 "on Indian scriptures and declines answering other questions."
-#             ),
 #             label="System message",
 #         ),
 #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#         gr.Slider(
-#             minimum=0.1,
-#             maximum=1.0,
-#             value=0.95,
-#             step=0.05,
-#             label="Top-p (nucleus sampling)",
-#         ),
 #     ],
 #     )
 
 # if __name__ == "__main__":
-#     demo.launch()
-
-
 
 
 
+# import gradio as gr
+# from huggingface_hub import InferenceClient
+# import spaces
+
+
+
+# """
+# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+# """
+# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+# @spaces.GPU
+# def respond(
+#     message,
+#     history: list[tuple[str, str]],
+#     system_message,
+#     max_tokens,
+#     temperature,
+#     top_p,
+# ):
+#     messages = [{"role": "system", "content": system_message}]
+
+#     for val in history:
+#         if val[0]:
+#             messages.append({"role": "user", "content": val[0]})
+#         if val[1]:
+#             messages.append({"role": "assistant", "content": val[1]})
+
+#     messages.append({"role": "user", "content": message})
+
+#     response = ""
+
+#     for message in client.chat_completion(
+#         messages,
+#         max_tokens=max_tokens,
+#         stream=True,
+#         temperature=temperature,
+#         top_p=top_p,
+#     ):
+#         token = message.choices[0].delta.content
+
+#         response += token
+#         yield response
+
+# """
+# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
+# """
+# demo = gr.ChatInterface(
+#     respond,
+#     additional_inputs=[
+#         gr.Textbox(value="You are a Chatbot who only answers spiritual questions based on Indian scriptures and declines answering other questions.", label="System message"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(
+#             minimum=0.1,
+#             maximum=1.0,
+#             value=0.95,
+#             step=0.05,
+#             label="Top-p (nucleus sampling)",
+#         ),
+#     ],
+
+# )
+
+
+# if __name__ == "__main__":
+#     demo.launch()
+
+import os
 import gradio as gr
 from huggingface_hub import InferenceClient
+from huggingface_hub.utils import HfHubHTTPError
+
+MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
+HF_TOKEN = os.getenv("HF_TOKEN")  # ⚠️ set this in Spaces → Settings → Secrets
+
+client = InferenceClient(model=MODEL_ID, token=HF_TOKEN)
+
+
+def _build_zephyr_prompt(system_message: str, history, user_msg: str) -> str:
+    parts = []
+    if system_message:
+        parts.append(f"<|system|>\n{system_message}\n</s>")
+    for u, a in (history or []):
+        if u:
+            parts.append(f"<|user|>\n{u}\n</s>")
+        if a:
+            parts.append(f"<|assistant|>\n{a}\n</s>")
+    parts.append(f"<|user|>\n{user_msg}\n</s>\n<|assistant|>\n")
+    return "\n".join(parts)
+
+
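+# (added note, illustrative) For system_message="S", history=[("hi", "ho")] and
+# user_msg="Q", _build_zephyr_prompt returns Zephyr's chat-template blocks,
+# joined by newlines:
+#   <|system|>\nS\n</s>
+#   <|user|>\nhi\n</s>
+#   <|assistant|>\nho\n</s>
+#   <|user|>\nQ\n</s>\n<|assistant|>\n
+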
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    # Early guardrails for missing token
+    if not HF_TOKEN:
+        yield (
+            "⚠️ Missing HF_TOKEN.\n\n"
+            "Set a Hugging Face access token in your Space:\n"
+            "Settings → Repository secrets → Add secret → Name: HF_TOKEN, Value: <your token>\n"
+            "Token needs at least 'read' scope."
+        )
+        return
+
+    # Try OpenAI-like chat completion first
+    try:
+        response_text = ""
+        for chunk in client.chat_completion(
+            messages=(
+                [{"role": "system", "content": system_message}] if system_message else []
+            )
+            + [
+                msg
+                for pair in (history or [])
+                for msg in (
+                    [{"role": "user", "content": pair[0]}] if pair and pair[0] else []
+                )
+                + (
+                    [{"role": "assistant", "content": pair[1]}]
+                    if pair and len(pair) > 1 and pair[1]
+                    else []
+                )
+            ]
+            + [{"role": "user", "content": message}],
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stream=True,
+        ):
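+            # (added comment) Some stream chunks, e.g. the initial role header or the
+            # final empty delta, carry no content; getattr(..., None) guards against that.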
+            token = getattr(chunk.choices[0].delta, "content", None)
+            if token:
+                response_text += token
+                yield response_text
+        return
+    except HfHubHTTPError as e:
+        # Handle 401 explicitly with helpful guidance
+        try:
+            status = e.response.status_code
+        except Exception:
+            status = None
+        if status == 401:
+            yield (
+                "❌ 401 Unauthorized from Hugging Face Inference API.\n\n"
+                "Fix:\n"
+                "1) Create a token at https://huggingface.co/settings/tokens with at least 'read' scope.\n"
+                "2) In your Space, go to Settings → Repository secrets → Add secret\n"
+                "   Name: HF_TOKEN, Value: <your token>\n"
+                "3) Restart the Space.\n"
+            )
+            return
+        # Otherwise drop to fallback
+    except Exception:
+        pass
+
+    # Fallback: raw text_generation with Zephyr chat format
+    zephyr_prompt = _build_zephyr_prompt(system_message, history, message)
+    try:
+        response_text = ""
+        # for tok in client.text_generation(
+        #     zephyr_prompt,
+        #     max_new_tokens=max_tokens,
+        #     temperature=temperature,
+        #     top_p=top_p,
+        #     stream=True,
+        #     stop=["</s>", "<|user|>", "<|assistant|>", "<|system|>"],
+        # ):
+
+        for tok in client.text_generation(
+            zephyr_prompt,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p,
+            stream=True,
+        ):
+
+            if tok:
+                response_text += tok
+                yield response_text
+    except HfHubHTTPError as e:
+        try:
+            status = e.response.status_code
+        except Exception:
+            status = None
+        if status == 401:
+            yield (
+                "❌ 401 Unauthorized (text_generation fallback).\n\n"
+                "Set HF_TOKEN in Space secrets (Settings → Repository secrets)."
+            )
+        else:
+            yield f"[Inference error] {e}"
+    except Exception as e:
+        yield f"[Runtime error] {e}"
+
+
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(
+            value=(
+                "You are a Chatbot who only answers spiritual questions based "
+                "on Indian scriptures and declines answering other questions."
+            ),
+            label="System message",
+        ),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
             value=0.95,
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
     ],
 )
 
 if __name__ == "__main__":
     demo.launch()
 
+
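+# (added sketch, not part of the Space runtime) Quick local smoke test for respond()
+# without Gradio; assumes HF_TOKEN is set and the Inference API is reachable. The
+# SMOKE_TEST environment-variable guard is hypothetical, only to keep it opt-in.
+#
+# if os.getenv("SMOKE_TEST"):
+#     last = None
+#     for last in respond(
+#         message="What does the Bhagavad Gita say about duty?",
+#         history=[],
+#         system_message="You answer only from Indian scriptures.",
+#         max_tokens=64,
+#         temperature=0.7,
+#         top_p=0.95,
+#     ):
+#         pass
+#     print(last)  # respond() yields growing partials; the last one is the full reply
+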
 # import os
 # import gradio as gr
 # from huggingface_hub import InferenceClient
+# from huggingface_hub.utils import HfHubHTTPError  # correct import for 0.22.2
 
+# # ✅ You can override this in the Space secrets: MODEL_ID=google/gemma-2-2b-it (or Qwen/Qwen2...)
+# MODEL_ID = os.getenv("MODEL_ID", "microsoft/Phi-3-mini-4k-instruct")
 
+# # Accept either token name (matches your other Spaces)
+# HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
 
+# client = InferenceClient(model=MODEL_ID, token=HF_TOKEN) if HF_TOKEN else InferenceClient(model=MODEL_ID)
 
+# def _build_generic_prompt(system_message: str, history, user_msg: str) -> str:
+#     """
+#     Generic chat-style prompt that works across most instruct-tuned models.
+#     """
 #     parts = []
 #     if system_message:
+#         parts.append(f"System: {system_message}")
 #     for u, a in (history or []):
 #         if u:
+#             parts.append(f"User: {u}")
 #         if a:
+#             parts.append(f"Assistant: {a}")
+#     parts.append(f"User: {user_msg}")
+#     parts.append("Assistant:")
 #     return "\n".join(parts)
 
 # def respond(message, history, system_message, max_tokens, temperature, top_p):
+#     # Try chat-completions first (if backend supports it)
 #     try:
 #         response_text = ""
+#         msgs = (
+#             [{"role": "system", "content": system_message}] if system_message else []
+#         )
+#         for u, a in (history or []):
+#             if u:
+#                 msgs.append({"role": "user", "content": u})
+#             if a:
+#                 msgs.append({"role": "assistant", "content": a})
+#         msgs.append({"role": "user", "content": message})
+
 #         for chunk in client.chat_completion(
+#             messages=msgs,
 #             max_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
 
 #             yield response_text
 #             return
 #     except HfHubHTTPError as e:
+#         status = getattr(e, "response", None).status_code if getattr(e, "response", None) else None
 #         if status == 401:
+#             which = "HF_TOKEN or HUGGINGFACEHUB_API_TOKEN"
 #             yield (
+#                 "❌ 401 Unauthorized from HF Inference API.\n\n"
+#                 f"Add a read-scoped token as **{which}** and restart."
 #             )
 #             return
+#         if status == 403:
+#             yield (
+#                 "❌ 403 Forbidden from HF Inference API.\n\n"
+#                 "Model may require Inference Providers permissions & billing. "
+#                 "Either enable those for your token or switch MODEL_ID to a free hosted model."
+#             )
+#             return
+#         if status == 404:
+#             yield (
+#                 f"❌ 404 Not Found for model `{MODEL_ID}` via chat-completions.\n\n"
+#                 "The serverless endpoint is likely unavailable. "
+#                 "Set MODEL_ID to a hosted model (e.g., microsoft/Phi-3-mini-4k-instruct, "
+#                 "google/gemma-2-2b-it, Qwen/Qwen2-1.5B-Instruct) in Space secrets and restart."
+#             )
+#         # fall through to fallback
 #     except Exception:
+#         pass  # fall through to fallback
 
+#     # Fallback: plain text_generation with a generic prompt
+#     prompt = _build_generic_prompt(system_message, history, message)
 #     try:
 #         response_text = ""
 #         for tok in client.text_generation(
+#             prompt,
 #             max_new_tokens=max_tokens,
 #             temperature=temperature,
 #             top_p=top_p,
 #             stream=True,
 #         ):
+#             # Manual stop filtering (since 0.22.2 lacks 'stop' kwarg)
+#             if any(s in tok for s in ["</s>", "<|user|>", "<|assistant|>", "<|system|>"]):
+#                 break
 #             if tok:
 #                 response_text += tok
 #                 yield response_text
 #     except HfHubHTTPError as e:
+#         status = getattr(e, "response", None).status_code if getattr(e, "response", None) else None
 #         if status == 401:
+#             which = "HF_TOKEN or HUGGINGFACEHUB_API_TOKEN"
 #             yield (
 #                 "❌ 401 Unauthorized (text_generation fallback).\n\n"
+#                 f"Set **{which}** in Space secrets and restart."
+#             )
+#         elif status == 403:
+#             yield (
+#                 "❌ 403 Forbidden (text_generation fallback).\n\n"
+#                 "Your token lacks 'Use Inference API/Providers' or billing is not enabled. "
+#                 "Either grant those permissions & restart, or set MODEL_ID to a free hosted model."
+#             )
+#         elif status == 404:
+#             yield (
+#                 f"❌ 404 Not Found for model `{MODEL_ID}` via text-generation.\n\n"
+#                 "Switch MODEL_ID to a hosted model (e.g., microsoft/Phi-3-mini-4k-instruct, "
+#                 "google/gemma-2-2b-it, Qwen/Qwen2-1.5B-Instruct) and restart."
 #             )
 #         else:
 #             yield f"[Inference error] {e}"
 #     except Exception as e:
 #         yield f"[Runtime error] {e}"
 
 # demo = gr.ChatInterface(
 #     respond,
 #     additional_inputs=[
 #         gr.Textbox(
+#             value=("You are a Chatbot who only answers spiritual questions based on Indian scriptures "
+#                    "and declines answering other questions."),
 #             label="System message",
 #         ),
 #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
 #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 #     ],
 # )
 
 # if __name__ == "__main__":
+#     demo.launch(share=True)