Update app.py
app.py
CHANGED
@@ -4,7 +4,6 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama

-# a very small model, for speed
 REPO_ID = os.getenv("GGUF_REPO_ID", "bartowski/SmolLM2-135M-Instruct-GGUF")
 FILENAME = os.getenv("GGUF_FILENAME", "SmolLM2-135M-Instruct-Q4_K_M.gguf")

@@ -29,36 +28,50 @@ def load_llm():
 SYSTEM_PROMPT = "به فارسی، روشن و کوتاه پاسخ بده (حداکثر ۲ جمله)."

 def respond(message, history):
-    #
+    # 1) message can be a dict or a str
+    if isinstance(message, dict):
+        user_text = message.get("content", "")
+    else:
+        user_text = str(message or "")
+
+    # 2) history can be tuples or messages
     msgs = [{"role": "system", "content": SYSTEM_PROMPT}]
     if history and isinstance(history[0], dict) and "role" in history[0]:
-        msgs.extend(history)
+        msgs.extend(history)
     else:
         for user, assistant in (history or []):
             msgs.append({"role": "user", "content": user})
             msgs.append({"role": "assistant", "content": assistant})
-    msgs.append({"role": "user", "content":
+    msgs.append({"role": "user", "content": user_text})

     llm = load_llm()
-    print(">> gen start")
-
-        messages=msgs,
-        max_tokens=64,
-        temperature=0.4,
-        top_p=0.9,
-        stream=True,
-    )
+    print(">> gen start")
+
     partial = ""
-
-
-
-
-
-
+    try:
+        for chunk in llm.create_chat_completion(
+            messages=msgs,
+            max_tokens=64,
+            temperature=0.4,
+            top_p=0.9,
+            stream=True,
+        ):
+            choice = chunk["choices"][0]
+            delta = choice.get("delta") or {}
+            token = delta.get("content") or ""
+            if token:
+                partial += token
+                yield partial
+            if choice.get("finish_reason"):
+                break
+        print(">> gen done")
+    except Exception as e:
+        print(">> exception:", repr(e))
+        yield "متاسفم، در تولید پاسخ خطایی رخ داد. دوباره امتحان کن."

 demo = gr.ChatInterface(
     fn=respond,
-    type="messages", #
+    type="messages",  # sends the history as role/content messages
     title="چتبات خیلی ساده (CPU رایگان)",
     description="SmolLM2-135M (GGUF) با llama.cpp روی CPU.",
 )
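Note: the diff calls load_llm() but the function sits outside the changed hunks, so it is not shown above. For context, a minimal sketch of what such a loader typically looks like with hf_hub_download and llama_cpp follows; the module-level cache and the n_ctx/n_threads values are assumptions, not the actual contents of app.py.

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO_ID = os.getenv("GGUF_REPO_ID", "bartowski/SmolLM2-135M-Instruct-GGUF")
FILENAME = os.getenv("GGUF_FILENAME", "SmolLM2-135M-Instruct-Q4_K_M.gguf")

_llm = None  # module-level cache (assumed); keeps the model loaded between requests

def load_llm():
    # Download the GGUF file from the Hub once, then build a llama.cpp model on CPU.
    global _llm
    if _llm is None:
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        _llm = Llama(
            model_path=model_path,
            n_ctx=2048,      # assumed context window
            n_threads=2,     # free CPU Spaces expose only a couple of cores
            verbose=False,
        )
    return _llm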
|