Update app.py
app.py
CHANGED
@@ -4,7 +4,6 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama

-# a very small model, for speed
 REPO_ID = os.getenv("GGUF_REPO_ID", "bartowski/SmolLM2-135M-Instruct-GGUF")
 FILENAME = os.getenv("GGUF_FILENAME", "SmolLM2-135M-Instruct-Q4_K_M.gguf")

@@ -29,36 +28,50 @@ def load_llm():
 SYSTEM_PROMPT = "به فارسی، روشن و کوتاه پاسخ بده (حداکثر ۲ جمله)."

 def respond(message, history):
-    #
+    # 1) message can be a dict or a str
+    if isinstance(message, dict):
+        user_text = message.get("content", "")
+    else:
+        user_text = str(message or "")
+
+    # 2) history can be tuples or messages
     msgs = [{"role": "system", "content": SYSTEM_PROMPT}]
     if history and isinstance(history[0], dict) and "role" in history[0]:
-        msgs.extend(history)
+        msgs.extend(history)
     else:
         for user, assistant in (history or []):
             msgs.append({"role": "user", "content": user})
             msgs.append({"role": "assistant", "content": assistant})
-    msgs.append({"role": "user", "content":
+    msgs.append({"role": "user", "content": user_text})

     llm = load_llm()
-    print(">> gen start")
-
-        messages=msgs,
-        max_tokens=64,
-        temperature=0.4,
-        top_p=0.9,
-        stream=True,
-    )
+    print(">> gen start")
+
     partial = ""
-
-
-
-
-
-
+    try:
+        for chunk in llm.create_chat_completion(
+            messages=msgs,
+            max_tokens=64,
+            temperature=0.4,
+            top_p=0.9,
+            stream=True,
+        ):
+            choice = chunk["choices"][0]
+            delta = choice.get("delta") or {}
+            token = delta.get("content") or ""
+            if token:
+                partial += token
+                yield partial
+            if choice.get("finish_reason"):
+                break
+        print(">> gen done")
+    except Exception as e:
+        print(">> exception:", repr(e))
+        yield "متاسفم، در تولید پاسخ خطایی رخ داد. دوباره امتحان کن."

 demo = gr.ChatInterface(
     fn=respond,
-    type="messages", #
+    type="messages",  # sends the history as role/content messages
     title="چتبات خیلی ساده (CPU رایگان)",
     description="SmolLM2-135M (GGUF) با llama.cpp روی CPU.",
 )
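Note: the diff calls load_llm() but the function sits outside the changed hunks, so it is not shown above. For context, a minimal sketch of what such a loader typically looks like with hf_hub_download and llama_cpp follows; the module-level cache and the n_ctx/n_threads values are assumptions, not the actual contents of app.py.

import os
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

REPO_ID = os.getenv("GGUF_REPO_ID", "bartowski/SmolLM2-135M-Instruct-GGUF")
FILENAME = os.getenv("GGUF_FILENAME", "SmolLM2-135M-Instruct-Q4_K_M.gguf")

_llm = None  # module-level cache (assumed); keeps the model loaded between requests

def load_llm():
    # Download the GGUF file from the Hub once, then build a llama.cpp model on CPU.
    global _llm
    if _llm is None:
        model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
        _llm = Llama(
            model_path=model_path,
            n_ctx=2048,      # assumed context window
            n_threads=2,     # free CPU Spaces expose only a couple of cores
            verbose=False,
        )
    return _llm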
|