Update app.py
app.py
CHANGED
@@ -9,12 +9,12 @@ from transformers import (
 # ======================
 # Tunable parameters (can also be overridden via the Space's Variables)
 # ======================
-MODEL_ID = os.getenv("MODEL_ID", "happzy2633/qwen2.5-7b-ins-v3").strip()
+MODEL_ID = os.getenv("MODEL_ID", "huihui_ai/qwen2.5-abliterate:7b-instruct").strip()
 MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
-TEMPERATURE = float(os.getenv("TEMPERATURE", "0.
+TEMPERATURE = float(os.getenv("TEMPERATURE", "0.85"))  # slightly higher, reduces repetition
 TOP_P = float(os.getenv("TOP_P", "0.9"))
+TOP_K = int(os.getenv("TOP_K", "50"))
+REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.12"))
 SAFE_MODE = os.getenv("SAFE_MODE", "1") != "0"  # 1 = enable basic filtering; set to 0 to turn it off

 # —— Base system prompt + persona default (reinforces the ban on leaking the thinking process) ——
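All of these knobs are read through os.getenv, so the new TOP_K and REPETITION_PENALTY can be overridden from the Space's Variables without touching the code. A minimal sketch of the override path (the values below are illustrative, not part of this commit):

import os

# Illustrative overrides; on a Space these would be set as Variables rather than in code.
os.environ["TEMPERATURE"] = "0.7"
os.environ["REPETITION_PENALTY"] = "1.05"

TEMPERATURE = float(os.getenv("TEMPERATURE", "0.85"))                # -> 0.7
REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.12"))  # -> 1.05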
@@ -22,8 +22,9 @@ BASE_SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     """
 You are a helpful, concise chat assistant.
-Do NOT reveal chain-of-thought, analysis, inner reasoning,
+Do NOT reveal chain-of-thought, analysis, inner reasoning, <Thought>, <analysis>, <think>, or similar sections.
 If asked to explain reasoning, provide a brief, high-level summary of steps only.
+The final user-visible answer SHOULD be enclosed in <final> ... </final>. If you don't use <final>, output plain text.
 """
 ).strip()
 DEFAULT_PERSONA = os.getenv("PERSONA", "").strip()
@@ -101,44 +102,52 @@ def violates(text: str) -> bool:
     return False

 # ======================
+# FinalFilter: prefer showing only <final> content; fall back to the full text when no <final> appears
 # ======================
+FINAL_OPEN_PAT = re.compile(r"(?is)<\s*final\s*>")
+FINAL_CLOSE_PAT = re.compile(r"(?is)</\s*final\s*>")

+class FinalFilter:
+    """
+    Logic:
+    - The stream accumulates self.buffer = the raw visible text (nothing is skipped)
+    - Whenever <final>…</final> is detected, its contents are appended to self.visible and seen_final is set to True
+    - get_visible():
+      - if seen_final is True and self.visible is non-empty: return self.visible
+      - otherwise: return self.buffer (fall back to showing everything, avoiding a blank reply)
+    """
+    def __init__(self):
+        self.in_final = False
+        self.seen_final = False
+        self.visible = ""  # <final> content captured so far
+        self.buffer = ""   # raw accumulation (fallback when there is no <final>)
+
+    def push(self, chunk: str):
+        self.buffer += chunk  # always keep the raw stream (for the fallback)
+        i = 0
+        while i < len(chunk):
+            if not self.in_final:
+                m = FINAL_OPEN_PAT.search(chunk, i)
+                if not m:
+                    # not inside <final> yet; skip (the buffer already holds it)
+                    break
+                i = m.end()
+                self.in_final = True
+                self.seen_final = True
+            else:
+                m = FINAL_CLOSE_PAT.search(chunk, i)
+                if not m:
+                    # <final> not closed yet; keep accumulating
+                    self.visible += chunk[i:]
+                    i = len(chunk)
+                else:
+                    self.visible += chunk[i:m.start()]
+                    i = m.end()
+                    self.in_final = False
+
+    def get_visible(self) -> str:
+        text = (self.visible if self.seen_final and self.visible.strip() else self.buffer)
+        return text.strip()

 # ======================
 # Dynamic length: scale max_new_tokens with the input length
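A short usage sketch of the FinalFilter added above, with chunk boundaries chosen to exercise both paths (the strings are made up for illustration):

ff = FinalFilter()
ff.push("Thought: reasoning the model should not show... <final>Hello")
ff.push(" there!</final>")
print(ff.get_visible())   # "Hello there!" (only the <final> content reaches the UI)

fb = FinalFilter()
fb.push("A plain answer without any tags.")
print(fb.get_visible())   # no <final> seen, so the full buffer is shown as the fallback

Note that the regexes run per chunk, so an opening tag split across two chunks (say "<fin" then "al>") is never captured into visible; the buffer fallback then displays the raw text, tags included.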
@@ -155,11 +164,12 @@ def choose_max_new_tokens(user_text: str) -> int:
 # ======================
 PERSONA_TEMPLATES = {
     "默认(Default)": "",
+    "性感火辣 Flirty & Spicy": (
+        "Style: teasing, suggestive, direct; hot, daring language with a playfully naughty edge.\n"
+        "Behavior: lean on double entendres, banter, and flirty talk; actively drive the flirtatious mood so the conversation stays charged; use 🔥, 👄, ❤️ and similar emoji in moderation.\n"
+        "Boundaries: nothing involving minors; no medical/legal/violence or other sensitive content; keep it entertainment and fantasy, never extending to real-world dangerous behavior."
     ),
+
     "温柔体贴 Sweet & Caring": (
         "Style: gentle, empathetic, patient; a soft voice, concise yet warm.\n"
         "Behavior: use reflective listening and emotion labeling (I understand you feel…); empathize first, then advise; use emoji in moderation.\n"
@@ -170,7 +180,7 @@ PERSONA_TEMPLATES = {
         "Behavior: short quips + practical advice; stay humorous and avoid personal attacks."
     ),
     "风趣段子手 Witty Jokester": (
+        "Style: quick-witted and funny, meme-heavy but never crude; mind the timing of each punchline.\n"
         "Behavior: give the answer first, then add a short joke or analogy; avoid overloading on corny jokes."
     ),
     "严谨导师 Analytical Coach": (
@@ -178,7 +188,7 @@ PERSONA_TEMPLATES = {
         "Behavior: clarify the goal first, then give milestones and checklists; cite common best practices."
     ),
     "冷淡毒舌 Deadpan Sarcasm": (
+        "Style: deadpan, restrained, mildly ironic; never rude or abusive.\n"
         "Behavior: cut straight to the point and state conclusions in short sentences; if the user is frustrated, warm up a little first."
     ),
     "心理咨询同伴 Therapeutic Listener": (
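How a template ends up in the system prompt: pick_persona fetches the entry and compose_system_prompt appends it together with the rules block. That function is only partially visible in this diff, so the sketch below is a simplified stand-in, not the file's exact code:

BASE = "You are a helpful, concise chat assistant."

def compose(base: str, persona: str) -> str:
    # simplified stand-in for compose_system_prompt
    parts = [base]
    if persona:
        parts.append("# Persona\n" + persona)
    parts.append("# Rules\n- Do NOT reveal chain-of-thought or <Thought>/<analysis>/<think> sections.")
    return "\n\n".join(parts)

persona = "Style: deadpan, restrained, mildly ironic; never rude or abusive."
print(compose(BASE, persona))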
@@ -197,27 +207,31 @@ def compose_system_prompt(base_prompt: str, persona_text: str) -> str:
         f"# Rules\n"
         f"- Stay in persona unless the user explicitly asks to change.\n"
         f"- Be concise unless the user asks for detail.\n"
-        f"- Do NOT reveal chain-of-thought or <Thought> sections.\n"
+        f"- Do NOT reveal chain-of-thought or <Thought>/<analysis>/<think> sections.\n"
+        f"- Prefer enclosing the final answer in <final> ... </final>.\n"
     )

 # ======================
-# Build
+# Build the prompt (messages form → chat_template; manual fallback when unavailable)
 # ======================
+def apply_chat_template_with_fallback(messages):
+    try:
+        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    except Exception:
+        parts = []
+        role_map = {"system": "system", "user": "user", "assistant": "assistant"}
+        for m in messages:
+            role = role_map.get(m["role"], "user")
+            parts.append(f"<|{role}|>\n{m['content']}\n</s>")
+        parts.append("<|assistant|>\n")  # generation prompt
+        return "".join(parts)
+
 def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
-    """
-    history_msgs: the history from Chatbot(type='messages'), [{role, content}, ...]
-    """
     system_prompt = compose_system_prompt(BASE_SYSTEM_PROMPT, persona_text)
-    messages = [{"role": "system", "content": system_prompt}]
     tail = [m for m in history_msgs if m.get("role") in ("user", "assistant")]
     tail = tail[-8:] if len(tail) > 8 else tail
-    prompt = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    return prompt
+    messages = [{"role": "system", "content": system_prompt}] + tail + [{"role": "user", "content": user_msg}]
+    return apply_chat_template_with_fallback(messages)

 # ======================
 # Generation parameters (defaults)
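When the tokenizer ships no chat template, apply_chat_template_with_fallback falls back to a generic <|role|> transcript. Running just that branch on a toy message list shows the resulting prompt shape:

messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
]
parts = []
role_map = {"system": "system", "user": "user", "assistant": "assistant"}
for m in messages:
    role = role_map.get(m["role"], "user")
    parts.append(f"<|{role}|>\n{m['content']}\n</s>")
parts.append("<|assistant|>\n")  # generation prompt
print("".join(parts))
# <|system|>
# You are terse.
# </s><|user|>
# Hi
# </s><|assistant|>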
@@ -225,6 +239,7 @@ def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
 BASE_GEN_KW = dict(
     temperature=TEMPERATURE,
     top_p=TOP_P,
+    top_k=TOP_K,
     repetition_penalty=REPETITION_PENALTY,
     do_sample=True,
     eos_token_id=tokenizer.eos_token_id,
@@ -232,14 +247,14 @@ BASE_GEN_KW(
 )

 # ======================
+# Main inference: streaming output (prefer <final>; fall back to the full text when absent; includes persona + anti-repetition)
 # ======================
 def stream_chat(history_msgs, user_msg, persona_text):
     try:
         if not user_msg or not user_msg.strip():
             yield history_msgs; return

+        # safety check on the raw input
         if violates(user_msg):
             yield history_msgs + [
                 {"role": "user", "content": user_msg},
@@ -256,20 +271,40 @@ def stream_chat(history_msgs, user_msg, persona_text):
             max_new_tokens=choose_max_new_tokens(user_msg),
             **BASE_GEN_KW
         )
+
+        # optional bad words: discourage the model from emitting thought tags
+        HIDE_THOUGHT = os.getenv("HIDE_THOUGHT", "1") != "0"
+        if HIDE_THOUGHT:
+            bad_strings = [
+                "<Thought>", "</Thought>", "Thought:", "Chain-of-Thought",
+                "<analysis>", "</analysis>", "analysis:", "reasoning:",
+                "<think>", "</think>", "reflection:", "思考过程", "推理过程", "分析:"
+            ]
+            bad_words_ids = []
+            for s in bad_strings:
+                ids = tokenizer(s, add_special_tokens=False).input_ids
+                if ids: bad_words_ids.append(ids)
+            if bad_words_ids:
+                gen_kwargs["bad_words_ids"] = bad_words_ids

         print("[gen] start")
         th = Thread(target=model.generate, kwargs=gen_kwargs, daemon=True)
         th.start()

+        ff = FinalFilter()
+        last_len = 0  # anti-repetition key: how much of the currently displayed text has already been shown
+
         for chunk in streamer:
+            ff.push(chunk)
+            visible = ff.get_visible()  # <final> seen → final content; otherwise → raw-text fallback

+            # the newly added delta
+            new_text = visible[last_len:]
+            if not new_text:
+                continue
+            last_len = len(visible)
+
+            # run the safety check on the currently visible text
             if violates(visible):
                 yield history_msgs + [
                     {"role": "user", "content": user_msg},
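The HIDE_THOUGHT block feeds transformers' bad_words_ids, which expects a list of token-id sequences to ban during generation. A standalone sketch of how those ids are built (the tokenizer id is illustrative and requires a download; it is not necessarily what MODEL_ID resolves to here):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # illustrative choice
bad_words_ids = []
for s in ["<Thought>", "Thought:", "<think>"]:
    ids = tok(s, add_special_tokens=False).input_ids
    if ids:
        bad_words_ids.append(ids)
# later: model.generate(..., bad_words_ids=bad_words_ids)

Banning surface strings only blocks those exact tokenizations; the same text can still appear through a different token split, so this is a mitigation rather than a guarantee.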
@@ -281,7 +316,17 @@
                 {"role": "user", "content": user_msg},
                 {"role": "assistant", "content": visible},
             ]
+
+        print("[gen] done, shown_len:", last_len)
+
+        # normally no extra fallback is needed here: without <final>, the buffer has been shown all along.
+        # but if you want a notice when there is no output at all, keep the logic below:
+        if last_len == 0:
+            hint = "(No visible output was produced; try again or rephrase the prompt.)"
+            yield history_msgs + [
+                {"role": "user", "content": user_msg},
+                {"role": "assistant", "content": hint},
+            ]

     except Exception as e:
         traceback.print_exc()
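The last_len gate above re-yields only when the visible text has grown. A tag-free simulation of that loop logic (fake chunks stand in for the TextIteratorStreamer):

chunks = ["Hel", "lo", "", " world"]
shown, last_len = "", 0
for c in chunks:
    shown += c
    visible = shown.strip()
    new_text = visible[last_len:]
    if not new_text:
        continue            # nothing new: skip this yield
    last_len = len(visible)
    print(repr(visible))    # the UI re-renders the full visible text
# prints 'Hel', then 'Hello', then 'Hello world'

One quirk worth knowing: when FinalFilter switches from the buffer to <final> content, visible can shrink, the slice comes back empty, and the display stays frozen until the final text outgrows last_len.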
@@ -303,7 +348,7 @@ def pick_persona(name: str) -> str:
     return PERSONA_TEMPLATES.get(name or "默认(Default)", "")

 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("### 懂你寂寞 · Let's Chat\n
+    gr.Markdown("### 懂你寂寞 · Let's Chat\n")

     # Persona accordion
     with gr.Accordion("🎭 Persona(人设)", open=False):