Gilvaa committed on
Commit
1b751e2
·
verified ·
1 Parent(s): 2d701a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -71
app.py CHANGED
@@ -9,12 +9,12 @@ from transformers import (
9
  # ======================
10
  # 可调参数(也可用 Space 的 Variables 覆盖)
11
  # ======================
12
- # MODEL_ID = os.getenv("MODEL_ID", "huihui_ai/qwen2.5-abliterate:7b-instruct").strip()
13
- MODEL_ID = os.getenv("MODEL_ID", "happzy2633/qwen2.5-7b-ins-v3").strip()
14
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
15
- TEMPERATURE = float(os.getenv("TEMPERATURE", "0.75"))
16
  TOP_P = float(os.getenv("TOP_P", "0.9"))
17
- REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.08"))
 
18
  SAFE_MODE = os.getenv("SAFE_MODE", "1") != "0" # 1=开启基础过滤;想关就设为 0
19
 
20
  # ——系统基础提示 + 人设默认(强化禁止泄露思考过程)——
@@ -22,8 +22,9 @@ BASE_SYSTEM_PROMPT = os.getenv(
22
  "SYSTEM_PROMPT",
23
  """
24
  You are a helpful, concise chat assistant.
25
- Do NOT reveal chain-of-thought, analysis, inner reasoning, or <Thought> sections.
26
  If asked to explain reasoning, provide a brief, high-level summary of steps only.
 
27
  """
28
  ).strip()
29
  DEFAULT_PERSONA = os.getenv("PERSONA", "").strip()
@@ -101,44 +102,52 @@ def violates(text: str) -> bool:
101
  return False
102
 
103
  # ======================
104
- # 关闭“思考/推理”可见输出(总开关 + 生成屏蔽 + 流式清洗)
105
  # ======================
106
- HIDE_THOUGHT = os.getenv("HIDE_THOUGHT", "1") != "0" # 1=隐藏思考过程
 
107
 
108
- BAD_STRINGS = [
109
- "<Thought>", "</Thought>", "Thought:", "Chain-of-Thought",
110
- "<analysis>", "</analysis>", "analysis:", "reasoning:",
111
- "推理过程", "思考过程", "分析:"
112
- ]
113
-
114
- # generate() 使用的 bad_words_ids
115
- BAD_WORDS_IDS = []
116
- if HIDE_THOUGHT:
117
- for s in BAD_STRINGS:
118
- ids = tokenizer(s, add_special_tokens=False).input_ids
119
- if ids:
120
- BAD_WORDS_IDS.append(ids)
121
-
122
- # 正则清洗(即使未闭合标签也会被隐藏)
123
- def sanitize_visible(text: str) -> str:
124
- if not HIDE_THOUGHT or not text:
125
- return text
126
- # 1) 清除成对标签 <Thought> ... </Thought> / <analysis> ... </analysis>
127
- text = re.sub(
128
- r"(?is)<\s*(thought|analysis|chain[_\s-]?of[_\s-]?thought)\s*>.*?</\s*\1\s*>",
129
- "", text
130
- )
131
- # 2) 清除从开标签开始到末尾(处理未闭合情况)
132
- text = re.sub(
133
- r"(?is)<\s*(thought|analysis|chain[_\s-]?of[_\s-]?thought)\s*>.*$",
134
- "", text
135
- )
136
- # 3) 清常见行前缀(Thought: / analysis: / reasoning: / 思考过程 / 分析:)
137
- text = re.sub(
138
- r"(?im)^\s*(Thought:|analysis:|reasoning:|思考过程|推理过程|分析:).*$",
139
- "", text
140
- )
141
- return text.strip()
 
 
 
 
 
 
 
142
 
143
  # ======================
144
  # 动态长度:根据输入长短调 max_new_tokens
@@ -155,11 +164,12 @@ def choose_max_new_tokens(user_text: str) -> int:
155
  # ======================
156
  PERSONA_TEMPLATES = {
157
  "默认(Default)": "",
158
- "风骚御姐": (
159
- "风格:风骚、骚话连篇、性感、露骨、挑逗、性欲\n"
160
- "行为:先共情后建议;适度使用表情符号。\n"
161
- "边界:不提供医疗/法律结论;出现风险话题时建议求助专业人士。"
162
  ),
 
163
  "温柔体贴 Sweet & Caring": (
164
  "风格:温柔、共情、耐心,语气轻柔,简洁而温暖。\n"
165
  "行为:多用复述与情感标注(我理解你感到…),先共情后建议;适度使用表情符号。\n"
@@ -170,7 +180,7 @@ PERSONA_TEMPLATES = {
170
  "行为:简短吐槽 + 实用建议;保持幽默,避免人身攻击。"
171
  ),
172
  "风趣段子手 Witty Jokester": (
173
- "风格:机智幽默、梗多低俗;注重包袱节奏。\n"
174
  "行为:先给答案,再加简短段子或比喻;避免冷笑话过载。"
175
  ),
176
  "严谨导师 Analytical Coach": (
@@ -178,7 +188,7 @@ PERSONA_TEMPLATES = {
178
  "行为:先澄清目标,再给里程碑与检查清单;引用通用最佳实践。"
179
  ),
180
  "冷淡毒舌 Deadpan Sarcasm": (
181
- "风格:冷面、反讽、粗鲁、辱骂。\n"
182
  "行为:直切重点,短句给结论;如用户受挫,稍作暖场。"
183
  ),
184
  "心理咨询同伴 Therapeutic Listener": (
@@ -197,27 +207,31 @@ def compose_system_prompt(base_prompt: str, persona_text: str) -> str:
197
  f"# Rules\n"
198
  f"- Stay in persona unless the user explicitly asks to change.\n"
199
  f"- Be concise unless the user asks for detail.\n"
200
- f"- Do NOT reveal chain-of-thought or <Thought> sections.\n"
 
201
  )
202
 
203
  # ======================
204
- # 构建 Qwen 模板 Prompt(messages 形式 → chat_template
205
  # ======================
 
 
 
 
 
 
 
 
 
 
 
 
206
  def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
207
- """
208
- history_msgs: Chatbot(type='messages') 的历史 [{role, content}, ...]
209
- """
210
  system_prompt = compose_system_prompt(BASE_SYSTEM_PROMPT, persona_text)
211
- messages = [{"role": "system", "content": system_prompt}]
212
  tail = [m for m in history_msgs if m.get("role") in ("user", "assistant")]
213
  tail = tail[-8:] if len(tail) > 8 else tail
214
- messages.extend(tail)
215
- messages.append({"role": "user", "content": user_msg})
216
-
217
- prompt = tokenizer.apply_chat_template(
218
- messages, tokenize=False, add_generation_prompt=True
219
- )
220
- return prompt
221
 
222
  # ======================
223
  # 生成参数(默认档)
@@ -225,6 +239,7 @@ def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
225
  BASE_GEN_KW = dict(
226
  temperature=TEMPERATURE,
227
  top_p=TOP_P,
 
228
  repetition_penalty=REPETITION_PENALTY,
229
  do_sample=True,
230
  eos_token_id=tokenizer.eos_token_id,
@@ -232,14 +247,14 @@ BASE_GEN_KW = dict(
232
  )
233
 
234
  # ======================
235
- # 主推理:流式输出(含 persona + 思考清洗)
236
  # ======================
237
  def stream_chat(history_msgs, user_msg, persona_text):
238
  try:
239
  if not user_msg or not user_msg.strip():
240
  yield history_msgs; return
241
 
242
- # 先用原始用户输入做安全检测
243
  if violates(user_msg):
244
  yield history_msgs + [
245
  {"role": "user", "content": user_msg},
@@ -256,20 +271,40 @@ def stream_chat(history_msgs, user_msg, persona_text):
256
  max_new_tokens=choose_max_new_tokens(user_msg),
257
  **BASE_GEN_KW
258
  )
259
- # 仅在需要时传入 bad_words_ids
260
- if HIDE_THOUGHT and BAD_WORDS_IDS:
261
- gen_kwargs["bad_words_ids"] = BAD_WORDS_IDS
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  print("[gen] start")
264
  th = Thread(target=model.generate, kwargs=gen_kwargs, daemon=True)
265
  th.start()
266
 
267
- reply = ""
 
 
268
  for chunk in streamer:
269
- reply += chunk
270
- visible = sanitize_visible(reply) # 每步清洗
271
 
272
- # 用可见文本做安全检测与展示
 
 
 
 
 
 
273
  if violates(visible):
274
  yield history_msgs + [
275
  {"role": "user", "content": user_msg},
@@ -281,7 +316,17 @@ def stream_chat(history_msgs, user_msg, persona_text):
281
  {"role": "user", "content": user_msg},
282
  {"role": "assistant", "content": visible},
283
  ]
284
- print("[gen] done, len:", len(reply))
 
 
 
 
 
 
 
 
 
 
285
 
286
  except Exception as e:
287
  traceback.print_exc()
@@ -303,7 +348,7 @@ def pick_persona(name: str) -> str:
303
  return PERSONA_TEMPLATES.get(name or "默认(Default)", "")
304
 
305
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
306
- gr.Markdown("### 懂你寂寞 · Let's Chat\n ")
307
 
308
  # Persona 折叠区
309
  with gr.Accordion("🎭 Persona(人设)", open=False):
 
9
  # ======================
10
  # 可调参数(也可用 Space 的 Variables 覆盖)
11
  # ======================
12
+ MODEL_ID = os.getenv("MODEL_ID", "huihui_ai/qwen2.5-abliterate:7b-instruct").strip()
 
13
  MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
14
+ TEMPERATURE = float(os.getenv("TEMPERATURE", "0.85")) # 略升,减复读
15
  TOP_P = float(os.getenv("TOP_P", "0.9"))
16
+ TOP_K = int(os.getenv("TOP_K", "50"))
17
+ REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.12"))
18
  SAFE_MODE = os.getenv("SAFE_MODE", "1") != "0" # 1=开启基础过滤;想关就设为 0
19
 
20
  # ——系统基础提示 + 人设默认(强化禁止泄露思考过程)——
 
22
  "SYSTEM_PROMPT",
23
  """
24
  You are a helpful, concise chat assistant.
25
+ Do NOT reveal chain-of-thought, analysis, inner reasoning, <Thought>, <analysis>, <think>, or similar sections.
26
  If asked to explain reasoning, provide a brief, high-level summary of steps only.
27
+ The final user-visible answer SHOULD be enclosed in <final> ... </final>. If you don't use <final>, output plain text.
28
  """
29
  ).strip()
30
  DEFAULT_PERSONA = os.getenv("PERSONA", "").strip()
 
102
  return False
103
 
104
  # ======================
105
+ # FinalFilter:优先只保留 <final>;没有 <final> 时回退显示全文
106
  # ======================
107
FINAL_OPEN_PAT = re.compile(r"(?is)<\s*final\s*>")
FINAL_CLOSE_PAT = re.compile(r"(?is)</\s*final\s*>")
# Matches a trailing fragment that could still grow into <final> or </final>
# once more text arrives (e.g. "<", "<fi", "</ fin", "<final ").
_PARTIAL_TAG_PAT = re.compile(r"(?is)</?\s*(?:f(?:i(?:n(?:a(?:l\s*)?)?)?)?)?\Z")


class FinalFilter:
    """Incrementally extract the text enclosed in <final> ... </final>.

    Behaviour:
      - ``buffer`` accumulates the raw stream verbatim (nothing is dropped).
      - Text found between <final> and </final> is appended to ``visible``
        and ``seen_final`` is set once an opening tag has been seen.
      - ``get_visible()`` returns ``visible`` when a <final> section was seen
        and it is not blank; otherwise it falls back to the full ``buffer``
        so the user never ends up with an empty reply.

    Streamed chunks can split a tag anywhere (``"<fi"`` + ``"nal>"``), so a
    trailing fragment that might still become a tag is held back in
    ``_pending`` and re-scanned together with the next chunk. A held-back
    fragment that never completes is not added to ``visible`` (it is still
    present in ``buffer``).
    """

    def __init__(self):
        self.in_final = False    # currently inside an open <final> section
        self.seen_final = False  # at least one <final> tag was encountered
        self.visible = ""        # text captured from <final> sections
        self.buffer = ""         # raw accumulated stream (fallback text)
        self._pending = ""       # possible partial tag carried to next push

    def push(self, chunk: str):
        """Feed the next streamed chunk into the filter."""
        if not chunk:
            return
        self.buffer += chunk  # always keep the raw stream for the fallback

        # Re-attach any fragment held back from the previous chunk, then
        # hold back a new trailing fragment if it could still become a tag.
        data = self._pending + chunk
        self._pending = ""
        partial = _PARTIAL_TAG_PAT.search(data)
        if partial:
            self._pending = data[partial.start():]
            data = data[:partial.start()]

        pos = 0
        while pos < len(data):
            if not self.in_final:
                opened = FINAL_OPEN_PAT.search(data, pos)
                if not opened:
                    # Outside <final>: nothing to capture (buffer has it).
                    break
                pos = opened.end()
                self.in_final = True
                self.seen_final = True
            else:
                closed = FINAL_CLOSE_PAT.search(data, pos)
                if not closed:
                    # Section still open: capture everything so far.
                    self.visible += data[pos:]
                    pos = len(data)
                else:
                    self.visible += data[pos:closed.start()]
                    pos = closed.end()
                    self.in_final = False

    def get_visible(self) -> str:
        """Return the text to display: <final> content, else the raw buffer."""
        if self.seen_final and self.visible.strip():
            return self.visible.strip()
        return self.buffer.strip()
151
 
152
  # ======================
153
  # 动态长度:根据输入长短调 max_new_tokens
 
164
  # ======================
165
  PERSONA_TEMPLATES = {
166
  "默认(Default)": "",
167
+ "性感火辣 Flirty & Spicy": (
168
+ "风格:挑逗、暧昧、直接,语言热辣大胆,带点坏坏的撩人气息。\n"
169
+ "行为:多用双关语、调侃与骚话;主动带动暧昧氛围,让对话充满张力;适度使用火焰🔥、唇👄、爱心❤️等表情符号。\n"
170
+ "边界:不涉及未成年人,不提供医疗/法律/暴力等敏感内容;保持娱乐和幻想氛围,不延伸到现实危险行为。"
171
  ),
172
+
173
  "温柔体贴 Sweet & Caring": (
174
  "风格:温柔、共情、耐心,语气轻柔,简洁而温暖。\n"
175
  "行为:多用复述与情感标注(我理解你感到…),先共情后建议;适度使用表情符号。\n"
 
180
  "行为:简短吐槽 + 实用建议;保持幽默,避免人身攻击。"
181
  ),
182
  "风趣段子手 Witty Jokester": (
183
+ "风格:机智幽默、梗多但不低俗;注重包袱节奏。\n"
184
  "行为:先给答案,再加简短段子或比喻;避免冷笑话过载。"
185
  ),
186
  "严谨导师 Analytical Coach": (
 
188
  "行为:先澄清目标,再给里程碑与检查清单;引用通用最佳实践。"
189
  ),
190
  "冷淡毒舌 Deadpan Sarcasm": (
191
+ "风格:冷面、克制、轻微反讽;不粗鲁不辱骂。\n"
192
  "行为:直切重点,短句给结论;如用户受挫,稍作暖场。"
193
  ),
194
  "心理咨询同伴 Therapeutic Listener": (
 
207
  f"# Rules\n"
208
  f"- Stay in persona unless the user explicitly asks to change.\n"
209
  f"- Be concise unless the user asks for detail.\n"
210
+ f"- Do NOT reveal chain-of-thought or <Thought>/<analysis>/<think> sections.\n"
211
+ f"- Prefer enclosing the final answer in <final> ... </final>.\n"
212
  )
213
 
214
  # ======================
215
+ # 构建 Prompt(messages 形式 → chat_template;无则回退)
216
  # ======================
217
def apply_chat_template_with_fallback(messages):
    """Render ``messages`` into a single prompt string.

    The tokenizer's own chat template is tried first. If that call raises,
    a generic ``<|role|>`` layout is produced instead: every message becomes
    ``<|role|>\\ncontent\\n</s>`` (roles other than system/user/assistant are
    rendered as ``user``) and an open assistant header is appended as the
    generation prompt.
    """
    try:
        rendered = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    except Exception:
        known_roles = ("system", "user", "assistant")

        def _render(msg):
            role = msg["role"] if msg["role"] in known_roles else "user"
            return f"<|{role}|>\n{msg['content']}\n</s>"

        segments = [_render(msg) for msg in messages]
        segments.append("<|assistant|>\n")  # generation prompt
        rendered = "".join(segments)
    return rendered


def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
    """Build the model prompt from the persona, recent history and new input.

    The system message is composed from the base prompt and the persona
    text; only user/assistant turns from ``history_msgs`` are kept, limited
    to the 8 most recent ones, followed by the new user message.
    """
    system_prompt = compose_system_prompt(BASE_SYSTEM_PROMPT, persona_text)
    dialogue = [
        turn for turn in history_msgs
        if turn.get("role") in ("user", "assistant")
    ]
    messages = [
        {"role": "system", "content": system_prompt},
        *dialogue[-8:],
        {"role": "user", "content": user_msg},
    ]
    return apply_chat_template_with_fallback(messages)
 
 
 
 
 
235
 
236
  # ======================
237
  # 生成参数(默认档)
 
239
  BASE_GEN_KW = dict(
240
  temperature=TEMPERATURE,
241
  top_p=TOP_P,
242
+ top_k=TOP_K,
243
  repetition_penalty=REPETITION_PENALTY,
244
  do_sample=True,
245
  eos_token_id=tokenizer.eos_token_id,
 
247
  )
248
 
249
  # ======================
250
+ # 主推理:流式输出(优先显示 <final>;无 <final> 则全文回退;含 Persona + 防复读)
251
  # ======================
252
  def stream_chat(history_msgs, user_msg, persona_text):
253
  try:
254
  if not user_msg or not user_msg.strip():
255
  yield history_msgs; return
256
 
257
+ # 原始输入安全检测
258
  if violates(user_msg):
259
  yield history_msgs + [
260
  {"role": "user", "content": user_msg},
 
271
  max_new_tokens=choose_max_new_tokens(user_msg),
272
  **BASE_GEN_KW
273
  )
274
+
275
+ # 可选停词:减少模型输出思考标签
276
+ HIDE_THOUGHT = os.getenv("HIDE_THOUGHT", "1") != "0"
277
+ if HIDE_THOUGHT:
278
+ bad_strings = [
279
+ "<Thought>", "</Thought>", "Thought:", "Chain-of-Thought",
280
+ "<analysis>", "</analysis>", "analysis:", "reasoning:",
281
+ "<think>", "</think>", "reflection:", "思考过程", "推理过程", "分析:"
282
+ ]
283
+ bad_words_ids = []
284
+ for s in bad_strings:
285
+ ids = tokenizer(s, add_special_tokens=False).input_ids
286
+ if ids: bad_words_ids.append(ids)
287
+ if bad_words_ids:
288
+ gen_kwargs["bad_words_ids"] = bad_words_ids
289
 
290
  print("[gen] start")
291
  th = Thread(target=model.generate, kwargs=gen_kwargs, daemon=True)
292
  th.start()
293
 
294
+ ff = FinalFilter()
295
+ last_len = 0 # 防复读关键:记录“当前展示文本”的已展示长度
296
+
297
  for chunk in streamer:
298
+ ff.push(chunk)
299
+ visible = ff.get_visible() # 若已有 <final> → 是 final 内容;否则 → 原文回退
300
 
301
+ # 新增差量
302
+ new_text = visible[last_len:]
303
+ if not new_text:
304
+ continue
305
+ last_len = len(visible)
306
+
307
+ # 安全检查对当前可见文本进行
308
  if violates(visible):
309
  yield history_msgs + [
310
  {"role": "user", "content": user_msg},
 
316
  {"role": "user", "content": user_msg},
317
  {"role": "assistant", "content": visible},
318
  ]
319
+
320
+ print("[gen] done, shown_len:", last_len)
321
+
322
+ # 正常情况下这里不需要兜底,因为没有 <final> 时一路都在显示 buffer。
323
+ # 但如果你想在完全空白时提醒,可保留以下逻辑:
324
+ if last_len == 0:
325
+ hint = "(未产生可见输出,建议重试或更换提示词)"
326
+ yield history_msgs + [
327
+ {"role": "user", "content": user_msg},
328
+ {"role": "assistant", "content": hint},
329
+ ]
330
 
331
  except Exception as e:
332
  traceback.print_exc()
 
348
  return PERSONA_TEMPLATES.get(name or "默认(Default)", "")
349
 
350
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
351
+ gr.Markdown("### 懂你寂寞 · Let's Chat\n")
352
 
353
  # Persona 折叠区
354
  with gr.Accordion("🎭 Persona(人设)", open=False):