Update app.py
app.py
CHANGED
@@ -9,12 +9,12 @@ from transformers import (
 # ======================
 # Tunable parameters (can also be overridden via the Space's Variables)
 # ======================
-MODEL_ID = os.getenv("MODEL_ID", "happzy2633/qwen2.5-7b-ins-v3").strip()
+MODEL_ID = os.getenv("MODEL_ID", "huihui_ai/qwen2.5-abliterate:7b-instruct").strip()
 MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "256"))
-TEMPERATURE = float(os.getenv("TEMPERATURE", "0.
+TEMPERATURE = float(os.getenv("TEMPERATURE", "0.85"))  # slightly higher, reduces repetition
 TOP_P = float(os.getenv("TOP_P", "0.9"))
+TOP_K = int(os.getenv("TOP_K", "50"))
+REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.12"))
 SAFE_MODE = os.getenv("SAFE_MODE", "1") != "0"  # 1 = enable basic filtering; set to 0 to turn it off

 # —— Base system prompt + persona default (reinforces the ban on leaking the thinking process) ——
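All of these knobs are read through os.getenv, so the new TOP_K and REPETITION_PENALTY can be overridden from the Space's Variables without touching the code. A minimal sketch of the override path (the values below are illustrative, not part of this commit):

import os

# Illustrative overrides; on a Space these would be set as Variables rather than in code.
os.environ["TEMPERATURE"] = "0.7"
os.environ["REPETITION_PENALTY"] = "1.05"

TEMPERATURE = float(os.getenv("TEMPERATURE", "0.85"))                # -> 0.7
REPETITION_PENALTY = float(os.getenv("REPETITION_PENALTY", "1.12"))  # -> 1.05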
@@ -22,8 +22,9 @@ BASE_SYSTEM_PROMPT = os.getenv(
     "SYSTEM_PROMPT",
     """
 You are a helpful, concise chat assistant.
-Do NOT reveal chain-of-thought, analysis, inner reasoning,
+Do NOT reveal chain-of-thought, analysis, inner reasoning, <Thought>, <analysis>, <think>, or similar sections.
 If asked to explain reasoning, provide a brief, high-level summary of steps only.
+The final user-visible answer SHOULD be enclosed in <final> ... </final>. If you don't use <final>, output plain text.
 """
 ).strip()
 DEFAULT_PERSONA = os.getenv("PERSONA", "").strip()
@@ -101,44 +102,52 @@ def violates(text: str) -> bool:
     return False

 # ======================
+# FinalFilter: prefer showing only <final> content; fall back to the full text when no <final> appears
 # ======================
+FINAL_OPEN_PAT = re.compile(r"(?is)<\s*final\s*>")
+FINAL_CLOSE_PAT = re.compile(r"(?is)</\s*final\s*>")

+class FinalFilter:
+    """
+    Logic:
+    - The stream accumulates self.buffer = the raw visible text (nothing is skipped)
+    - Whenever <final>…</final> is detected, its contents are appended to self.visible and seen_final is set to True
+    - get_visible():
+      - if seen_final is True and self.visible is non-empty: return self.visible
+      - otherwise: return self.buffer (fall back to showing everything, avoiding a blank reply)
+    """
+    def __init__(self):
+        self.in_final = False
+        self.seen_final = False
+        self.visible = ""  # <final> content captured so far
+        self.buffer = ""   # raw accumulation (fallback when there is no <final>)
+
+    def push(self, chunk: str):
+        self.buffer += chunk  # always keep the raw stream (for the fallback)
+        i = 0
+        while i < len(chunk):
+            if not self.in_final:
+                m = FINAL_OPEN_PAT.search(chunk, i)
+                if not m:
+                    # not inside <final> yet; skip (the buffer already holds it)
+                    break
+                i = m.end()
+                self.in_final = True
+                self.seen_final = True
+            else:
+                m = FINAL_CLOSE_PAT.search(chunk, i)
+                if not m:
+                    # <final> not closed yet; keep accumulating
+                    self.visible += chunk[i:]
+                    i = len(chunk)
+                else:
+                    self.visible += chunk[i:m.start()]
+                    i = m.end()
+                    self.in_final = False
+
+    def get_visible(self) -> str:
+        text = (self.visible if self.seen_final and self.visible.strip() else self.buffer)
+        return text.strip()

 # ======================
 # Dynamic length: scale max_new_tokens with the input length
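A short usage sketch of the FinalFilter added above, with chunk boundaries chosen to exercise both paths (the strings are made up for illustration):

ff = FinalFilter()
ff.push("Thought: reasoning the model should not show... <final>Hello")
ff.push(" there!</final>")
print(ff.get_visible())   # "Hello there!" (only the <final> content reaches the UI)

fb = FinalFilter()
fb.push("A plain answer without any tags.")
print(fb.get_visible())   # no <final> seen, so the full buffer is shown as the fallback

Note that the regexes run per chunk, so an opening tag split across two chunks (say "<fin" then "al>") is never captured into visible; the buffer fallback then displays the raw text, tags included.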
@@ -155,11 +164,12 @@ def choose_max_new_tokens(user_text: str) -> int:
 # ======================
 PERSONA_TEMPLATES = {
     "默认(Default)": "",
+    "性感火辣 Flirty & Spicy": (
+        "Style: teasing, suggestive, direct; hot, daring language with a playfully naughty edge.\n"
+        "Behavior: lean on double entendres, banter, and flirty talk; actively drive the flirtatious mood so the conversation stays charged; use 🔥, 👄, ❤️ and similar emoji in moderation.\n"
+        "Boundaries: nothing involving minors; no medical/legal/violence or other sensitive content; keep it entertainment and fantasy, never extending to real-world dangerous behavior."
     ),
+
     "温柔体贴 Sweet & Caring": (
         "Style: gentle, empathetic, patient; a soft voice, concise yet warm.\n"
         "Behavior: use reflective listening and emotion labeling (I understand you feel…); empathize first, then advise; use emoji in moderation.\n"
@@ -170,7 +180,7 @@ PERSONA_TEMPLATES = {
         "Behavior: short quips + practical advice; stay humorous and avoid personal attacks."
     ),
     "风趣段子手 Witty Jokester": (
+        "Style: quick-witted and funny, meme-heavy but never crude; mind the timing of each punchline.\n"
         "Behavior: give the answer first, then add a short joke or analogy; avoid overloading on corny jokes."
     ),
     "严谨导师 Analytical Coach": (
@@ -178,7 +188,7 @@ PERSONA_TEMPLATES = {
         "Behavior: clarify the goal first, then give milestones and checklists; cite common best practices."
     ),
     "冷淡毒舌 Deadpan Sarcasm": (
+        "Style: deadpan, restrained, mildly ironic; never rude or abusive.\n"
         "Behavior: cut straight to the point and state conclusions in short sentences; if the user is frustrated, warm up a little first."
     ),
     "心理咨询同伴 Therapeutic Listener": (
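How a template ends up in the system prompt: pick_persona fetches the entry and compose_system_prompt appends it together with the rules block. That function is only partially visible in this diff, so the sketch below is a simplified stand-in, not the file's exact code:

BASE = "You are a helpful, concise chat assistant."

def compose(base: str, persona: str) -> str:
    # simplified stand-in for compose_system_prompt
    parts = [base]
    if persona:
        parts.append("# Persona\n" + persona)
    parts.append("# Rules\n- Do NOT reveal chain-of-thought or <Thought>/<analysis>/<think> sections.")
    return "\n\n".join(parts)

persona = "Style: deadpan, restrained, mildly ironic; never rude or abusive."
print(compose(BASE, persona))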
@@ -197,27 +207,31 @@ def compose_system_prompt(base_prompt: str, persona_text: str) -> str:
         f"# Rules\n"
         f"- Stay in persona unless the user explicitly asks to change.\n"
         f"- Be concise unless the user asks for detail.\n"
-        f"- Do NOT reveal chain-of-thought or <Thought> sections.\n"
+        f"- Do NOT reveal chain-of-thought or <Thought>/<analysis>/<think> sections.\n"
+        f"- Prefer enclosing the final answer in <final> ... </final>.\n"
     )

 # ======================
-# Build
+# Build the prompt (messages form → chat_template; manual fallback when unavailable)
 # ======================
+def apply_chat_template_with_fallback(messages):
+    try:
+        return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    except Exception:
+        parts = []
+        role_map = {"system": "system", "user": "user", "assistant": "assistant"}
+        for m in messages:
+            role = role_map.get(m["role"], "user")
+            parts.append(f"<|{role}|>\n{m['content']}\n</s>")
+        parts.append("<|assistant|>\n")  # generation prompt
+        return "".join(parts)
+
 def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
-    """
-    history_msgs: the history from Chatbot(type='messages'), [{role, content}, ...]
-    """
     system_prompt = compose_system_prompt(BASE_SYSTEM_PROMPT, persona_text)
-    messages = [{"role": "system", "content": system_prompt}]
     tail = [m for m in history_msgs if m.get("role") in ("user", "assistant")]
     tail = tail[-8:] if len(tail) > 8 else tail
-    prompt = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
-    return prompt
+    messages = [{"role": "system", "content": system_prompt}] + tail + [{"role": "user", "content": user_msg}]
+    return apply_chat_template_with_fallback(messages)

 # ======================
 # Generation parameters (defaults)
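When the tokenizer ships no chat template, apply_chat_template_with_fallback falls back to a generic <|role|> transcript. Running just that branch on a toy message list shows the resulting prompt shape:

messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
]
parts = []
role_map = {"system": "system", "user": "user", "assistant": "assistant"}
for m in messages:
    role = role_map.get(m["role"], "user")
    parts.append(f"<|{role}|>\n{m['content']}\n</s>")
parts.append("<|assistant|>\n")  # generation prompt
print("".join(parts))
# <|system|>
# You are terse.
# </s><|user|>
# Hi
# </s><|assistant|>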
@@ -225,6 +239,7 @@ def build_prompt(history_msgs, user_msg: str, persona_text: str) -> str:
 BASE_GEN_KW = dict(
     temperature=TEMPERATURE,
     top_p=TOP_P,
+    top_k=TOP_K,
     repetition_penalty=REPETITION_PENALTY,
     do_sample=True,
     eos_token_id=tokenizer.eos_token_id,
@@ -232,14 +247,14 @@ BASE_GEN_KW(
 )

 # ======================
+# Main inference: streaming output (prefer <final>; fall back to the full text when absent; includes persona + anti-repetition)
 # ======================
 def stream_chat(history_msgs, user_msg, persona_text):
     try:
         if not user_msg or not user_msg.strip():
             yield history_msgs; return

+        # safety check on the raw input
         if violates(user_msg):
             yield history_msgs + [
                 {"role": "user", "content": user_msg},
@@ -256,20 +271,40 @@ def stream_chat(history_msgs, user_msg, persona_text):
             max_new_tokens=choose_max_new_tokens(user_msg),
             **BASE_GEN_KW
         )
+
+        # optional bad words: discourage the model from emitting thought tags
+        HIDE_THOUGHT = os.getenv("HIDE_THOUGHT", "1") != "0"
+        if HIDE_THOUGHT:
+            bad_strings = [
+                "<Thought>", "</Thought>", "Thought:", "Chain-of-Thought",
+                "<analysis>", "</analysis>", "analysis:", "reasoning:",
+                "<think>", "</think>", "reflection:", "思考过程", "推理过程", "分析:"
+            ]
+            bad_words_ids = []
+            for s in bad_strings:
+                ids = tokenizer(s, add_special_tokens=False).input_ids
+                if ids: bad_words_ids.append(ids)
+            if bad_words_ids:
+                gen_kwargs["bad_words_ids"] = bad_words_ids

         print("[gen] start")
         th = Thread(target=model.generate, kwargs=gen_kwargs, daemon=True)
         th.start()

+        ff = FinalFilter()
+        last_len = 0  # anti-repetition key: how much of the currently displayed text has already been shown
+
         for chunk in streamer:
+            ff.push(chunk)
+            visible = ff.get_visible()  # <final> seen → final content; otherwise → raw-text fallback

+            # the newly added delta
+            new_text = visible[last_len:]
+            if not new_text:
+                continue
+            last_len = len(visible)
+
+            # run the safety check on the currently visible text
             if violates(visible):
                 yield history_msgs + [
                     {"role": "user", "content": user_msg},
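The HIDE_THOUGHT block feeds transformers' bad_words_ids, which expects a list of token-id sequences to ban during generation. A standalone sketch of how those ids are built (the tokenizer id is illustrative and requires a download; it is not necessarily what MODEL_ID resolves to here):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")  # illustrative choice
bad_words_ids = []
for s in ["<Thought>", "Thought:", "<think>"]:
    ids = tok(s, add_special_tokens=False).input_ids
    if ids:
        bad_words_ids.append(ids)
# later: model.generate(..., bad_words_ids=bad_words_ids)

Banning surface strings only blocks those exact tokenizations; the same text can still appear through a different token split, so this is a mitigation rather than a guarantee.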
@@ -281,7 +316,17 @@
                 {"role": "user", "content": user_msg},
                 {"role": "assistant", "content": visible},
             ]
+
+        print("[gen] done, shown_len:", last_len)
+
+        # normally no extra fallback is needed here: without <final>, the buffer has been shown all along.
+        # but if you want a notice when there is no output at all, keep the logic below:
+        if last_len == 0:
+            hint = "(No visible output was produced; try again or rephrase the prompt.)"
+            yield history_msgs + [
+                {"role": "user", "content": user_msg},
+                {"role": "assistant", "content": hint},
+            ]

     except Exception as e:
         traceback.print_exc()
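The last_len gate above re-yields only when the visible text has grown. A tag-free simulation of that loop logic (fake chunks stand in for the TextIteratorStreamer):

chunks = ["Hel", "lo", "", " world"]
shown, last_len = "", 0
for c in chunks:
    shown += c
    visible = shown.strip()
    new_text = visible[last_len:]
    if not new_text:
        continue            # nothing new: skip this yield
    last_len = len(visible)
    print(repr(visible))    # the UI re-renders the full visible text
# prints 'Hel', then 'Hello', then 'Hello world'

One quirk worth knowing: when FinalFilter switches from the buffer to <final> content, visible can shrink, the slice comes back empty, and the display stays frozen until the final text outgrows last_len.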
@@ -303,7 +348,7 @@ def pick_persona(name: str) -> str:
     return PERSONA_TEMPLATES.get(name or "默认(Default)", "")

 with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
-    gr.Markdown("### 懂你寂寞 · Let's Chat\n
+    gr.Markdown("### 懂你寂寞 · Let's Chat\n")

     # Persona accordion
     with gr.Accordion("🎭 Persona(人设)", open=False):