# app.py
# Qwen ✖ SimSimi Hybrid Chat for Hugging Face Spaces
# ---------------------------------------------------
# Requirements (requirements.txt):
#   transformers
#   torch
#   accelerate
#   gradio
#   httpx

import os
import re
import httpx
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# ---------------------------
# Config & Secrets
# ---------------------------
MODEL_ID = "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3"

SIMSIMI_ENDPOINT = "https://wsapi.simsimi.com/190410/talk"  # fixed, versioned SmallTalk endpoint
SIMSIMI_API_KEY  = os.getenv("SIMSIMI_API_KEY", "").strip()
SIMSIMI_LANG     = os.getenv("SIMSIMI_LANG", "ch").strip()   # "ch" = Simplified Chinese, "en" = English, "zh" = Traditional Chinese, etc.
SIMSIMI_BAD_MAX  = float(os.getenv("SIMSIMI_BAD_MAX", "0.30"))
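# On Hugging Face Spaces, set SIMSIMI_API_KEY as a Space secret; secrets are
# injected as environment variables at runtime, so os.getenv picks it up.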

# ---------------------------
# Load Qwen
# ---------------------------
# Use float16 on GPU backends (bfloat16 support on MPS is limited), float32 on CPU
dtype = (
    torch.float16
    if torch.cuda.is_available() or torch.backends.mps.is_available()
    else torch.float32
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map="auto",
    trust_remote_code=True
)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

SYSTEM_PROMPT = "You are a helpful, concise, and friendly AI assistant. Keep answers direct and useful."

def qwen_generate(messages, max_new_tokens=512, temperature=0.7, top_p=0.9):
    """
    messages: list[{"role": "system"|"user"|"assistant", "content": str}]
    """
    try:
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
            )
        # keep only the newly generated tokens
        gen_ids = outputs[0][inputs["input_ids"].shape[1]:]
        text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
        return text
    except Exception as e:
        return f"[Qwen 生成异常] {e}"

# ---------------------------
# SimSimi SmallTalk Bridge
# ---------------------------
async def simsimi_smalltalk(user_text: str, lang: str = None, bad_max: float = None, timeout: float = 10.0):
    """
    Call the SimSimi SmallTalk API:
      - Endpoint: https://wsapi.simsimi.com/190410/talk
      - Header: x-api-key: <Project Key>
      - Body: {"utext": "...", "lang": "ch", "atext_bad_prob_max": 0.3}
    """
    if not SIMSIMI_API_KEY:
        return None, "SIMSIMI_API_KEY is not configured"

    lang = (lang or SIMSIMI_LANG or "ch").strip()
    bad = SIMSIMI_BAD_MAX if bad_max is None else float(bad_max)

    headers = {
        "Content-Type": "application/json",
        "x-api-key": SIMSIMI_API_KEY
    }
    payload = {
        "utext": user_text,
        "lang": lang,
        "atext_bad_prob_max": bad  # 越小越干净(0.0~1.0)
    }
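
    # Illustrative success response (field names as in the docstring above; the
    # exact shape may vary by project/plan, hence the fallbacks below):
    #   {"status": 200, "atext": "Hi there!", "lang": "ch"}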

    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            r = await client.post(SIMSIMI_ENDPOINT, headers=headers, json=payload)
            r.raise_for_status()
            data = r.json()
            # The official response field is "atext" (adjust here if your
            # project's responses differ)
            reply = data.get("atext")
            if not reply:
                # fallback for a few alternate response shapes
                reply = data.get("response") or data.get("msg")
            return reply, None
    except Exception as e:
        return None, f"SimSimi 调用失败: {e}"

# ---------------------------
# Simple Intent Router
# ---------------------------
# Keyword heuristics kept in Chinese, since they match Chinese user input
CHATY_HINTS = [
    r"讲个(笑话|段子)", r"无聊", r"随便聊",
    r"你(会|能)吐槽", r"来点梗", r"夸我", r"损我一下",
    r"夸夸我", r"给我一句毒舌"
]
TASK_HINTS = [
    r"(怎么|如何|为何|为什么|为啥)",
    r"(写|生成|改|优化).{0,12}(代码|脚本|文案|提示词|SQL|正则)",
    r"(安装|配置|部署|报错|调试)",
    r"(引用|数据|来源|对比|表格)"
]

def is_chitchat(text: str) -> bool:
    if re.search("|".join(TASK_HINTS), text, flags=re.I):
        return False
    if re.search("|".join(CHATY_HINTS), text, flags=re.I):
        return True
    # Short text with no sentence-ending punctuation is more likely chitchat
    return (len(text) <= 22 and not re.search(r"[,。!?.!?]", text))
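
# Illustrative routing, per the heuristics above:
#   is_chitchat("讲个笑话")           -> True   (matches CHATY_HINTS)
#   is_chitchat("如何部署到 Spaces?") -> False  (matches TASK_HINTS)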

# ---------------------------
# Hybrid Reply
# ---------------------------
async def hybrid_reply(history_messages, user_text, mode: str, lang: str, bad_max: float):
    """
    mode:
      - "Auto Hybrid": chitchat -> SimSimi; tasks -> Qwen; when both fit,
        Qwen answers first and SimSimi appends a one-liner
      - "Qwen only"
      - "SimSimi only"
    """
    lang = (lang or SIMSIMI_LANG or "ch").strip()
    bad_max = SIMSIMI_BAD_MAX if bad_max is None else float(bad_max)

    if mode == "SimSimi only":
        sim, err = await simsimi_smalltalk(user_text, lang=lang, bad_max=bad_max)
        return sim or f"[No SimSimi reply] {err or 'unknown error'}"

    if mode == "只用 Qwen":
        base = qwen_generate(history_messages + [{"role": "user", "content": user_text}])
        return base

    # Auto Hybrid
    if is_chitchat(user_text):
        sim, err = await simsimi_smalltalk(user_text, lang=lang, bad_max=bad_max)
        if sim:
            return sim
        # fall back to Qwen
        return qwen_generate(history_messages + [{"role": "user", "content": user_text}])

    # Task-style input: Qwen gives the main answer, SimSimi adds a playful closing line.
    base = qwen_generate(history_messages + [{"role": "user", "content": user_text}])
    # The tail prompt stays in Chinese because it is sent to SimSimi
    # (roughly: "wrap this up with one short, humorous sentence")
    sim_tail, _ = await simsimi_smalltalk(f"用一句简短幽默的方式做个收尾:{user_text}", lang=lang, bad_max=bad_max)
    if sim_tail:
        return f"{base}\n\n—— {sim_tail}"
    return base
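
# Note: qwen_generate is synchronous and blocks the event loop while generating;
# under heavier traffic it could be offloaded, e.g. with
# asyncio.to_thread(qwen_generate, ...) (omitted here to keep the demo simple).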

# ---------------------------
# Gradio UI
# ---------------------------
with gr.Blocks(css="""
#chatbot {height: 560px}
""") as demo:
    gr.Markdown("## Qwen × SimSimi Hybrid Chat\n")

    # The page exposes a single dropdown (three modes) plus Submit / Clear buttons
    mode_dd = gr.Dropdown(
        choices=["Auto Hybrid", "Qwen only", "SimSimi only"],
        value="Auto Hybrid",
        label="Chat mode"
    )

    chatbox = gr.Chatbot(elem_id="chatbot")
    user_in = gr.Textbox(placeholder="Type a message, then click Submit…", lines=2)
    submit_btn = gr.Button("Submit", variant="primary")
    clear_btn = gr.Button("Clear chat")

    # Internal state: the messages list consumed by Qwen
    state_msgs = gr.State([{"role": "system", "content": SYSTEM_PROMPT}])

    async def respond(user_text, history, messages, mode):
        user_text = (user_text or "").strip()
        if not user_text:
            return gr.update(), messages, ""
        # SimSimi parameters come from environment variables (not exposed in the UI)
        lang = SIMSIMI_LANG
        bad_max = SIMSIMI_BAD_MAX

        messages = list(messages) if messages else [{"role": "system", "content": SYSTEM_PROMPT}]
        messages.append({"role": "user", "content": user_text})

        reply = await hybrid_reply(messages, user_text, mode=mode, lang=lang, bad_max=bad_max)

        messages.append({"role": "assistant", "content": reply})
        history = (history or []) + [[user_text, reply]]
        return history, messages, ""

    def clear_all():
        return [], [{"role": "system", "content": SYSTEM_PROMPT}]

    # Only the Submit button sends; add user_in.submit(...) if Enter-to-send is desired
    submit_btn.click(
        respond,
        inputs=[user_in, chatbox, state_msgs, mode_dd],
        outputs=[chatbox, state_msgs, user_in]
    )
    clear_btn.click(
        clear_all,
        inputs=None,
        outputs=[chatbox, state_msgs]
    )

if __name__ == "__main__":
    demo.launch()