# app.py
# Qwen ✖ SimSimi Hybrid Chat for Hugging Face Spaces
# ---------------------------------------------------
# Requirements (requirements.txt):
# transformers
# torch
# accelerate
# gradio
# httpx
import os
import re
import asyncio
import httpx
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ---------------------------
# Config & Secrets
# ---------------------------
MODEL_ID = "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3"
SIMSIMI_ENDPOINT = "https://wsapi.simsimi.com/190410/talk"  # versioned SmallTalk endpoint (fixed)
SIMSIMI_API_KEY = os.getenv("SIMSIMI_API_KEY", "").strip()
SIMSIMI_LANG = os.getenv("SIMSIMI_LANG", "ch").strip()  # "ch" = Simplified Chinese, "en" = English, "zh" = Traditional Chinese, etc.
SIMSIMI_BAD_MAX = float(os.getenv("SIMSIMI_BAD_MAX", "0.30"))
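# Added sketch: a minimal startup check so a missing key fails loudly instead of
# silently disabling SimSimi. On Hugging Face Spaces the key is set under
# Settings -> Repository secrets; a hypothetical local run might export it first:
#   SIMSIMI_API_KEY=<your-project-key> python app.py
if not SIMSIMI_API_KEY:
    print("[warn] SIMSIMI_API_KEY is not set; SimSimi replies will be unavailable.")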
# ---------------------------
# Load Qwen
# ---------------------------
dtype = (
    torch.float16 if torch.cuda.is_available()
    else torch.bfloat16 if torch.backends.mps.is_available()
    else torch.float32
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=dtype,
    device_map="auto",
    trust_remote_code=True,
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
SYSTEM_PROMPT = "You are a helpful, concise, and friendly AI assistant. Keep answers direct and useful."
def qwen_generate(messages, max_new_tokens=512, temperature=0.7, top_p=0.9):
    """
    messages: list[{"role": "system"|"user"|"assistant", "content": str}]
    """
    try:
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
            )
        # Keep only the newly generated tokens (strip the prompt)
        gen_ids = outputs[0][inputs["input_ids"].shape[1]:]
        text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
        return text
    except Exception as e:
        return f"[Qwen generation error] {e}"
# ---------------------------
# SimSimi SmallTalk Bridge
# ---------------------------
async def simsimi_smalltalk(user_text: str, lang: str = None, bad_max: float = None, timeout: float = 10.0):
    """
    Call the SimSimi SmallTalk API:
    - Endpoint: https://wsapi.simsimi.com/190410/talk
    - Header: x-api-key: <Project Key>
    - Body: {"utext": "...", "lang": "ch", "atext_bad_prob_max": 0.3}
    """
    if not SIMSIMI_API_KEY:
        return None, "SIMSIMI_API_KEY is not configured"
    lang = (lang or SIMSIMI_LANG or "ch").strip()
    bad = SIMSIMI_BAD_MAX if bad_max is None else float(bad_max)
    headers = {
        "Content-Type": "application/json",
        "x-api-key": SIMSIMI_API_KEY,
    }
    payload = {
        "utext": user_text,
        "lang": lang,
        "atext_bad_prob_max": bad,  # lower = cleaner replies (0.0-1.0)
    }
    try:
        async with httpx.AsyncClient(timeout=timeout) as client:
            r = await client.post(SIMSIMI_ENDPOINT, headers=headers, json=payload)
            r.raise_for_status()
            data = r.json()
            # The official response field is "atext" (change here if your project's response differs)
            reply = data.get("atext")
            if not reply:
                # Fallback for a few uncommon response shapes
                reply = data.get("response") or data.get("msg")
            return reply, None
    except Exception as e:
        return None, f"SimSimi request failed: {e}"
# ---------------------------
# Simple Intent Router
# ---------------------------
# Patterns suggesting casual chit-chat (Chinese phrases such as "tell a joke",
# "I'm bored", "roast me", "compliment me")
CHATTY_HINTS = [
    r"讲个(笑话|段子)", r"无聊", r"随便聊",
    r"你(会|能)吐槽", r"来点梗", r"夸我", r"损我一下",
    r"夸夸我", r"给我一句毒舌",
]
# Patterns suggesting a task-style request ("how/why ...", "write/generate
# code/scripts/prompts", "install/deploy/debug", "cite/compare/table")
TASK_HINTS = [
    r"(怎么|如何|为何|为什么|为啥)",
    r"(写|生成|改|优化).{0,12}(代码|脚本|文案|提示词|SQL|正则)",
    r"(安装|配置|部署|调试|报错)",
    r"(引用|数据|来源|对比|表格)",
]
def is_chitchat(text: str) -> bool:
    if re.search("|".join(TASK_HINTS), text, flags=re.I):
        return False
    if re.search("|".join(CHATTY_HINTS), text, flags=re.I):
        return True
    # Short sentences without question/statement punctuation are more likely chit-chat
    return len(text) <= 22 and not re.search(r"[,。!?.!?]", text)
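# Illustrative self-checks for the router; the inputs are examples chosen for
# this sketch, not part of the original code. Cheap enough to run at import.
assert is_chitchat("讲个笑话")               # chatty hint -> True
assert not is_chitchat("怎么部署这个应用?")  # task hint wins -> False
assert is_chitchat("你好呀")                 # short, no punctuation -> True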
# ---------------------------
# Hybrid Reply
# ---------------------------
async def hybrid_reply(history_messages, user_text, mode: str, lang: str, bad_max: float):
    """
    mode:
    - "Auto Hybrid": chit-chat -> SimSimi; tasks -> Qwen; when both fit, Qwen answers first and SimSimi adds one line
    - "Qwen only"
    - "SimSimi only"
    """
    lang = (lang or SIMSIMI_LANG or "ch").strip()
    bad_max = SIMSIMI_BAD_MAX if bad_max is None else float(bad_max)
    if mode == "SimSimi only":
        sim, err = await simsimi_smalltalk(user_text, lang=lang, bad_max=bad_max)
        return sim or f"[SimSimi gave no reply] {err or 'unknown error'}"
    if mode == "Qwen only":
        return qwen_generate(history_messages + [{"role": "user", "content": user_text}])
    # Auto Hybrid
    if is_chitchat(user_text):
        sim, err = await simsimi_smalltalk(user_text, lang=lang, bad_max=bad_max)
        if sim:
            return sim
        # Fall back to Qwen if SimSimi has nothing
        return qwen_generate(history_messages + [{"role": "user", "content": user_text}])
    # Task-style input: Qwen answers, SimSimi appends a short playful closing line
    base = qwen_generate(history_messages + [{"role": "user", "content": user_text}])
    # The tail prompt is in Chinese: "wrap this up in one short, humorous line"
    sim_tail, _ = await simsimi_smalltalk(f"用一句简短幽默的方式做个收尾:{user_text}", lang=lang, bad_max=bad_max)
    if sim_tail:
        return f"{base}\n\n—— {sim_tail}"
    return base
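# Hypothetical one-off check of the routing logic (the model must be loaded,
# and the SimSimi branches need SIMSIMI_API_KEY); left commented out:
# _msgs = [{"role": "system", "content": SYSTEM_PROMPT}]
# print(asyncio.run(hybrid_reply(_msgs, "hello", mode="Auto Hybrid", lang="en", bad_max=0.3)))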
# ---------------------------
# Gradio UI
# ---------------------------
with gr.Blocks(css="""
#chatbot {height: 560px}
""") as demo:
    gr.Markdown("## Qwen × SimSimi Hybrid Chat\n")
    # ✅ The page keeps a single dropdown (three modes) plus Submit/Clear buttons
    mode_dd = gr.Dropdown(
        choices=["Auto Hybrid", "Qwen only", "SimSimi only"],
        value="Auto Hybrid",
        label="Chat mode",
    )
    chatbox = gr.Chatbot(elem_id="chatbot")
    user_in = gr.Textbox(placeholder="Type a message, then click Submit...", lines=2)
    submit_btn = gr.Button("Submit", variant="primary")
    clear_btn = gr.Button("Clear chat")
    # Internal state: the message list fed to Qwen
    state_msgs = gr.State([{"role": "system", "content": SYSTEM_PROMPT}])
    async def respond(user_text, history, messages, mode):
        user_text = (user_text or "").strip()
        if not user_text:
            return gr.update(), messages, ""
        # SimSimi parameters come from environment variables (not exposed in the UI)
        lang = SIMSIMI_LANG
        bad_max = SIMSIMI_BAD_MAX
        messages = list(messages) if messages else [{"role": "system", "content": SYSTEM_PROMPT}]
        # hybrid_reply appends the user turn itself, so pass the history without
        # it; appending first would duplicate the user message in the prompt
        reply = await hybrid_reply(messages, user_text, mode=mode, lang=lang, bad_max=bad_max)
        messages.append({"role": "user", "content": user_text})
        messages.append({"role": "assistant", "content": reply})
        history = (history or []) + [[user_text, reply]]
        return history, messages, ""

    def clear_all():
        return [], [{"role": "system", "content": SYSTEM_PROMPT}]
    # Only the Submit button sends; add user_in.submit(...) for Enter-to-send
    submit_btn.click(
        respond,
        inputs=[user_in, chatbox, state_msgs, mode_dd],
        outputs=[chatbox, state_msgs, user_in],
    )
    clear_btn.click(
        clear_all,
        inputs=None,
        outputs=[chatbox, state_msgs],
    )
if __name__ == "__main__":
    demo.launch()