Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,8 @@ import ctypes
|
|
12 |
from wasmtime import Store, Module, Linker
|
13 |
import re
|
14 |
import transformers
|
|
|
|
|
15 |
|
16 |
# -------------------------- 初始化 tokenizer --------------------------
|
17 |
chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
|
@@ -114,14 +116,13 @@ DEEPSEEK_COMPLETION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/completion"
|
|
114 |
|
115 |
BASE_HEADERS = {
|
116 |
'Host': "chat.deepseek.com",
|
117 |
-
'User-Agent': "DeepSeek/1.0.
|
118 |
'Accept': "application/json",
|
119 |
'Accept-Encoding': "gzip",
|
120 |
'Content-Type': "application/json",
|
121 |
'x-client-platform': "android",
|
122 |
-
'x-client-version': "1.0.
|
123 |
'x-client-locale': "zh_CN",
|
124 |
-
'x-rangers-id': "7883327620434123524",
|
125 |
'accept-charset': "UTF-8",
|
126 |
}
|
127 |
|
@@ -184,21 +185,36 @@ def login_deepseek_via_account(account):
|
|
184 |
return new_token
|
185 |
|
186 |
# ----------------------------------------------------------------------
|
187 |
-
#
|
188 |
-
#
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
200 |
return None
|
201 |
|
|
|
|
|
|
|
|
|
202 |
# ----------------------------------------------------------------------
|
203 |
# (5) 判断调用模式:配置模式 vs 用户自带 token
|
204 |
# ----------------------------------------------------------------------
|
@@ -218,10 +234,10 @@ def determine_mode_and_token():
|
|
218 |
if caller_key in config_keys:
|
219 |
g.use_config_token = True
|
220 |
g.tried_accounts = [] # 初始化已尝试账号
|
221 |
-
selected_account = choose_new_account(
|
222 |
if not selected_account:
|
223 |
-
return Response(json.dumps({"error": "No accounts configured."}),
|
224 |
-
status=
|
225 |
if not selected_account.get("token", "").strip():
|
226 |
try:
|
227 |
login_deepseek_via_account(selected_account)
|
@@ -229,15 +245,11 @@ def determine_mode_and_token():
|
|
229 |
app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
|
230 |
return Response(json.dumps({"error": "Account login failed."}),
|
231 |
status=500, mimetype="application/json")
|
232 |
-
else:
|
233 |
-
app.logger.info(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 已有 token,无需重新登录")
|
234 |
g.deepseek_token = selected_account.get("token")
|
235 |
g.account = selected_account
|
236 |
-
app.logger.info(f"[determine_mode_and_token] 配置模式:使用账号 {get_account_identifier(selected_account)} 的 token")
|
237 |
else:
|
238 |
g.use_config_token = False
|
239 |
g.deepseek_token = caller_key
|
240 |
-
app.logger.info("[determine_mode_and_token] 使用用户自带 DeepSeek token")
|
241 |
return None
|
242 |
|
243 |
def get_auth_headers():
|
@@ -299,7 +311,7 @@ def create_session(max_attempts=3):
|
|
299 |
g.tried_accounts = []
|
300 |
if current_id not in g.tried_accounts:
|
301 |
g.tried_accounts.append(current_id)
|
302 |
-
new_account = choose_new_account(
|
303 |
if new_account is None:
|
304 |
break
|
305 |
try:
|
@@ -464,7 +476,7 @@ def get_pow_response(max_attempts=3):
|
|
464 |
g.tried_accounts = []
|
465 |
if current_id not in g.tried_accounts:
|
466 |
g.tried_accounts.append(current_id)
|
467 |
-
new_account = choose_new_account(
|
468 |
if new_account is None:
|
469 |
break
|
470 |
try:
|
@@ -564,8 +576,8 @@ def messages_prepare(messages: list) -> str:
|
|
564 |
else:
|
565 |
parts.append(text)
|
566 |
final_prompt = "".join(parts)
|
567 |
-
#
|
568 |
-
final_prompt = re.sub(r"
|
569 |
return final_prompt
|
570 |
|
571 |
# ----------------------------------------------------------------------
|
@@ -577,22 +589,6 @@ def chat_completions():
|
|
577 |
if mode_resp:
|
578 |
return mode_resp
|
579 |
|
580 |
-
# 如果使用配置模式,检查账号是否正忙;如果忙则尝试切换账号
|
581 |
-
if g.use_config_token:
|
582 |
-
account_id = get_account_identifier(g.account)
|
583 |
-
if account_id in active_accounts:
|
584 |
-
g.tried_accounts.append(account_id)
|
585 |
-
new_account = choose_new_account(g.tried_accounts)
|
586 |
-
if new_account is None:
|
587 |
-
return jsonify({"error": "All accounts are busy."}), 503
|
588 |
-
try:
|
589 |
-
login_deepseek_via_account(new_account)
|
590 |
-
except Exception as e:
|
591 |
-
return jsonify({"error": "Account login failed."}), 500
|
592 |
-
g.account = new_account
|
593 |
-
g.deepseek_token = new_account.get("token")
|
594 |
-
account_id = get_account_identifier(new_account)
|
595 |
-
active_accounts.add(account_id)
|
596 |
try:
|
597 |
req_data = request.json or {}
|
598 |
app.logger.info(f"[chat_completions] 收到请求: {req_data}")
|
@@ -661,29 +657,81 @@ def chat_completions():
|
|
661 |
status=deepseek_resp.status_code,
|
662 |
mimetype="application/json")
|
663 |
|
|
|
|
|
|
|
664 |
def sse_stream():
|
665 |
try:
|
666 |
final_text = ""
|
667 |
final_thinking = ""
|
668 |
first_chunk_sent = False
|
|
|
|
|
669 |
citation_map = {} # 用于存储引用链接的字典
|
670 |
-
|
|
|
671 |
try:
|
672 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
673 |
except Exception as e:
|
674 |
-
app.logger.warning(f"[sse_stream]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
675 |
continue
|
676 |
-
|
677 |
-
|
678 |
-
|
679 |
-
|
680 |
-
if data_str == "[DONE]":
|
681 |
prompt_tokens = len(tokenizer.encode(final_prompt))
|
682 |
completion_tokens = len(tokenizer.encode(final_text))
|
683 |
usage = {
|
684 |
"prompt_tokens": prompt_tokens,
|
685 |
"completion_tokens": completion_tokens,
|
686 |
-
"total_tokens": prompt_tokens + completion_tokens
|
687 |
}
|
688 |
finish_chunk = {
|
689 |
"id": completion_id,
|
@@ -691,30 +739,25 @@ def chat_completions():
|
|
691 |
"created": created_time,
|
692 |
"model": model,
|
693 |
"choices": [
|
694 |
-
{
|
|
|
|
|
|
|
|
|
695 |
],
|
696 |
-
"usage": usage
|
697 |
}
|
698 |
yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
|
699 |
yield "data: [DONE]\n\n"
|
|
|
700 |
break
|
701 |
-
try:
|
702 |
-
chunk = json.loads(data_str)
|
703 |
-
app.logger.debug(f"[sse_stream] 解析到 chunk: {chunk}")
|
704 |
-
# 处理搜索索引数据
|
705 |
-
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
706 |
-
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
707 |
-
for idx in search_indexes:
|
708 |
-
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
709 |
-
continue
|
710 |
-
except Exception as e:
|
711 |
-
app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
|
712 |
-
continue
|
713 |
new_choices = []
|
714 |
for choice in chunk.get("choices", []):
|
715 |
delta = choice.get("delta", {})
|
716 |
ctype = delta.get("type")
|
717 |
ctext = delta.get("content", "")
|
|
|
|
|
718 |
if search_enabled and ctext.startswith("[citation:"):
|
719 |
ctext = ""
|
720 |
if ctype == "thinking":
|
@@ -732,96 +775,141 @@ def chat_completions():
|
|
732 |
elif ctype == "text":
|
733 |
delta_obj["content"] = ctext
|
734 |
if delta_obj:
|
735 |
-
new_choices.append(
|
|
|
|
|
|
|
|
|
|
|
736 |
if new_choices:
|
737 |
out_chunk = {
|
738 |
"id": completion_id,
|
739 |
"object": "chat.completion.chunk",
|
740 |
"created": created_time,
|
741 |
"model": model,
|
742 |
-
"choices": new_choices
|
743 |
}
|
744 |
yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
|
|
|
|
|
|
|
745 |
except Exception as e:
|
746 |
app.logger.error(f"[sse_stream] 异常: {e}")
|
747 |
finally:
|
748 |
deepseek_resp.close()
|
749 |
if g.use_config_token:
|
750 |
-
|
751 |
return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
|
752 |
else:
|
753 |
# 非流式响应处理
|
754 |
think_list = []
|
755 |
text_list = []
|
|
|
756 |
citation_map = {} # 用于存储引用链接的字典
|
757 |
-
|
758 |
-
|
759 |
-
|
760 |
-
|
761 |
-
|
762 |
-
|
763 |
-
|
764 |
-
if not line:
|
765 |
-
continue
|
766 |
-
if line.startswith("data:"):
|
767 |
-
data_str = line[5:].strip()
|
768 |
-
if data_str == "[DONE]":
|
769 |
-
break
|
770 |
try:
|
771 |
-
|
772 |
-
app.logger.debug(f"[chat_completions] 非流式 chunk: {chunk}")
|
773 |
-
# 处理搜索索引数据
|
774 |
-
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
775 |
-
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
776 |
-
for idx in search_indexes:
|
777 |
-
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
778 |
-
continue
|
779 |
except Exception as e:
|
780 |
-
app.logger.warning(f"[chat_completions]
|
|
|
|
|
|
|
|
|
|
|
781 |
continue
|
782 |
-
|
783 |
-
|
784 |
-
|
785 |
-
|
786 |
-
|
787 |
-
|
788 |
-
|
789 |
-
|
790 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
791 |
text_list.append(ctext)
|
792 |
-
|
793 |
-
|
794 |
-
|
795 |
-
|
796 |
-
|
797 |
-
|
798 |
-
|
799 |
-
|
800 |
-
|
801 |
-
|
802 |
-
|
803 |
-
|
804 |
-
|
805 |
-
{
|
806 |
-
"
|
807 |
-
"
|
808 |
-
|
809 |
-
|
810 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
811 |
},
|
812 |
-
"finish_reason": "stop"
|
813 |
}
|
814 |
-
|
815 |
-
|
816 |
-
|
817 |
-
|
818 |
-
|
819 |
-
|
820 |
-
|
821 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
822 |
finally:
|
823 |
if g.use_config_token:
|
824 |
-
|
825 |
|
826 |
# ----------------------------------------------------------------------
|
827 |
# (11) 路由:/
|
|
|
12 |
from wasmtime import Store, Module, Linker
|
13 |
import re
|
14 |
import transformers
|
15 |
+
import queue
|
16 |
+
import threading
|
17 |
|
18 |
# -------------------------- 初始化 tokenizer --------------------------
|
19 |
chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
|
|
|
116 |
|
117 |
BASE_HEADERS = {
|
118 |
'Host': "chat.deepseek.com",
|
119 |
+
'User-Agent': "DeepSeek/1.0.13 Android/35",
|
120 |
'Accept': "application/json",
|
121 |
'Accept-Encoding': "gzip",
|
122 |
'Content-Type': "application/json",
|
123 |
'x-client-platform': "android",
|
124 |
+
'x-client-version': "1.0.13",
|
125 |
'x-client-locale': "zh_CN",
|
|
|
126 |
'accept-charset': "UTF-8",
|
127 |
}
|
128 |
|
|
|
185 |
return new_token
|
186 |
|
187 |
# ----------------------------------------------------------------------
|
188 |
+
# -------------------------- 全局账号队列 --------------------------
|
189 |
+
account_queue = [] # 维护所有可用账号
|
190 |
+
|
191 |
+
def init_account_queue():
    """Populate the global account queue from the loaded configuration.

    The queue is rebuilt from ``CONFIG["accounts"]`` and then shuffled so
    that the first account handed out differs between process starts.
    """
    global account_queue
    # Shallow copy on purpose: the queue gets its own list, but the account
    # dicts are the same objects as in CONFIG. A missing or null "accounts"
    # entry yields an empty queue instead of raising.
    account_queue = list(CONFIG.get("accounts") or [])
    random.shuffle(account_queue)  # randomize the initial order
|
196 |
+
|
197 |
+
init_account_queue()
|
198 |
+
|
199 |
+
def choose_new_account(exclude_ids=None):
    """Pop and return the first usable account from the global queue.

    Selection strategy:
      1. Walk the queue in order and pick the first account that has a
         non-empty identifier and whose identifier is not in ``exclude_ids``.
      2. Remove that account from the queue.
      3. Return it; the caller is responsible for handing it back with
         ``release_account`` when it is done.

    Args:
        exclude_ids: Optional iterable of account identifiers to skip.
            Defaults to ``None`` (nothing is excluded), which keeps the
            zero-argument call sites working unchanged.

    Returns:
        The selected account, or ``None`` when no account qualifies.
    """
    excluded = tuple(exclude_ids or ())
    for position, candidate in enumerate(account_queue):
        candidate_id = get_account_identifier(candidate)
        if candidate_id and candidate_id not in excluded:
            # Returning straight after the pop means the list is never
            # iterated again after being mutated.
            return account_queue.pop(position)
    app.logger.warning("[choose_new_account] 没有可用的账号或所有账号都在使用中")
    return None
|
213 |
|
214 |
+
def release_account(account):
    """Return an account to the tail of the global queue.

    The call is idempotent: an account object that is already queued is not
    appended a second time. This matters because the streaming path of
    ``chat_completions`` releases ``g.account`` from two ``finally`` blocks,
    which would otherwise leave duplicate entries in the queue.

    Args:
        account: The account previously obtained from
            ``choose_new_account``. ``None`` is ignored.
    """
    if account is None:
        return
    # Identity comparison: only the very same account object counts as
    # "already queued".
    if any(queued is account for queued in account_queue):
        return
    account_queue.append(account)
|
217 |
+
|
218 |
# ----------------------------------------------------------------------
|
219 |
# (5) 判断调用模式:配置模式 vs 用户自带 token
|
220 |
# ----------------------------------------------------------------------
|
|
|
234 |
if caller_key in config_keys:
|
235 |
g.use_config_token = True
|
236 |
g.tried_accounts = [] # 初始化已尝试账号
|
237 |
+
selected_account = choose_new_account()
|
238 |
if not selected_account:
|
239 |
+
return Response(json.dumps({"error": "No accounts configured or all accounts are busy."}),
|
240 |
+
status=429, mimetype="application/json")
|
241 |
if not selected_account.get("token", "").strip():
|
242 |
try:
|
243 |
login_deepseek_via_account(selected_account)
|
|
|
245 |
app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
|
246 |
return Response(json.dumps({"error": "Account login failed."}),
|
247 |
status=500, mimetype="application/json")
|
|
|
|
|
248 |
g.deepseek_token = selected_account.get("token")
|
249 |
g.account = selected_account
|
|
|
250 |
else:
|
251 |
g.use_config_token = False
|
252 |
g.deepseek_token = caller_key
|
|
|
253 |
return None
|
254 |
|
255 |
def get_auth_headers():
|
|
|
311 |
g.tried_accounts = []
|
312 |
if current_id not in g.tried_accounts:
|
313 |
g.tried_accounts.append(current_id)
|
314 |
+
new_account = choose_new_account()
|
315 |
if new_account is None:
|
316 |
break
|
317 |
try:
|
|
|
476 |
g.tried_accounts = []
|
477 |
if current_id not in g.tried_accounts:
|
478 |
g.tried_accounts.append(current_id)
|
479 |
+
new_account = choose_new_account()
|
480 |
if new_account is None:
|
481 |
break
|
482 |
try:
|
|
|
576 |
else:
|
577 |
parts.append(text)
|
578 |
final_prompt = "".join(parts)
|
579 |
+
# 仅移除 markdown 图片格式(不全部移除 !)
|
580 |
+
final_prompt = re.sub(r"!\[(.*?)\]\((.*?)\)", r"[\1](\2)", final_prompt)
|
581 |
return final_prompt
|
582 |
|
583 |
# ----------------------------------------------------------------------
|
|
|
589 |
if mode_resp:
|
590 |
return mode_resp
|
591 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
try:
|
593 |
req_data = request.json or {}
|
594 |
app.logger.info(f"[chat_completions] 收到请求: {req_data}")
|
|
|
657 |
status=deepseek_resp.status_code,
|
658 |
mimetype="application/json")
|
659 |
|
660 |
+
# 添加保活超时配置(5秒)
|
661 |
+
KEEP_ALIVE_TIMEOUT = 5
|
662 |
+
|
663 |
def sse_stream():
|
664 |
try:
|
665 |
final_text = ""
|
666 |
final_thinking = ""
|
667 |
first_chunk_sent = False
|
668 |
+
result_queue = queue.Queue()
|
669 |
+
last_send_time = time.time()
|
670 |
citation_map = {} # 用于存储引用链接的字典
|
671 |
+
|
672 |
+
def process_data():
    """Read the upstream SSE stream and feed parsed chunks to ``result_queue``.

    Runs in a worker thread. Each ``data:`` line is parsed as JSON:
    ``search_index`` chunks only update ``citation_map``; every other chunk
    is queued for the consumer loop. On a decode error, a parse error, or
    any other exception, a synthetic "server busy" chunk is queued and
    reading stops.

    A single ``None`` sentinel is always queued when the reader finishes,
    including when the upstream closes the stream without sending
    ``[DONE]``, so the consumer loop can always terminate.
    """
    # Synthetic chunk shown to the client when the upstream data is unusable.
    busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'
    try:
        for raw_line in deepseek_resp.iter_lines():
            try:
                line = raw_line.decode("utf-8")
            except Exception as e:
                app.logger.warning(f"[sse_stream] 解码失败: {e}")
                result_queue.put(json.loads(busy_content_str))
                break
            # Blank lines and non-"data:" lines carry nothing to forward.
            if not line or not line.startswith("data:"):
                continue
            data_str = line[5:].strip()
            if data_str == "[DONE]":
                break  # the sentinel is queued in the finally block
            try:
                chunk = json.loads(data_str)
                # Search-index chunks only record citation URLs.
                if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
                    search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
                    for idx in search_indexes:
                        citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
                    continue
                result_queue.put(chunk)  # hand the chunk to the consumer
            except Exception as e:
                app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
                result_queue.put(json.loads(busy_content_str))
                break
    except Exception as e:
        app.logger.warning(f"[sse_stream] 错误: {e}")
        result_queue.put(json.loads(busy_content_str))
    finally:
        # Queue the end-of-stream sentinel first so it is delivered even if
        # closing the upstream response raises.
        result_queue.put(None)
        deepseek_resp.close()
|
715 |
+
|
716 |
+
process_thread = threading.Thread(target=process_data)
|
717 |
+
process_thread.start()
|
718 |
+
|
719 |
+
while True:
|
720 |
+
current_time = time.time()
|
721 |
+
if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
|
722 |
+
yield ": keep-alive\n\n"
|
723 |
+
last_send_time = current_time
|
724 |
continue
|
725 |
+
try:
|
726 |
+
chunk = result_queue.get(timeout=0.1)
|
727 |
+
if chunk is None:
|
728 |
+
# 发送最终统计信息
|
|
|
729 |
prompt_tokens = len(tokenizer.encode(final_prompt))
|
730 |
completion_tokens = len(tokenizer.encode(final_text))
|
731 |
usage = {
|
732 |
"prompt_tokens": prompt_tokens,
|
733 |
"completion_tokens": completion_tokens,
|
734 |
+
"total_tokens": prompt_tokens + completion_tokens,
|
735 |
}
|
736 |
finish_chunk = {
|
737 |
"id": completion_id,
|
|
|
739 |
"created": created_time,
|
740 |
"model": model,
|
741 |
"choices": [
|
742 |
+
{
|
743 |
+
"delta": {},
|
744 |
+
"index": 0,
|
745 |
+
"finish_reason": "stop",
|
746 |
+
}
|
747 |
],
|
748 |
+
"usage": usage,
|
749 |
}
|
750 |
yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
|
751 |
yield "data: [DONE]\n\n"
|
752 |
+
last_send_time = current_time
|
753 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
754 |
new_choices = []
|
755 |
for choice in chunk.get("choices", []):
|
756 |
delta = choice.get("delta", {})
|
757 |
ctype = delta.get("type")
|
758 |
ctext = delta.get("content", "")
|
759 |
+
if choice.get("finish_reason") == "backend_busy":
|
760 |
+
ctext = '服务器繁忙,请稍候再试'
|
761 |
if search_enabled and ctext.startswith("[citation:"):
|
762 |
ctext = ""
|
763 |
if ctype == "thinking":
|
|
|
775 |
elif ctype == "text":
|
776 |
delta_obj["content"] = ctext
|
777 |
if delta_obj:
|
778 |
+
new_choices.append(
|
779 |
+
{
|
780 |
+
"delta": delta_obj,
|
781 |
+
"index": choice.get("index", 0),
|
782 |
+
}
|
783 |
+
)
|
784 |
if new_choices:
|
785 |
out_chunk = {
|
786 |
"id": completion_id,
|
787 |
"object": "chat.completion.chunk",
|
788 |
"created": created_time,
|
789 |
"model": model,
|
790 |
+
"choices": new_choices,
|
791 |
}
|
792 |
yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
|
793 |
+
last_send_time = current_time
|
794 |
+
except queue.Empty:
|
795 |
+
continue
|
796 |
except Exception as e:
|
797 |
app.logger.error(f"[sse_stream] 异常: {e}")
|
798 |
finally:
|
799 |
deepseek_resp.close()
|
800 |
if g.use_config_token:
|
801 |
+
release_account(g.account)
|
802 |
return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
|
803 |
else:
|
804 |
# 非流式响应处理
|
805 |
think_list = []
|
806 |
text_list = []
|
807 |
+
result = None
|
808 |
citation_map = {} # 用于存储引用链接的字典
|
809 |
+
|
810 |
+
data_queue = queue.Queue()
|
811 |
+
|
812 |
+
def collect_data():
|
813 |
+
nonlocal result
|
814 |
+
try:
|
815 |
+
for raw_line in deepseek_resp.iter_lines():
|
|
|
|
|
|
|
|
|
|
|
|
|
816 |
try:
|
817 |
+
line = raw_line.decode("utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
818 |
except Exception as e:
|
819 |
+
app.logger.warning(f"[chat_completions] 解码失败: {e}")
|
820 |
+
ctext = '服务器繁忙,请稍候再试'
|
821 |
+
text_list.append(ctext)
|
822 |
+
data_queue.put(None)
|
823 |
+
break
|
824 |
+
if not line:
|
825 |
continue
|
826 |
+
if line.startswith("data:"):
|
827 |
+
data_str = line[5:].strip()
|
828 |
+
if data_str == "[DONE]":
|
829 |
+
data_queue.put(None)
|
830 |
+
break
|
831 |
+
try:
|
832 |
+
chunk = json.loads(data_str)
|
833 |
+
if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
|
834 |
+
search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
|
835 |
+
for idx in search_indexes:
|
836 |
+
citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
|
837 |
+
continue
|
838 |
+
for choice in chunk.get("choices", []):
|
839 |
+
delta = choice.get("delta", {})
|
840 |
+
ctype = delta.get("type")
|
841 |
+
ctext = delta.get("content", "")
|
842 |
+
if choice.get("finish_reason") == "backend_busy":
|
843 |
+
ctext = '服务器繁忙,请稍候再试'
|
844 |
+
if search_enabled and ctext.startswith("[citation:"):
|
845 |
+
ctext = ""
|
846 |
+
if ctype == "thinking" and thinking_enabled:
|
847 |
+
think_list.append(ctext)
|
848 |
+
elif ctype == "text":
|
849 |
+
text_list.append(ctext)
|
850 |
+
except Exception as e:
|
851 |
+
app.logger.warning(f"[collect_data] 无法解析: {data_str}, 错误: {e}")
|
852 |
+
ctext = '服务器繁忙,请稍候再试'
|
853 |
text_list.append(ctext)
|
854 |
+
data_queue.put(None)
|
855 |
+
break
|
856 |
+
except Exception as e:
|
857 |
+
app.logger.warning(f"[collect_data] 错误: {e}")
|
858 |
+
ctext = '服务器繁忙,请稍候再试'
|
859 |
+
text_list.append(ctext)
|
860 |
+
data_queue.put(None)
|
861 |
+
finally:
|
862 |
+
deepseek_resp.close()
|
863 |
+
final_reasoning = "".join(think_list)
|
864 |
+
final_content = "".join(text_list)
|
865 |
+
prompt_tokens = len(tokenizer.encode(final_prompt))
|
866 |
+
completion_tokens = len(tokenizer.encode(final_content))
|
867 |
+
result = {
|
868 |
+
"id": completion_id,
|
869 |
+
"object": "chat.completion",
|
870 |
+
"created": created_time,
|
871 |
+
"model": model,
|
872 |
+
"choices": [
|
873 |
+
{
|
874 |
+
"index": 0,
|
875 |
+
"message": {
|
876 |
+
"role": "assistant",
|
877 |
+
"content": final_content,
|
878 |
+
"reasoning_content": final_reasoning,
|
879 |
+
},
|
880 |
+
"finish_reason": "stop",
|
881 |
+
}
|
882 |
+
],
|
883 |
+
"usage": {
|
884 |
+
"prompt_tokens": prompt_tokens,
|
885 |
+
"completion_tokens": completion_tokens,
|
886 |
+
"total_tokens": prompt_tokens + completion_tokens,
|
887 |
},
|
|
|
888 |
}
|
889 |
+
data_queue.put("DONE")
|
890 |
+
|
891 |
+
collect_thread = threading.Thread(target=collect_data)
|
892 |
+
collect_thread.start()
|
893 |
+
|
894 |
+
def generate():
    """Yield the JSON body once the collector thread has finished.

    Polls every 0.1 s. While waiting, an empty string is yielded each time
    ``KEEP_ALIVE_TIMEOUT`` seconds pass without output. When the collector
    thread has ended, the serialized ``result`` is yielded. If the thread
    ended without producing a result, an error payload is yielded instead,
    so the response always terminates rather than polling forever.
    """
    last_send_time = time.time()
    while True:
        current_time = time.time()
        if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
            yield ""
            last_send_time = current_time
        if not collect_thread.is_alive():
            if result is not None:
                yield json.dumps(result)
            else:
                # The collector died before setting `result`; end the
                # response instead of looping indefinitely.
                yield json.dumps({"error": "Internal Server Error"})
            break
        time.sleep(0.1)
|
905 |
+
|
906 |
+
return Response(generate(), mimetype="application/json")
|
907 |
+
except Exception as e:
|
908 |
+
app.logger.error(f"[chat_completions] 未知异常: {e}")
|
909 |
+
return jsonify({"error": "Internal Server Error"}), 500
|
910 |
finally:
|
911 |
if g.use_config_token:
|
912 |
+
release_account(g.account)
|
913 |
|
914 |
# ----------------------------------------------------------------------
|
915 |
# (11) 路由:/
|