coo7 committed on
Commit
c79b38d
·
verified ·
1 Parent(s): fec88c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +217 -129
app.py CHANGED
@@ -12,6 +12,8 @@ import ctypes
12
  from wasmtime import Store, Module, Linker
13
  import re
14
  import transformers
 
 
15
 
16
  # -------------------------- 初始化 tokenizer --------------------------
17
  chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
@@ -114,14 +116,13 @@ DEEPSEEK_COMPLETION_URL = f"https://{DEEPSEEK_HOST}/api/v0/chat/completion"
114
 
115
  BASE_HEADERS = {
116
  'Host': "chat.deepseek.com",
117
- 'User-Agent': "DeepSeek/1.0.7 Android/34",
118
  'Accept': "application/json",
119
  'Accept-Encoding': "gzip",
120
  'Content-Type': "application/json",
121
  'x-client-platform': "android",
122
- 'x-client-version': "1.0.7",
123
  'x-client-locale': "zh_CN",
124
- 'x-rangers-id': "7883327620434123524",
125
  'accept-charset': "UTF-8",
126
  }
127
 
@@ -184,21 +185,36 @@ def login_deepseek_via_account(account):
184
  return new_token
185
 
186
  # ----------------------------------------------------------------------
187
# (4) Pick, at random, one configured account that is neither busy nor already tried
# ----------------------------------------------------------------------
def choose_new_account(exclude_ids):
    """Return a random usable account, or None when there is none.

    An account is usable when its identifier appears neither in
    ``exclude_ids`` (accounts already tried for this request) nor in the
    module-level ``active_accounts`` collection (accounts currently in use).

    Args:
        exclude_ids: identifiers of accounts that must not be selected.

    Returns:
        The chosen account entry from ``CONFIG["accounts"]``, or None.
    """
    candidates = []
    for account in CONFIG.get("accounts", []):
        identifier = get_account_identifier(account)
        if identifier in exclude_ids or identifier in active_accounts:
            continue
        candidates.append(account)

    if not candidates:
        app.logger.warning("[choose_new_account] 没有可用的账号")
        return None

    picked = random.choice(candidates)
    app.logger.info(f"[choose_new_account] 新选择账号: {get_account_identifier(picked)}")
    return picked
201
 
 
 
 
 
202
  # ----------------------------------------------------------------------
203
  # (5) 判断调用模式:配置模式 vs 用户自带 token
204
  # ----------------------------------------------------------------------
@@ -218,10 +234,10 @@ def determine_mode_and_token():
218
  if caller_key in config_keys:
219
  g.use_config_token = True
220
  g.tried_accounts = [] # 初始化已尝试账号
221
- selected_account = choose_new_account(g.tried_accounts)
222
  if not selected_account:
223
- return Response(json.dumps({"error": "No accounts configured."}),
224
- status=500, mimetype="application/json")
225
  if not selected_account.get("token", "").strip():
226
  try:
227
  login_deepseek_via_account(selected_account)
@@ -229,15 +245,11 @@ def determine_mode_and_token():
229
  app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
230
  return Response(json.dumps({"error": "Account login failed."}),
231
  status=500, mimetype="application/json")
232
- else:
233
- app.logger.info(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 已有 token,无需重新登录")
234
  g.deepseek_token = selected_account.get("token")
235
  g.account = selected_account
236
- app.logger.info(f"[determine_mode_and_token] 配置模式:使用账号 {get_account_identifier(selected_account)} 的 token")
237
  else:
238
  g.use_config_token = False
239
  g.deepseek_token = caller_key
240
- app.logger.info("[determine_mode_and_token] 使用用户自带 DeepSeek token")
241
  return None
242
 
243
  def get_auth_headers():
@@ -299,7 +311,7 @@ def create_session(max_attempts=3):
299
  g.tried_accounts = []
300
  if current_id not in g.tried_accounts:
301
  g.tried_accounts.append(current_id)
302
- new_account = choose_new_account(g.tried_accounts)
303
  if new_account is None:
304
  break
305
  try:
@@ -464,7 +476,7 @@ def get_pow_response(max_attempts=3):
464
  g.tried_accounts = []
465
  if current_id not in g.tried_accounts:
466
  g.tried_accounts.append(current_id)
467
- new_account = choose_new_account(g.tried_accounts)
468
  if new_account is None:
469
  break
470
  try:
@@ -564,8 +576,8 @@ def messages_prepare(messages: list) -> str:
564
  else:
565
  parts.append(text)
566
  final_prompt = "".join(parts)
567
- # 移除 markdown 图片格式:
568
- final_prompt = re.sub(r"!", "", final_prompt)
569
  return final_prompt
570
 
571
  # ----------------------------------------------------------------------
@@ -577,22 +589,6 @@ def chat_completions():
577
  if mode_resp:
578
  return mode_resp
579
 
580
- # 如果使用配置模式,检查账号是否正忙;如果忙则尝试切换账号
581
- if g.use_config_token:
582
- account_id = get_account_identifier(g.account)
583
- if account_id in active_accounts:
584
- g.tried_accounts.append(account_id)
585
- new_account = choose_new_account(g.tried_accounts)
586
- if new_account is None:
587
- return jsonify({"error": "All accounts are busy."}), 503
588
- try:
589
- login_deepseek_via_account(new_account)
590
- except Exception as e:
591
- return jsonify({"error": "Account login failed."}), 500
592
- g.account = new_account
593
- g.deepseek_token = new_account.get("token")
594
- account_id = get_account_identifier(new_account)
595
- active_accounts.add(account_id)
596
  try:
597
  req_data = request.json or {}
598
  app.logger.info(f"[chat_completions] 收到请求: {req_data}")
@@ -661,29 +657,81 @@ def chat_completions():
661
  status=deepseek_resp.status_code,
662
  mimetype="application/json")
663
 
 
 
 
664
  def sse_stream():
665
  try:
666
  final_text = ""
667
  final_thinking = ""
668
  first_chunk_sent = False
 
 
669
  citation_map = {} # 用于存储引用链接的字典
670
- for raw_line in deepseek_resp.iter_lines(chunk_size=512):
 
671
  try:
672
- line = raw_line.decode("utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
  except Exception as e:
674
- app.logger.warning(f"[sse_stream] 解码失败: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
675
  continue
676
- if not line:
677
- continue
678
- if line.startswith("data:"):
679
- data_str = line[5:].strip()
680
- if data_str == "[DONE]":
681
  prompt_tokens = len(tokenizer.encode(final_prompt))
682
  completion_tokens = len(tokenizer.encode(final_text))
683
  usage = {
684
  "prompt_tokens": prompt_tokens,
685
  "completion_tokens": completion_tokens,
686
- "total_tokens": prompt_tokens + completion_tokens
687
  }
688
  finish_chunk = {
689
  "id": completion_id,
@@ -691,30 +739,25 @@ def chat_completions():
691
  "created": created_time,
692
  "model": model,
693
  "choices": [
694
- {"delta": {}, "index": 0, "finish_reason": "stop"}
 
 
 
 
695
  ],
696
- "usage": usage
697
  }
698
  yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
699
  yield "data: [DONE]\n\n"
 
700
  break
701
- try:
702
- chunk = json.loads(data_str)
703
- app.logger.debug(f"[sse_stream] 解析到 chunk: {chunk}")
704
- # 处理搜索索引数据
705
- if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
706
- search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
707
- for idx in search_indexes:
708
- citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
709
- continue
710
- except Exception as e:
711
- app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
712
- continue
713
  new_choices = []
714
  for choice in chunk.get("choices", []):
715
  delta = choice.get("delta", {})
716
  ctype = delta.get("type")
717
  ctext = delta.get("content", "")
 
 
718
  if search_enabled and ctext.startswith("[citation:"):
719
  ctext = ""
720
  if ctype == "thinking":
@@ -732,96 +775,141 @@ def chat_completions():
732
  elif ctype == "text":
733
  delta_obj["content"] = ctext
734
  if delta_obj:
735
- new_choices.append({"delta": delta_obj, "index": choice.get("index", 0)})
 
 
 
 
 
736
  if new_choices:
737
  out_chunk = {
738
  "id": completion_id,
739
  "object": "chat.completion.chunk",
740
  "created": created_time,
741
  "model": model,
742
- "choices": new_choices
743
  }
744
  yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
 
 
 
745
  except Exception as e:
746
  app.logger.error(f"[sse_stream] 异常: {e}")
747
  finally:
748
  deepseek_resp.close()
749
  if g.use_config_token:
750
- active_accounts.discard(get_account_identifier(g.account))
751
  return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
752
  else:
753
  # 非流式响应处理
754
  think_list = []
755
  text_list = []
 
756
  citation_map = {} # 用于存储引用链接的字典
757
- try:
758
- for raw_line in deepseek_resp.iter_lines(chunk_size=512):
759
- try:
760
- line = raw_line.decode("utf-8")
761
- except Exception as e:
762
- app.logger.warning(f"[chat_completions] 解码失败: {e}")
763
- continue
764
- if not line:
765
- continue
766
- if line.startswith("data:"):
767
- data_str = line[5:].strip()
768
- if data_str == "[DONE]":
769
- break
770
  try:
771
- chunk = json.loads(data_str)
772
- app.logger.debug(f"[chat_completions] 非流式 chunk: {chunk}")
773
- # 处理搜索索引数据
774
- if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
775
- search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
776
- for idx in search_indexes:
777
- citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
778
- continue
779
  except Exception as e:
780
- app.logger.warning(f"[chat_completions] 无法解析: {data_str}, 错误: {e}")
 
 
 
 
 
781
  continue
782
- for choice in chunk.get("choices", []):
783
- delta = choice.get("delta", {})
784
- ctype = delta.get("type")
785
- ctext = delta.get("content", "")
786
- if search_enabled and ctext.startswith("[citation:"):
787
- ctext = ""
788
- if ctype == "thinking" and thinking_enabled:
789
- think_list.append(ctext)
790
- elif ctype == "text":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
791
  text_list.append(ctext)
792
- finally:
793
- deepseek_resp.close()
794
- final_reasoning = "".join(think_list)
795
- final_content = "".join(text_list)
796
- prompt_tokens = len(tokenizer.encode(final_prompt))
797
- completion_tokens = len(tokenizer.encode(final_content))
798
- total_tokens = prompt_tokens + completion_tokens
799
- result = {
800
- "id": completion_id,
801
- "object": "chat.completion",
802
- "created": created_time,
803
- "model": model,
804
- "choices": [
805
- {
806
- "index": 0,
807
- "message": {
808
- "role": "assistant",
809
- "content": final_content,
810
- "reasoning_content": final_reasoning
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
  },
812
- "finish_reason": "stop"
813
  }
814
- ],
815
- "usage": {
816
- "prompt_tokens": prompt_tokens,
817
- "completion_tokens": completion_tokens,
818
- "total_tokens": total_tokens
819
- }
820
- }
821
- return jsonify(result), 200
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  finally:
823
  if g.use_config_token:
824
- active_accounts.discard(get_account_identifier(g.account))
825
 
826
  # ----------------------------------------------------------------------
827
  # (11) 路由:/
 
12
  from wasmtime import Store, Module, Linker
13
  import re
14
  import transformers
15
+ import queue
16
+ import threading
17
 
18
  # -------------------------- 初始化 tokenizer --------------------------
19
  chat_tokenizer_dir = "THUDM/chatglm2-6b" # 使用现成的模型tokenizer
 
116
 
117
  BASE_HEADERS = {
118
  'Host': "chat.deepseek.com",
119
+ 'User-Agent': "DeepSeek/1.0.13 Android/35",
120
  'Accept': "application/json",
121
  'Accept-Encoding': "gzip",
122
  'Content-Type': "application/json",
123
  'x-client-platform': "android",
124
+ 'x-client-version': "1.0.13",
125
  'x-client-locale': "zh_CN",
 
126
  'accept-charset': "UTF-8",
127
  }
128
 
 
185
  return new_token
186
 
187
  # ----------------------------------------------------------------------
188
# -------------------------- Global account queue --------------------------
# Accounts that are currently idle. An account is popped from the queue while
# a request uses it and appended to the tail again when it is released.
account_queue = []
# Serialises every read-modify-write of account_queue. Requests may be served
# concurrently, and an unsynchronised "scan by index, then pop" can raise
# IndexError when another request pops in between.
_account_queue_lock = threading.Lock()


def init_account_queue():
    """Load the configured accounts into the queue in random order.

    The list itself is copied (shallow copy), so popping from the queue never
    mutates ``CONFIG["accounts"]``; the account dicts are still shared with
    CONFIG.
    """
    global account_queue
    accounts = list(CONFIG.get("accounts", []))
    random.shuffle(accounts)  # randomise the initial order
    with _account_queue_lock:
        account_queue = accounts


init_account_queue()


def choose_new_account():
    """Pop and return the first queued account that has an identifier.

    The returned account is removed from the queue, so no other caller can
    obtain it until it is handed back with ``release_account``.

    Returns:
        The account taken from the queue, or None when the queue holds no
        account with a non-empty identifier (nothing configured, or every
        account is currently checked out).
    """
    with _account_queue_lock:
        for position, acc in enumerate(account_queue):
            if get_account_identifier(acc):
                return account_queue.pop(position)
    app.logger.warning("[choose_new_account] 没有可用的账号或所有账号都在使用中")
    return None


def release_account(account):
    """Put ``account`` back at the tail of the queue.

    The call is idempotent: releasing None, or an account object that is
    already queued, is a no-op. The same account can be released more than
    once for a single request (both the streaming generator's ``finally`` and
    the route handler's ``finally`` call this), and a duplicate entry would
    let two requests use one account at the same time.

    Args:
        account: the account previously returned by ``choose_new_account``.
    """
    if account is None:
        return
    with _account_queue_lock:
        # Identity check: two distinct config entries with equal contents
        # must both be allowed in the queue.
        if any(queued is account for queued in account_queue):
            return
        account_queue.append(account)
217
+
218
  # ----------------------------------------------------------------------
219
  # (5) 判断调用模式:配置模式 vs 用户自带 token
220
  # ----------------------------------------------------------------------
 
234
  if caller_key in config_keys:
235
  g.use_config_token = True
236
  g.tried_accounts = [] # 初始化已尝试账号
237
+ selected_account = choose_new_account()
238
  if not selected_account:
239
+ return Response(json.dumps({"error": "No accounts configured or all accounts are busy."}),
240
+ status=429, mimetype="application/json")
241
  if not selected_account.get("token", "").strip():
242
  try:
243
  login_deepseek_via_account(selected_account)
 
245
  app.logger.error(f"[determine_mode_and_token] 账号 {get_account_identifier(selected_account)} 登录失败:{e}")
246
  return Response(json.dumps({"error": "Account login failed."}),
247
  status=500, mimetype="application/json")
 
 
248
  g.deepseek_token = selected_account.get("token")
249
  g.account = selected_account
 
250
  else:
251
  g.use_config_token = False
252
  g.deepseek_token = caller_key
 
253
  return None
254
 
255
  def get_auth_headers():
 
311
  g.tried_accounts = []
312
  if current_id not in g.tried_accounts:
313
  g.tried_accounts.append(current_id)
314
+ new_account = choose_new_account()
315
  if new_account is None:
316
  break
317
  try:
 
476
  g.tried_accounts = []
477
  if current_id not in g.tried_accounts:
478
  g.tried_accounts.append(current_id)
479
+ new_account = choose_new_account()
480
  if new_account is None:
481
  break
482
  try:
 
576
  else:
577
  parts.append(text)
578
  final_prompt = "".join(parts)
579
+ # 仅移除 markdown 图片格式(不全部移除 !)
580
+ final_prompt = re.sub(r"!\[(.*?)\]\((.*?)\)", r"[\1](\2)", final_prompt)
581
  return final_prompt
582
 
583
  # ----------------------------------------------------------------------
 
589
  if mode_resp:
590
  return mode_resp
591
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
  try:
593
  req_data = request.json or {}
594
  app.logger.info(f"[chat_completions] 收到请求: {req_data}")
 
657
  status=deepseek_resp.status_code,
658
  mimetype="application/json")
659
 
660
+ # 添加保活超时配置(5秒)
661
+ KEEP_ALIVE_TIMEOUT = 5
662
+
663
  def sse_stream():
664
  try:
665
  final_text = ""
666
  final_thinking = ""
667
  first_chunk_sent = False
668
+ result_queue = queue.Queue()
669
+ last_send_time = time.time()
670
  citation_map = {} # 用于存储引用链接的字典
671
+
672
def process_data():
    """Read the upstream SSE stream and feed parsed chunks into ``result_queue``.

    Used as the target of a worker thread. Each non-empty ``data:`` line is
    JSON-decoded and queued. Chunks whose first choice has delta type
    ``search_index`` are not queued; their cite-index -> URL pairs are stored
    in ``citation_map`` instead. If a line cannot be decoded or parsed, or
    iterating the response fails, a canned "server busy" chunk is queued and
    reading stops.

    A single ``None`` end-of-stream sentinel is always queued last, including
    when the upstream closes without ever sending ``[DONE]``. Without it the
    consumer loop would keep emitting keep-alives forever.
    """
    # Canned chunk delivered to the client when the upstream data is unusable.
    busy_content_str = '{"choices":[{"index":0,"delta":{"content":"服务器繁忙,请稍候再试","type":"text"}}],"model":"","chunk_token_usage":1,"created":0,"message_id":-1,"parent_id":-1}'

    def queue_busy_notice():
        # Parsed afresh on every call so each queued chunk is its own dict.
        result_queue.put(json.loads(busy_content_str))

    try:
        for raw_line in deepseek_resp.iter_lines():
            try:
                line = raw_line.decode("utf-8")
            except Exception as e:
                app.logger.warning(f"[sse_stream] 解码失败: {e}")
                queue_busy_notice()
                break
            if not line or not line.startswith("data:"):
                continue
            data_str = line[5:].strip()
            if data_str == "[DONE]":
                break
            try:
                chunk = json.loads(data_str)
                # Search-index chunks only carry citation URLs: record and skip.
                if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
                    search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
                    for idx in search_indexes:
                        citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
                    continue
                result_queue.put(chunk)
            except Exception as e:
                app.logger.warning(f"[sse_stream] 无法解析: {data_str}, 错误: {e}")
                queue_busy_notice()
                break
    except Exception as e:
        app.logger.warning(f"[sse_stream] 错误: {e}")
        queue_busy_notice()
    finally:
        # Sentinel first, so the consumer is released even if close() raises.
        result_queue.put(None)
        deepseek_resp.close()
715
+
716
+ process_thread = threading.Thread(target=process_data)
717
+ process_thread.start()
718
+
719
+ while True:
720
+ current_time = time.time()
721
+ if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
722
+ yield ": keep-alive\n\n"
723
+ last_send_time = current_time
724
  continue
725
+ try:
726
+ chunk = result_queue.get(timeout=0.1)
727
+ if chunk is None:
728
+ # 发送最终统计信息
 
729
  prompt_tokens = len(tokenizer.encode(final_prompt))
730
  completion_tokens = len(tokenizer.encode(final_text))
731
  usage = {
732
  "prompt_tokens": prompt_tokens,
733
  "completion_tokens": completion_tokens,
734
+ "total_tokens": prompt_tokens + completion_tokens,
735
  }
736
  finish_chunk = {
737
  "id": completion_id,
 
739
  "created": created_time,
740
  "model": model,
741
  "choices": [
742
+ {
743
+ "delta": {},
744
+ "index": 0,
745
+ "finish_reason": "stop",
746
+ }
747
  ],
748
+ "usage": usage,
749
  }
750
  yield f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
751
  yield "data: [DONE]\n\n"
752
+ last_send_time = current_time
753
  break
 
 
 
 
 
 
 
 
 
 
 
 
754
  new_choices = []
755
  for choice in chunk.get("choices", []):
756
  delta = choice.get("delta", {})
757
  ctype = delta.get("type")
758
  ctext = delta.get("content", "")
759
+ if choice.get("finish_reason") == "backend_busy":
760
+ ctext = '服务器繁忙,请稍候再试'
761
  if search_enabled and ctext.startswith("[citation:"):
762
  ctext = ""
763
  if ctype == "thinking":
 
775
  elif ctype == "text":
776
  delta_obj["content"] = ctext
777
  if delta_obj:
778
+ new_choices.append(
779
+ {
780
+ "delta": delta_obj,
781
+ "index": choice.get("index", 0),
782
+ }
783
+ )
784
  if new_choices:
785
  out_chunk = {
786
  "id": completion_id,
787
  "object": "chat.completion.chunk",
788
  "created": created_time,
789
  "model": model,
790
+ "choices": new_choices,
791
  }
792
  yield f"data: {json.dumps(out_chunk, ensure_ascii=False)}\n\n"
793
+ last_send_time = current_time
794
+ except queue.Empty:
795
+ continue
796
  except Exception as e:
797
  app.logger.error(f"[sse_stream] 异常: {e}")
798
  finally:
799
  deepseek_resp.close()
800
  if g.use_config_token:
801
+ release_account(g.account)
802
  return Response(stream_with_context(sse_stream()), content_type="text/event-stream")
803
  else:
804
  # 非流式响应处理
805
  think_list = []
806
  text_list = []
807
+ result = None
808
  citation_map = {} # 用于存储引用链接的字典
809
+
810
+ data_queue = queue.Queue()
811
+
812
+ def collect_data():
813
+ nonlocal result
814
+ try:
815
+ for raw_line in deepseek_resp.iter_lines():
 
 
 
 
 
 
816
  try:
817
+ line = raw_line.decode("utf-8")
 
 
 
 
 
 
 
818
  except Exception as e:
819
+ app.logger.warning(f"[chat_completions] 解码失败: {e}")
820
+ ctext = '服务器繁忙,请稍候再试'
821
+ text_list.append(ctext)
822
+ data_queue.put(None)
823
+ break
824
+ if not line:
825
  continue
826
+ if line.startswith("data:"):
827
+ data_str = line[5:].strip()
828
+ if data_str == "[DONE]":
829
+ data_queue.put(None)
830
+ break
831
+ try:
832
+ chunk = json.loads(data_str)
833
+ if chunk.get("choices", [{}])[0].get("delta", {}).get("type") == "search_index":
834
+ search_indexes = chunk["choices"][0]["delta"].get("search_indexes", [])
835
+ for idx in search_indexes:
836
+ citation_map[str(idx.get("cite_index"))] = idx.get("url", "")
837
+ continue
838
+ for choice in chunk.get("choices", []):
839
+ delta = choice.get("delta", {})
840
+ ctype = delta.get("type")
841
+ ctext = delta.get("content", "")
842
+ if choice.get("finish_reason") == "backend_busy":
843
+ ctext = '服务器繁忙,请稍候再试'
844
+ if search_enabled and ctext.startswith("[citation:"):
845
+ ctext = ""
846
+ if ctype == "thinking" and thinking_enabled:
847
+ think_list.append(ctext)
848
+ elif ctype == "text":
849
+ text_list.append(ctext)
850
+ except Exception as e:
851
+ app.logger.warning(f"[collect_data] 无法解析: {data_str}, 错误: {e}")
852
+ ctext = '服务器繁忙,请稍候再试'
853
  text_list.append(ctext)
854
+ data_queue.put(None)
855
+ break
856
+ except Exception as e:
857
+ app.logger.warning(f"[collect_data] 错误: {e}")
858
+ ctext = '服务器繁忙,请稍候再试'
859
+ text_list.append(ctext)
860
+ data_queue.put(None)
861
+ finally:
862
+ deepseek_resp.close()
863
+ final_reasoning = "".join(think_list)
864
+ final_content = "".join(text_list)
865
+ prompt_tokens = len(tokenizer.encode(final_prompt))
866
+ completion_tokens = len(tokenizer.encode(final_content))
867
+ result = {
868
+ "id": completion_id,
869
+ "object": "chat.completion",
870
+ "created": created_time,
871
+ "model": model,
872
+ "choices": [
873
+ {
874
+ "index": 0,
875
+ "message": {
876
+ "role": "assistant",
877
+ "content": final_content,
878
+ "reasoning_content": final_reasoning,
879
+ },
880
+ "finish_reason": "stop",
881
+ }
882
+ ],
883
+ "usage": {
884
+ "prompt_tokens": prompt_tokens,
885
+ "completion_tokens": completion_tokens,
886
+ "total_tokens": prompt_tokens + completion_tokens,
887
  },
 
888
  }
889
+ data_queue.put("DONE")
890
+
891
+ collect_thread = threading.Thread(target=collect_data)
892
+ collect_thread.start()
893
+
894
def generate():
    """Yield the JSON body of the non-streaming response once it is ready.

    Polls the collector thread every 0.1 s. While waiting, an empty string is
    yielded every ``KEEP_ALIVE_TIMEOUT`` seconds. When the thread has finished,
    the serialised ``result`` is yielded and the generator ends.

    If the thread ended without producing a result (for example because
    building it raised), an error object is yielded instead of polling
    forever.
    """
    last_send_time = time.time()
    while True:
        current_time = time.time()
        if current_time - last_send_time >= KEEP_ALIVE_TIMEOUT:
            yield ""
            last_send_time = current_time
        if not collect_thread.is_alive():
            if result is not None:
                yield json.dumps(result)
            else:
                # The worker died before assigning a result; end the response.
                yield json.dumps({"error": "Internal Server Error"})
            break
        time.sleep(0.1)
905
+
906
+ return Response(generate(), mimetype="application/json")
907
+ except Exception as e:
908
+ app.logger.error(f"[chat_completions] 未知异常: {e}")
909
+ return jsonify({"error": "Internal Server Error"}), 500
910
  finally:
911
  if g.use_config_token:
912
+ release_account(g.account)
913
 
914
  # ----------------------------------------------------------------------
915
  # (11) 路由:/