Hjgugugjhuhjggg committed
Commit d2283fc · verified · 1 Parent(s): f75753e

Update app.py

Files changed (1)
  1. app.py +31 -12
app.py CHANGED
@@ -1,6 +1,5 @@
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
-import re
 import uvicorn
 from fastapi import FastAPI, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
@@ -51,6 +50,7 @@ for config in model_configs:
 
 class ChatRequest(BaseModel):
     message: str
+    max_tokens_per_part: int = 256
 
 def normalize_input(input_text):
     return input_text.strip()
@@ -66,15 +66,28 @@ def remove_duplicates(text):
         seen_lines.add(line)
     return '\n'.join(unique_lines)
 
-def generate_model_response(model, inputs):
+def generate_model_response(model, inputs, max_tokens_per_part):
     try:
         if model is None:
-            return ""
-        response = model(inputs)
-        return remove_duplicates(response['choices'][0]['text'])
+            return []
+        full_response = ""
+        responses = []
+        tokens_generated = 0
+        while True:
+            response_part = model(inputs, max_tokens=max_tokens_per_part, stop=["\n\n"])
+            text = response_part['choices'][0]['text']
+            if not text.strip():
+                break
+            full_response += text
+            tokens_generated += len(response_part['choices'][0]['token'])
+            responses.append(remove_duplicates(text))
+            if "eos_token" in response_part['choices'][0]['token']:
+                break
+            inputs = ""
+        return responses
     except Exception as e:
         print(f"Error generating response: {e}")
-        return f"Error: {e}"
+        return [f"Error: {e}"]
 
 app = FastAPI()
 origins = ["*"]
@@ -90,17 +103,23 @@ app.add_middleware(
 async def generate(request: ChatRequest):
     inputs = normalize_input(request.message)
     with ThreadPoolExecutor() as executor:
-        futures = [executor.submit(generate_model_response, model, inputs) for model in models.values()]
+        futures = [executor.submit(generate_model_response, model, inputs, request.max_tokens_per_part) for model in models.values()]
         responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(models.keys(), as_completed(futures))]
 
     unique_responses = {}
-    for response in responses:
-        if response['model'] not in unique_responses and response['response']:
-            unique_responses[response['model']] = response['response']
+    for response_set in responses:
+        model_name = response_set['model']
+        if model_name not in unique_responses:
+            unique_responses[model_name] = []
+        unique_responses[model_name].extend(response_set['response'])
+
 
     formatted_response = ""
-    for model, response in unique_responses.items():
-        formatted_response += f"**{model}:**\n{response}\n\n"
+    for model, response_parts in unique_responses.items():
+        formatted_response += f"**{model}:**\n"
+        for i, part in enumerate(response_parts):
+            formatted_response += f"Part {i+1}:\n{part}\n\n"
+
 
     return {"response": formatted_response}
 
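Review note on the new chunked-generation loop: as far as I can tell, llama-cpp-python's completion dict exposes `text` and `finish_reason` under each choice (plus a `usage` block with token counts), but no `token` key, so `response_part['choices'][0]['token']` raises KeyError on the first iteration and every request ends up in the `except` branch. Resetting `inputs = ""` also makes later parts generate from an empty prompt instead of continuing the response. A minimal sketch of the intended loop under those assumptions (names mirror app.py; a suggestion, not part of the commit):

def generate_model_response(model, inputs, max_tokens_per_part):
    # Sketch keyed on fields llama-cpp-python documents in its
    # OpenAI-style completion dict; `remove_duplicates` is the helper
    # already defined in app.py.
    try:
        if model is None:
            return []
        prompt = inputs
        responses = []
        tokens_generated = 0
        while True:
            part = model(prompt, max_tokens=max_tokens_per_part, stop=["\n\n"])
            choice = part['choices'][0]
            text = choice['text']
            if not text.strip():
                break
            responses.append(remove_duplicates(text))
            tokens_generated += part['usage']['completion_tokens']
            # finish_reason is 'stop' when a stop sequence or EOS ended
            # this part, 'length' when max_tokens cut it off mid-thought.
            if choice['finish_reason'] == 'stop':
                break
            # Continue from the accumulated text, not an empty prompt.
            prompt += text
        return responses
    except Exception as e:
        print(f"Error generating response: {e}")
        return [f"Error: {e}"]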
 
 
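A related note on the fan-out in `generate`: `zip(models.keys(), as_completed(futures))` pairs model names with futures in completion order, not submission order, so a fast model's output can be credited to a slow one. The usual remedy from the concurrent.futures pattern is to map each future back to its key; a sketch against the same objects (the `collect_responses` wrapper is hypothetical):

from concurrent.futures import ThreadPoolExecutor, as_completed

def collect_responses(models, inputs, max_tokens_per_part):
    # Map each future back to the model that produced it so results
    # cannot be mis-attributed when they complete out of order.
    with ThreadPoolExecutor() as executor:
        future_to_name = {
            executor.submit(generate_model_response, m, inputs, max_tokens_per_part): name
            for name, m in models.items()
        }
        return [
            {'model': future_to_name[f], 'response': f.result()}
            for f in as_completed(future_to_name)
        ]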
125