Update app.py
app.py (changed)
```diff
@@ -258,31 +258,27 @@ class ModelManager:
         self.models = {}
 
     def load_model(self, model_config):
-        … (previous load_model body: eight removed lines whose content is not preserved in the capture)
+        if model_config['name'] not in self.models:
+            try:
+                self.models[model_config['name']] = Llama.from_pretrained(
+                    repo_id=model_config['repo_id'],
+                    filename=model_config['filename'],
+                    use_auth_token=HUGGINGFACE_TOKEN,
+                    n_threads=8,
+                    use_gpu=False
+                )
+            except Exception as e:
+                pass
 
     def load_all_models(self):
         with ThreadPoolExecutor() as executor:
-            for …
-            executor.submit(self.load_model, …
+            for config in model_configs:
+                executor.submit(self.load_model, config)
         return self.models
 
 model_manager = ModelManager()
 global_data['models'] = model_manager.load_all_models()
 
-def free_up_resources():
-    gc.collect()
-    print(f"Memory usage before cleanup: {psutil.virtual_memory().percent}%")
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    print(f"Memory usage after cleanup: {psutil.virtual_memory().percent}%")
-
 class ChatRequest(BaseModel):
     message: str
```
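The rewritten `load_model` is a guarded lazy load: each model is cached under its config name, and any download or initialization failure is swallowed so one bad model cannot block the rest. Below is a minimal, self-contained sketch of the same pattern, not the commit's exact code: the `model_configs` entries are hypothetical placeholders, the `print` replaces the commit's bare `pass`, and the commit's `use_auth_token`/`use_gpu` kwargs are omitted. `Llama.from_pretrained(repo_id=..., filename=...)` is llama-cpp-python's Hub download helper (it requires the `huggingface_hub` package), and extra keyword arguments such as `n_threads` are forwarded to the `Llama` constructor.

```python
from concurrent.futures import ThreadPoolExecutor

from llama_cpp import Llama

# Hypothetical configs: name, repo_id, and filename are placeholders.
model_configs = [
    {"name": "demo-7b",
     "repo_id": "someuser/demo-7b-GGUF",
     "filename": "demo-7b.Q4_K_M.gguf"},
]

class ModelManager:
    def __init__(self):
        self.models = {}

    def load_model(self, model_config):
        # Guard: skip models that are already cached.
        if model_config["name"] in self.models:
            return
        try:
            self.models[model_config["name"]] = Llama.from_pretrained(
                repo_id=model_config["repo_id"],
                filename=model_config["filename"],
                n_threads=8,  # CPU threads used for inference
            )
        except Exception as e:
            # The commit uses a bare `pass`; reporting the failure makes
            # a bad config visible instead of a silently missing model.
            print(f"Failed to load {model_config['name']}: {e}")

    def load_all_models(self):
        # Exiting the with-block waits for every submitted load,
        # so self.models is fully populated when this returns.
        with ThreadPoolExecutor() as executor:
            for config in model_configs:
                executor.submit(self.load_model, config)
        return self.models
```

One caveat worth knowing: `executor.submit` never re-raises worker exceptions unless `future.result()` is consulted, so the `try/except` inside `load_model` (rather than around `submit`) is what actually catches load failures here.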
```diff
@@ -340,7 +336,6 @@ async def process_message(message):
     ]
     unique_responses = remove_repetitive_responses(responses)
     best_response = choose_best_response(unique_responses)
-    free_up_resources()
     return f"**{best_response['model']}:**\n{best_response['response']}"
 
 app = FastAPI()
```
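The second hunk drops the per-request `free_up_resources()` call, whose removed definition ran `gc.collect()` and, when CUDA was available, `torch.cuda.empty_cache()` between `psutil` memory reports; with `use_gpu=False` that cleanup likely added latency to every message without freeing much. The route that ties `ChatRequest` to `process_message` is outside the diff, so the following wiring is only a plausible reconstruction, with a hypothetical `/chat` path and a stubbed `process_message`:

```python
from fastapi import FastAPI
from pydantic import BaseModel

class ChatRequest(BaseModel):
    message: str

app = FastAPI()

async def process_message(message: str) -> str:
    # Stub standing in for the app's real process_message, which
    # queries the loaded models and returns the best response.
    return f"**demo-model:**\n(echo) {message}"

# Hypothetical route; the real path and handler are not shown in the diff.
@app.post("/chat")
async def chat(request: ChatRequest):
    return {"response": await process_message(request.message)}
```

Run it locally with `uvicorn app:app --reload` and POST `{"message": "hi"}` to `/chat` to exercise the flow.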