Hjgugugjhuhjggg committed
Commit bf69cdf (verified)
Parent: c350bb5

Update app.py

Files changed (1):
  app.py  +13 -18

app.py CHANGED

@@ -258,31 +258,27 @@ class ModelManager:
         self.models = {}
 
     def load_model(self, model_config):
-        model_path = os.path.join("models", model_config['filename'])
-        if not os.path.exists(model_path):
-            print(f"Model {model_config['name']} not found, downloading...")
-            # The code to download the model from HuggingFace goes here, if necessary.
-            self.models[model_config['name']] = Llama(model_path)
-        else:
-            print(f"Model {model_config['name']} loaded successfully.")
-            self.models[model_config['name']] = Llama(model_path)
+        if model_config['name'] not in self.models:
+            try:
+                self.models[model_config['name']] = Llama.from_pretrained(
+                    repo_id=model_config['repo_id'],
+                    filename=model_config['filename'],
+                    use_auth_token=HUGGINGFACE_TOKEN,
+                    n_threads=8,
+                    use_gpu=False
+                )
+            except Exception as e:
+                pass
 
     def load_all_models(self):
         with ThreadPoolExecutor() as executor:
-            for model_config in model_configs:
-                executor.submit(self.load_model, model_config)
+            for config in model_configs:
+                executor.submit(self.load_model, config)
         return self.models
 
 model_manager = ModelManager()
 global_data['models'] = model_manager.load_all_models()
 
-def free_up_resources():
-    gc.collect()
-    print(f"Memory usage before cleanup: {psutil.virtual_memory().percent}%")
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    print(f"Memory usage after cleanup: {psutil.virtual_memory().percent}%")
-
 class ChatRequest(BaseModel):
     message: str
 
@@ -340,7 +336,6 @@ async def process_message(message):
     ]
     unique_responses = remove_repetitive_responses(responses)
     best_response = choose_best_response(unique_responses)
-    free_up_resources()
     return f"**{best_response['model']}:**\n{best_response['response']}"
 
 app = FastAPI()
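
For context on the new loader: llama-cpp-python's Llama.from_pretrained downloads a GGUF file from the Hugging Face Hub (via huggingface_hub) and constructs the model in one call, which is what replaces the manual os.path.exists check above. Below is a minimal standalone sketch of that pattern; the repo_id and filename are hypothetical stand-ins, and it uses the library's documented n_gpu_layers=0 for CPU-only inference, whereas the commit's use_gpu and use_auth_token arguments are simply passed through as extra keyword arguments.

from llama_cpp import Llama  # pip install llama-cpp-python huggingface-hub

# Sketch only -- repo_id/filename are hypothetical, not values from app.py.
# Gated repos are normally authenticated via `huggingface-cli login` or the
# HF_TOKEN environment variable rather than a per-call argument.
llm = Llama.from_pretrained(
    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # hypothetical GGUF repo
    filename="*Q4_K_M.gguf",  # glob picking one quantization to download
    n_threads=8,              # CPU threads, matching the commit
    n_gpu_layers=0,           # keep every layer on the CPU
    verbose=False,
)
out = llm("Q: What is the capital of France? A:", max_tokens=8)
print(out["choices"][0]["text"])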
 
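One detail load_all_models depends on: ThreadPoolExecutor used as a context manager joins every submitted task when the with-block exits, so self.models is fully populated before it is returned. Here is a small self-contained sketch of that fan-out/join pattern (the configs and loader below are placeholders, not the app's real ones); keeping the Future objects and calling .result() is also the usual way to surface a worker's exception, which the hunk's except Exception: pass otherwise swallows.

from concurrent.futures import ThreadPoolExecutor

models = {}
model_configs = [{"name": f"model-{i}"} for i in range(3)]  # placeholder configs

def load_model(config):
    # Stand-in for the real Llama.from_pretrained call; each worker
    # writes a distinct key, so the dict needs no extra locking here.
    models[config["name"]] = f"loaded:{config['name']}"

with ThreadPoolExecutor() as executor:
    futures = [executor.submit(load_model, c) for c in model_configs]
# Exiting the with-block has already joined every worker ...
for f in futures:
    f.result()  # ... and .result() re-raises anything a loader raised.
print(models)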