Update app.py
app.py (changed)
```diff
@@ -258,31 +258,27 @@ class ModelManager:
         self.models = {}
 
     def load_model(self, model_config):
-        … (previous load_model body: eight removed lines whose content is not preserved in the capture)
+        if model_config['name'] not in self.models:
+            try:
+                self.models[model_config['name']] = Llama.from_pretrained(
+                    repo_id=model_config['repo_id'],
+                    filename=model_config['filename'],
+                    use_auth_token=HUGGINGFACE_TOKEN,
+                    n_threads=8,
+                    use_gpu=False
+                )
+            except Exception as e:
+                pass
 
     def load_all_models(self):
         with ThreadPoolExecutor() as executor:
-            for …
-            executor.submit(self.load_model, …
+            for config in model_configs:
+                executor.submit(self.load_model, config)
         return self.models
 
 model_manager = ModelManager()
 global_data['models'] = model_manager.load_all_models()
 
-def free_up_resources():
-    gc.collect()
-    print(f"Memory usage before cleanup: {psutil.virtual_memory().percent}%")
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-    print(f"Memory usage after cleanup: {psutil.virtual_memory().percent}%")
-
 class ChatRequest(BaseModel):
     message: str
```
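The rewritten `load_model` is a guarded lazy load: each model is cached under its config name, and any download or initialization failure is swallowed so one bad model cannot block the rest. Below is a minimal, self-contained sketch of the same pattern, not the commit's exact code: the `model_configs` entries are hypothetical placeholders, the `print` replaces the commit's bare `pass`, and the commit's `use_auth_token`/`use_gpu` kwargs are omitted. `Llama.from_pretrained(repo_id=..., filename=...)` is llama-cpp-python's Hub download helper (it requires the `huggingface_hub` package), and extra keyword arguments such as `n_threads` are forwarded to the `Llama` constructor.

```python
from concurrent.futures import ThreadPoolExecutor

from llama_cpp import Llama

# Hypothetical configs: name, repo_id, and filename are placeholders.
model_configs = [
    {"name": "demo-7b",
     "repo_id": "someuser/demo-7b-GGUF",
     "filename": "demo-7b.Q4_K_M.gguf"},
]

class ModelManager:
    def __init__(self):
        self.models = {}

    def load_model(self, model_config):
        # Guard: skip models that are already cached.
        if model_config["name"] in self.models:
            return
        try:
            self.models[model_config["name"]] = Llama.from_pretrained(
                repo_id=model_config["repo_id"],
                filename=model_config["filename"],
                n_threads=8,  # CPU threads used for inference
            )
        except Exception as e:
            # The commit uses a bare `pass`; reporting the failure makes
            # a bad config visible instead of a silently missing model.
            print(f"Failed to load {model_config['name']}: {e}")

    def load_all_models(self):
        # Exiting the with-block waits for every submitted load,
        # so self.models is fully populated when this returns.
        with ThreadPoolExecutor() as executor:
            for config in model_configs:
                executor.submit(self.load_model, config)
        return self.models
```

One caveat worth knowing: `executor.submit` never re-raises worker exceptions unless `future.result()` is consulted, so the `try/except` inside `load_model` (rather than around `submit`) is what actually catches load failures here.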
```diff
@@ -340,7 +336,6 @@ async def process_message(message):
     ]
     unique_responses = remove_repetitive_responses(responses)
     best_response = choose_best_response(unique_responses)
-    free_up_resources()
     return f"**{best_response['model']}:**\n{best_response['response']}"
 
 app = FastAPI()
```
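The second hunk drops the per-request `free_up_resources()` call, whose removed definition ran `gc.collect()` and, when CUDA was available, `torch.cuda.empty_cache()` between `psutil` memory reports; with `use_gpu=False` that cleanup likely added latency to every message without freeing much. The route that ties `ChatRequest` to `process_message` is outside the diff, so the following wiring is only a plausible reconstruction, with a hypothetical `/chat` path and a stubbed `process_message`:

```python
from fastapi import FastAPI
from pydantic import BaseModel

class ChatRequest(BaseModel):
    message: str

app = FastAPI()

async def process_message(message: str) -> str:
    # Stub standing in for the app's real process_message, which
    # queries the loaded models and returns the best response.
    return f"**demo-model:**\n(echo) {message}"

# Hypothetical route; the real path and handler are not shown in the diff.
@app.post("/chat")
async def chat(request: ChatRequest):
    return {"response": await process_message(request.message)}
```

Run it locally with `uvicorn app:app --reload` and POST `{"message": "hi"}` to `/chat` to exercise the flow.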