Update app.py
app.py CHANGED
@@ -5,6 +5,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import uvicorn
 from dotenv import load_dotenv
 from difflib import SequenceMatcher
+from tqdm import tqdm  # Import tqdm for the progress bar
 
 load_dotenv()
 
@@ -20,6 +21,7 @@ models = [
 
 # Load the models into memory only once
 llms = [Llama.from_pretrained(repo_id=model['repo_id'], filename=model['filename']) for model in models]
+print(f"Modelos cargados: {[model['repo_id'] for model in models]}")
 
 class ChatRequest(BaseModel):
     message: str
@@ -29,6 +31,7 @@ class ChatRequest(BaseModel):
 
 def generate_chat_response(request, llm):
     try:
+        # Normalize the message for robust handling
         user_input = normalize_input(request.message)
         response = llm.create_chat_completion(
             messages=[{"role": "user", "content": user_input}],
@@ -42,10 +45,11 @@ def generate_chat_response(request, llm):
         return {"response": f"Error: {str(e)}", "literal": user_input}
 
 def normalize_input(input_text):
+    # Implement any normalization logic needed here
     return input_text.strip()
 
 def select_best_response(responses, request):
-    coherent_responses = filter_by_coherence(responses, request)
+    coherent_responses = filter_by_coherence([resp['response'] for resp in responses], request)
     best_response = filter_by_similarity(coherent_responses)
     return best_response
 
@@ -68,29 +72,32 @@ async def generate_chat(request: ChatRequest):
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="The message cannot be empty.")
 
+    print(f"Procesando solicitud: {request.message}")
+
+    # Create a ThreadPoolExecutor to run the tasks in parallel
     with ThreadPoolExecutor(max_workers=None) as executor:
+        # Use tqdm to display the progress bar
         futures = [executor.submit(generate_chat_response, request, llm) for llm in llms]
         responses = []
-        for future in as_completed(futures):
+
+        for future in tqdm(as_completed(futures), total=len(futures), desc="Generando respuestas"):
             response = future.result()
             responses.append(response)
+            print(f"Modelo procesado: {response['literal'][:30]}...")  # Show the first 30 characters of the literal input
 
-        #
+        # Check the responses for errors
         if any("Error" in response['response'] for response in responses):
             error_response = next(response for response in responses if "Error" in response['response'])
             raise HTTPException(status_code=500, detail=error_response['response'])
 
-
-        response_texts = [resp['response'] for resp in responses]
-        literal_inputs = [resp['literal'] for resp in responses]
-
-        # Select the best response
-        best_response = select_best_response(response_texts, request)
+        best_response = select_best_response([resp['response'] for resp in responses], request)
 
+        print(f"Mejor respuesta seleccionada: {best_response}")
+
         return {
             "best_response": best_response,
-            "all_responses": response_texts,
-            "literal_inputs": literal_inputs
+            "all_responses": [resp['response'] for resp in responses],
+            "literal_inputs": [resp['literal'] for resp in responses]
         }
 
 if __name__ == "__main__":