# chatbot-web-app / huggingface_utils.py
# Uploaded by salomonsky via huggingface_hub (rev 39cc871).
import requests
import json
import time
from requests.exceptions import RequestException, Timeout
class HuggingFaceUtils:
    """Thin client for the HuggingFace Inference API.

    Sends prompts to a hosted text-generation model, retrying transient
    failures (timeouts, connection errors, 5xx responses) with exponential
    backoff before giving up.
    """

    # Supported models: HF repo id plus a callable that wraps the raw
    # prompt in the model's expected instruction template.
    MODELS = {
        'Mixtral': {
            'url': "mistralai/Mixtral-8x7B-Instruct-v0.1",
            'format': lambda p: f"<s>[INST] {p} [/INST]"
        }
    }

    def __init__(self, token):
        """Store the API token and prepare a reusable HTTP session.

        Args:
            token: HuggingFace API token (Bearer auth).

        Raises:
            ValueError: if token is empty or None.
        """
        if not token:
            raise ValueError("Se requiere un token de HuggingFace")
        self.token = token
        self.headers = {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json"
        }
        # One Session reuses the underlying TCP connection across retries.
        self.session = requests.Session()
        self.max_retries = 3
        self.base_timeout = 60

    def _make_request(self, url, payload, attempt=1):
        """POST payload to url with retries and exponential backoff.

        Retries on timeouts, connection errors, and 5xx server responses
        (e.g. 503 while the model is still loading). 401, 429 and other
        4xx errors fail immediately since retrying cannot help.

        Args:
            url: full Inference API endpoint URL.
            payload: JSON-serializable request body.
            attempt: first attempt number (kept for backward compatibility
                with the old recursive signature).

        Returns:
            The decoded JSON response.

        Raises:
            Exception: with a Spanish user-facing message describing the
                failure (invalid token, rate limit, HTTP status, timeout,
                or connection error).
        """
        last_error = None
        for current in range(attempt, self.max_retries + 1):
            try:
                response = self.session.post(
                    url,
                    headers=self.headers,
                    json=payload,
                    timeout=self.base_timeout
                )
                response.raise_for_status()
                return response.json()
            except Timeout:
                last_error = Exception("Timeout al conectar con HuggingFace")
            except RequestException as e:
                if hasattr(e, 'response') and e.response is not None:
                    status = e.response.status_code
                    if status == 401:
                        raise Exception("Token de HuggingFace inválido")
                    if status == 429:
                        raise Exception("Límite de rate excedido")
                    if status < 500:
                        # Other client errors won't improve on retry.
                        raise Exception(f"Error de HuggingFace: {status}")
                    # Server error (e.g. 503 model loading): retry.
                    last_error = Exception(f"Error de HuggingFace: {status}")
                else:
                    last_error = Exception(
                        f"Error de conexión con HuggingFace: {str(e)}"
                    )
            if current < self.max_retries:
                time.sleep(2 ** current)  # exponential backoff
        raise last_error

    def generate_response(self, prompt, model_name='Mixtral', max_length=200):
        """Generate a model completion for prompt.

        Args:
            prompt: user text to send to the model.
            model_name: key into MODELS (default 'Mixtral').
            max_length: maximum number of new tokens to generate.

        Returns:
            The generated text, stripped of surrounding whitespace.

        Raises:
            ValueError: empty prompt, unknown model, canned/empty reply.
            Exception: propagated transport/API errors from _make_request.
        """
        try:
            if not prompt:
                raise ValueError("No se proporcionó un prompt")
            if model_name not in self.MODELS:
                raise ValueError(f"Modelo {model_name} no disponible")

            model_info = self.MODELS[model_name]
            api_url = (
                "https://api-inference.huggingface.co/models/"
                f"{model_info['url']}"
            )
            payload = {
                "inputs": model_info['format'](prompt),
                "parameters": {
                    "max_new_tokens": max_length,
                    "temperature": 0.9,
                    "top_p": 0.9,
                    "repetition_penalty": 1.2,
                    "do_sample": True,
                    "return_full_text": False,
                    "stop": ["[/INST]", "</s>"]
                }
            }

            result = self._make_request(api_url, payload)
            # Debug trace of the raw API response.
            print(f"Respuesta de HuggingFace: {result}")

            if isinstance(result, list) and result:
                text = result[0].get('generated_text', '').strip()
                if text:
                    # Reject the model's canned default greeting.
                    if "¡Hola! Soy tu asistente virtual" in text:
                        raise ValueError("Respuesta predeterminada detectada")
                    return text
            raise ValueError("No se pudo generar una respuesta coherente")
        except Exception as e:
            print(f"Error en generate_response: {str(e)}")
            raise  # Propagate so the caller can handle/report it.