from datetime import datetime
from fastapi import APIRouter, HTTPException, Depends
import logging
from lm_eval import evaluator
from svc.schemas import LMHarnessTaskRequest, TaskResponse, DeepEvalSuiteRequest
from fastapi.security import OAuth2PasswordRequestForm
from auth.authentication import get_current_user, create_access_token
from dotenv import load_dotenv
import os
import json
from pathlib import Path
from src.deepeval.deepeval_task_manager import DeepEvalTaskManager
import torch
from time import time
from huggingface_hub import HfApi, ModelInfo
import threading

router = APIRouter()

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")

# Configure a shared HfApi client for model metadata lookups and result uploads.
hf_api = HfApi(
    token=HF_TOKEN,  # Token is not persisted on the machine.
)
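
# Illustrative sketch (not wired in here): a startup sanity check that the token
# resolves to a Hub account before any upload is attempted. HfApi.whoami()
# raises if HF_TOKEN is missing or invalid.
#
#   who = hf_api.whoami()
#   logger.info("Authenticated to the Hub as %s", who["name"])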

@router.post("/token")  # Path assumed; the decorator is needed to register the route.
async def login_for_access_token(form_data: OAuth2PasswordRequestForm = Depends()):
    auth_token = os.getenv("AUTH_UUID")
    if auth_token != form_data.password:
        raise HTTPException(status_code=400, detail="Incorrect username or password")
    access_token = create_access_token(data={"sub": form_data.username})
    return {"access_token": access_token, "token_type": "bearer"}

@router.get("/protected")  # Path assumed.
async def protected_route(username: str = Depends(get_current_user)):
    return {"message": f"Hello, {username}! This is a protected resource."}

@router.get("/status")  # Path assumed.
async def deep_eval_status():
    # Return "running" with a 200 status code.
    return {"status": "running"}

@router.get("/hardware")  # Path assumed.
def hardware_status():
    return get_gpu_tier()

@router.post("/inference")  # Path assumed.
def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_current_user)):
    logger.info(request)
    try:
        results = evaluator.simple_evaluate(
            model=request.model,
            model_args=request.model_args,
            tasks=request.tasks,
            num_fewshot=request.num_fewshot,
            batch_size=request.batch_size,
            device=request.device,
            limit=request.limit,
            write_out=request.write_out,  # Write out an example document and model input, for checking task integrity.
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"lm-harness task execution failed for model: {request.model_args}",
        ) from e
    torch.cuda.empty_cache()

    results["config"]["model_dtype"] = request.precision
    # model_args is a comma-separated key=value string whose first pair is
    # expected to be pretrained=<model_name>.
    model_name = request.model_args.split(",")[0].split("=")[1]
    results["config"]["model_name"] = model_name
    results["config"]["model_sha"] = request.model_sha

    dumped = json.dumps(results, indent=2)
    logger.info("-------------------results------------------\n")
    logger.info(dumped)
    logger.info("-------------------results end------------------\n")
    return TaskResponse(results=dumped)
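
# Hypothetical request body for the endpoint above. Field names are taken from
# the handler; the values are illustrative only:
#
#   {
#     "model": "hf",
#     "model_args": "pretrained=meta-llama/Llama-2-7b-hf",
#     "tasks": ["hellaswag"],
#     "num_fewshot": 0,
#     "batch_size": 8,
#     "device": "cuda:0",
#     "limit": 100,
#     "write_out": false,
#     "precision": "torch.float16",
#     "model_sha": "main"
#   }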

@router.post("/deepeval/eval")  # Path assumed.
def deep_eval_suite(request: DeepEvalSuiteRequest):
    def run_in_background():
        try:
            torch.cuda.empty_cache()
            des = DeepEvalTaskManager(request.model_name, request.tasks)

            start_time = time()
            results = des.run_tasks()
            end_time = time()
            duration = round(end_time - start_time, 2)

            model_info: ModelInfo = hf_api.model_info(request.model_name)

            config = {
                "model_source": "hf",
                "num_fewshot": 0,
                "batch_size": 8,
                "device": "cuda:0",
                "model_dtype": "torch.float16",
                "model_name": request.model_name,
                "model_sha": model_info.sha,
            }

            final_results = {
                "results": results,
                "config": config,
                "total_evaluation_time_seconds": duration,
                "start_time": start_time,
                "end_time": end_time,
            }

            # Save locally, then upload to the results dataset on the Hub.
            dumped = json.dumps(final_results, indent=2)
            # Format the timestamp without spaces or colons so it is safe as a file
            # and repo path component.
            timestamp = datetime.now().strftime("%Y-%m-%dT%H-%M-%S")
            path = Path("/tmp", request.model_name, f"results_{timestamp}.json")
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_text(dumped)

            RESULTS_REPO = "metunlp/results"
            hf_api.upload_file(
                path_or_fileobj=path,
                path_in_repo=path.relative_to("/tmp").as_posix(),
                repo_id=RESULTS_REPO,
                repo_type="dataset",
            )
            logger.info(f"✅ Uploaded results to HF Hub for {request.model_name}")
        except Exception as e:
            logger.exception(f"❌ Background evaluation failed: {e}")

    # 🚀 Start the evaluation in a background thread...
    threading.Thread(target=run_in_background, daemon=True).start()

    # ...and respond immediately.
    return TaskResponse(results=json.dumps({"status": "Evaluation started in background"}))
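
# Fire-and-forget usage sketch, assuming the route path above and a root-mounted
# app; the call returns at once while the evaluation thread keeps working.
# The model and task values are illustrative only:
#
#   import httpx
#   httpx.post("http://localhost:8000/deepeval/eval",
#              json={"model_name": "meta-llama/Llama-2-7b-hf", "tasks": ["truthfulqa"]})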

def get_gpu_tier():
    if not torch.cuda.is_available():
        return {"gpu": "CPU", "tier": "cpu"}

    gpu_name = torch.cuda.get_device_name(0).lower()

    # Normalize the GPU model to the custom tier system. Check "l40s" before
    # "l4": every L40S device name also contains "l4", so the broader match
    # would otherwise shadow it.
    if "t4" in gpu_name:
        # This could be refined by also checking memory or other context.
        return {"gpu": "Tesla T4", "tier": "t4-medium"}
    elif "l40s" in gpu_name:
        return {"gpu": "NVIDIA L40S", "tier": "l40sx1"}
    elif "l4" in gpu_name:
        return {"gpu": "NVIDIA L4", "tier": "l4x1"}
    elif "a10g" in gpu_name:
        return {"gpu": "NVIDIA A10G", "tier": "a10g"}
    else:
        return {"gpu": gpu_name, "tier": "unknown"}