Spaces:
Build error
Build error
"""
Leaderboard processing: run the parser agent on a single leaderboard and merge its outcome into the shared results list.
"""
import datetime | |
import os | |
from src.agents.parser_agent import process_leaderboard | |
from src.file_utils import create_category_slug, split_combined_id | |
def normalize_category(category_name):
    """
    Turn a category name into its slug form.

    This is a thin wrapper: the actual slug rules live in ``create_category_slug``
    (imported from ``src.file_utils``). The result is described as lowercase with
    spaces and underscores replaced by hyphens; confirm the exact rules in that helper.

    Args:
        category_name: The category name to normalize

    Returns:
        The value returned by ``create_category_slug`` for ``category_name``
    """
    # Delegate entirely to the shared slug helper so every module slugifies the same way
    slug = create_category_slug(category_name)
    return slug
def _read_max_retries(default=3):
    """Read LEADERBOARD_MAX_RETRIES as an int; fall back to ``default`` if invalid, never below 1."""
    raw_value = os.getenv("LEADERBOARD_MAX_RETRIES", str(default))
    try:
        configured = int(raw_value)
    except ValueError:
        print(f"Invalid LEADERBOARD_MAX_RETRIES value {raw_value!r}, falling back to {default}")
        return default
    # At least one attempt must run, otherwise there is no result to inspect
    return max(1, configured)


def _has_valid_results(result):
    """Return True when the agent response carries complete, well-formed leaderboard data.

    The response is valid when ``result["results"]`` is a dict that has a non-empty
    ``top_models`` collection whose entries are all dicts with a truthy ``rank`` and
    ``name``, and a truthy ``evaluation_criteria``. A diagnostic is printed when the
    results are missing or are not a dict.
    """
    if not result or not result.get("results"):
        print("Missing or empty results in agent response")
        return False

    results = result["results"]
    if not isinstance(results, dict):
        print(f"Invalid results format: {type(results).__name__}, expected dict")
        return False

    # Truthiness (instead of len()) also rejects a None value without raising
    top_models = results.get("top_models")
    if not top_models:
        return False

    # Each model must be a dict with at least rank and name
    for model_info in top_models:
        if not isinstance(model_info, dict):
            return False
        if not model_info.get("rank") or not model_info.get("name"):
            return False

    return bool(results.get("evaluation_criteria"))


def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None):
    """
    Process a single leaderboard and update the results.

    Calls ``process_leaderboard`` up to LEADERBOARD_MAX_RETRIES times (environment
    variable, default 3, minimum 1) until its ``parsing_status`` is "success", then
    builds a result entry that is either "approved" (complete data) or "rejected",
    and replaces or appends that entry in ``all_results``.

    Args:
        uid: The UID of the leaderboard to process
        host: The URL of the leaderboard
        model: The model to use
        index: The index of the leaderboard
        all_results: The list of all results (modified in place)
        additional_rules: Additional specific rules for this leaderboard
        category: The category of the leaderboard (for combined identifier)

    Returns:
        The updated list of results (the same list object as ``all_results``)
    """
    print(f"\n\nProcessing leaderboard: {uid} - {host}")
    if additional_rules:
        print(f"Additional rules for this leaderboard: {additional_rules}")

    normalized_category = None
    if category:
        normalized_category = normalize_category(category)
        print(f"Category: {category} (normalized: {normalized_category})")

    # Get the maximum number of attempts from environment variables
    max_retries = _read_max_retries()
    print(f"Maximum number of retries configured: {max_retries}")

    # Try to process the leaderboard multiple times; `result` is pre-bound so the
    # code below never sees an undefined name
    result = None
    for attempt in range(1, max_retries + 1):
        if attempt > 1:
            print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}")

        result = process_leaderboard(host, model, index, uid, additional_rules)

        # The agent response may be empty; treat that like an unknown status
        status = result.get("parsing_status") if result else None
        if status == "success":
            break

        # Report every failed attempt, including the last one
        if status == "error":
            last_error = result.get("parsing_message", "Unknown error")
            print(f"Error during attempt {attempt}: {last_error}")

    # Get parsing date from result or generate a new one if not available
    if result and "parsed_at" in result:
        parsed_at = result["parsed_at"]
    else:
        # Fallback to current (local, naive) time if not provided by process_leaderboard
        parsed_at = datetime.datetime.now().isoformat()

    # Combined UID format: category_uid. The category is already slugified by
    # normalize_category, and the underscore "_" is the ONLY separator between
    # the category and the UID. Without a category the plain UID is used.
    result_uid = f"{normalized_category}_{uid}" if normalized_category else uid

    # Base result object; it stays "rejected" unless the data validates below
    leaderboard_result = {
        "uid": result_uid,
        "original_uid": uid,
        "category": normalized_category,
        "host": host,
        "parsing_status": "rejected",
        "parsed_at": parsed_at,
    }

    if _has_valid_results(result):
        parsed = result["results"]
        leaderboard_result["parsing_status"] = "approved"
        # Keep only the fields we publish for each model
        leaderboard_result["top_models"] = [
            {
                "rank": model_info.get("rank"),
                "name": model_info.get("name"),
                "url": model_info.get("url", None),
            }
            for model_info in parsed["top_models"]
        ]
        leaderboard_result["evaluation_criteria"] = parsed["evaluation_criteria"]
    else:
        print(f"Leaderboard rejected: {uid} - Incomplete or invalid information")

    # If this UID already exists in the results, replace the existing entry
    for i, existing_result in enumerate(all_results):
        if existing_result["uid"] == result_uid:
            all_results[i] = leaderboard_result
            print(f"Result updated for UID: {result_uid}")
            return all_results

    # ADDITIONAL CHECK: warn if the same original_uid exists under another category.
    # This is purely advisory, so missing keys on older entries must not raise.
    for existing_result in all_results:
        same_leaderboard = existing_result.get("original_uid") == uid
        if same_leaderboard and existing_result.get("category") != normalized_category:
            print(f"WARNING: A result already exists for original_uid {uid} but with a different category:")
            print(f" - Existing category: {existing_result.get('category')}, UID: {existing_result.get('uid')}")
            print(f" - New category: {normalized_category}, UID: {result_uid}")
            # We continue anyway, as it's a valid case to have the same leaderboard in different categories

    # If we get here, this is a new result
    all_results.append(leaderboard_result)
    print(f"New result added for UID: {result_uid}")
    return all_results