"""
Leaderboard processing.
"""
import datetime
import os
from src.agents.parser_agent import process_leaderboard
from src.file_utils import create_category_slug, split_combined_id
def normalize_category(category_name):
"""
Normalizes a category name by replacing spaces and underscores with hyphens and converting to lowercase.
Args:
category_name: The category name to normalize
Returns:
The normalized category
"""
# Use the create_category_slug function from file_utils.py
return create_category_slug(category_name)
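
# Usage sketch (hypothetical values; the exact slug rules live in
# src.file_utils.create_category_slug): both "Text Generation" and
# "text_generation" are expected to map to "text-generation".
# assert normalize_category("Text Generation") == "text-generation"
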
def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None):
"""
Process a single leaderboard and update the results.
Args:
uid: The UID of the leaderboard to process
host: The URL of the leaderboard
model: The model to use
index: The index of the leaderboard
all_results: The list of all results
additional_rules: Additional specific rules for this leaderboard
category: The category of the leaderboard (for combined identifier)
Returns:
The updated list of results
"""
print(f"\n\nProcessing leaderboard: {uid} - {host}")
if additional_rules:
print(f"Additional rules for this leaderboard: {additional_rules}")
if category:
normalized_category = normalize_category(category)
print(f"Category: {category} (normalized: {normalized_category})")
else:
normalized_category = None
    # Read the retry budget from environment variables; clamp to at least one
    # attempt so that `result` is always bound after the loop below
    max_retries = max(1, int(os.getenv("LEADERBOARD_MAX_RETRIES", "3")))
    print(f"Maximum number of retries configured: {max_retries}")
attempt = 0
last_error = None
# Try to process the leaderboard multiple times
while attempt < max_retries:
attempt += 1
if attempt > 1:
print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}")
# Process the leaderboard
result = process_leaderboard(host, model, index, uid, additional_rules)
        # If there was an error, record it before deciding whether to retry,
        # so the message from the final attempt is not lost
        if result.get("parsing_status") == "error":
            last_error = result.get("parsing_message", "Unknown error")
            print(f"Error during attempt {attempt}: {last_error}")

        # Stop once parsing succeeds or the retry budget is exhausted
        if result.get("parsing_status") == "success" or attempt >= max_retries:
            break
# Get parsing date from result or generate a new one if not available
if result and "parsed_at" in result:
parsed_at = result["parsed_at"]
else:
# Fallback to current time if not provided by process_leaderboard
now = datetime.datetime.now()
parsed_at = now.isoformat()
# Create combined ID if category is provided
result_uid = uid
if normalized_category:
# Format of the combined UID: category_uid
# The category is already normalized (slugified) by normalize_category
# The underscore "_" is the ONLY separator between the category and the UID
result_uid = f"{normalized_category}_{uid}"
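    # Example (hypothetical values): category "LLM Safety" with uid "abc123"
    # produces the combined UID "llm-safety_abc123"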
# Create base result object with uid, host, and thumbnail
leaderboard_result = {
"uid": result_uid,
"original_uid": uid,
"category": normalized_category,
"host": host,
"parsing_status": "rejected", # Default to rejected
"parsed_at": parsed_at
}
# Check if we have valid results
valid_result = False
if result and result.get("results"):
if isinstance(result["results"], dict):
# Check if we have top models with required fields
if "top_models" in result["results"] and len(result["results"]["top_models"]) > 0:
valid_models = True
for model_info in result["results"]["top_models"]:
# Each model must have at least rank and name
if not model_info.get("rank") or not model_info.get("name"):
valid_models = False
break
# Check if we have evaluation criteria
if valid_models and "evaluation_criteria" in result["results"] and result["results"]["evaluation_criteria"]:
valid_result = True
else:
print(f"Invalid results format: {type(result['results']).__name__}, expected dict")
    else:
        print("Missing or empty results in agent response")
# If we have valid results, extract the data
if valid_result:
leaderboard_result["parsing_status"] = "approved"
leaderboard_result["top_models"] = []
leaderboard_result["evaluation_criteria"] = result["results"]["evaluation_criteria"]
        # Extract the top models, keeping only the fields we expose
        for model_info in result["results"]["top_models"]:
            model_entry = {
                "rank": model_info.get("rank"),
                "name": model_info.get("name"),
                "url": model_info.get("url"),  # optional; None when absent
            }
            leaderboard_result["top_models"].append(model_entry)
    else:
        reason = f" (last error: {last_error})" if last_error else ""
        print(f"Leaderboard rejected: {uid} - Incomplete or invalid information{reason}")
# Check if this UID already exists in the results
for i, existing_result in enumerate(all_results):
if existing_result["uid"] == result_uid:
# Replace the existing result
all_results[i] = leaderboard_result
print(f"Result updated for UID: {result_uid}")
return all_results
# ADDITIONAL CHECK: Make sure there's no confusion with other categories
# for the same original_uid
for existing_result in all_results:
if existing_result["original_uid"] == uid and existing_result["category"] != normalized_category:
print(f"WARNING: A result already exists for original_uid {uid} but with a different category:")
print(f" - Existing category: {existing_result['category']}, UID: {existing_result['uid']}")
print(f" - New category: {normalized_category}, UID: {result_uid}")
# We continue anyway, as it's a valid case to have the same leaderboard in different categories
# If we get here, this is a new result
all_results.append(leaderboard_result)
print(f"New result added for UID: {result_uid}")
return all_results
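

if __name__ == "__main__":
    # Minimal usage sketch with hypothetical values throughout; the real
    # entry point builds `model` via the project's model factory and loops
    # over many leaderboards
    demo_results = []
    demo_results = process_single_leaderboard(
        uid="example-board",                     # hypothetical UID
        host="https://example.com/leaderboard",  # hypothetical URL
        model=None,                              # placeholder model object
        index=0,
        all_results=demo_results,
        category="Text Generation",
    )
    print(demo_results[-1]["parsing_status"])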