""" Leaderboard processing. """ import datetime import os from src.agents.parser_agent import process_leaderboard from src.file_utils import create_category_slug, split_combined_id def normalize_category(category_name): """ Normalizes a category name by replacing spaces and underscores with hyphens and converting to lowercase. Args: category_name: The category name to normalize Returns: The normalized category """ # Use the create_category_slug function from file_utils.py return create_category_slug(category_name) def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None): """ Process a single leaderboard and update the results. Args: uid: The UID of the leaderboard to process host: The URL of the leaderboard model: The model to use index: The index of the leaderboard all_results: The list of all results additional_rules: Additional specific rules for this leaderboard category: The category of the leaderboard (for combined identifier) Returns: The updated list of results """ print(f"\n\nProcessing leaderboard: {uid} - {host}") if additional_rules: print(f"Additional rules for this leaderboard: {additional_rules}") if category: normalized_category = normalize_category(category) print(f"Category: {category} (normalized: {normalized_category})") else: normalized_category = None # Get the maximum number of retries from environment variables max_retries = int(os.getenv("LEADERBOARD_MAX_RETRIES", "3")) print(f"Maximum number of retries configured: {max_retries}") attempt = 0 last_error = None # Try to process the leaderboard multiple times while attempt < max_retries: attempt += 1 if attempt > 1: print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}") # Process the leaderboard result = process_leaderboard(host, model, index, uid, additional_rules) # If the parsing was successful or we've reached the maximum number of retries if result.get("parsing_status") == "success" or attempt >= max_retries: break # If there was an error, save it for later if result.get("parsing_status") == "error": last_error = result.get("parsing_message", "Unknown error") print(f"Error during attempt {attempt}: {last_error}") # Get parsing date from result or generate a new one if not available if result and "parsed_at" in result: parsed_at = result["parsed_at"] else: # Fallback to current time if not provided by process_leaderboard now = datetime.datetime.now() parsed_at = now.isoformat() # Create combined ID if category is provided result_uid = uid if normalized_category: # Format of the combined UID: category_uid # The category is already normalized (slugified) by normalize_category # The underscore "_" is the ONLY separator between the category and the UID result_uid = f"{normalized_category}_{uid}" # Create base result object with uid, host, and thumbnail leaderboard_result = { "uid": result_uid, "original_uid": uid, "category": normalized_category, "host": host, "parsing_status": "rejected", # Default to rejected "parsed_at": parsed_at } # Check if we have valid results valid_result = False if result and result.get("results"): if isinstance(result["results"], dict): # Check if we have top models with required fields if "top_models" in result["results"] and len(result["results"]["top_models"]) > 0: valid_models = True for model_info in result["results"]["top_models"]: # Each model must have at least rank and name if not model_info.get("rank") or not model_info.get("name"): valid_models = False break # Check if we have evaluation criteria if valid_models and "evaluation_criteria" in result["results"] and result["results"]["evaluation_criteria"]: valid_result = True else: print(f"Invalid results format: {type(result['results']).__name__}, expected dict") else: print(f"Missing or empty results in agent response") # If we have valid results, extract the data if valid_result: leaderboard_result["parsing_status"] = "approved" leaderboard_result["top_models"] = [] leaderboard_result["evaluation_criteria"] = result["results"]["evaluation_criteria"] # Extract top models for model_info in result["results"]["top_models"]: model_entry = { "rank": model_info.get("rank"), "name": model_info.get("name"), "url": model_info.get("url", None) } leaderboard_result["top_models"].append(model_entry) else: print(f"Leaderboard rejected: {uid} - Incomplete or invalid information") # Check if this UID already exists in the results for i, existing_result in enumerate(all_results): if existing_result["uid"] == result_uid: # Replace the existing result all_results[i] = leaderboard_result print(f"Result updated for UID: {result_uid}") return all_results # ADDITIONAL CHECK: Make sure there's no confusion with other categories # for the same original_uid for existing_result in all_results: if existing_result["original_uid"] == uid and existing_result["category"] != normalized_category: print(f"WARNING: A result already exists for original_uid {uid} but with a different category:") print(f" - Existing category: {existing_result['category']}, UID: {existing_result['uid']}") print(f" - New category: {normalized_category}, UID: {result_uid}") # We continue anyway, as it's a valid case to have the same leaderboard in different categories # If we get here, this is a new result all_results.append(leaderboard_result) print(f"New result added for UID: {result_uid}") return all_results