Spaces:
Build error
Build error
File size: 6,539 Bytes
0821095 b0bf659 0821095 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
"""
Leaderboard processing.
"""
import datetime
import os
from src.agents.parser_agent import process_leaderboard
from src.file_utils import create_category_slug, split_combined_id
def normalize_category(category_name):
    """Return the canonical slug form of *category_name*.

    Spaces and underscores become hyphens and the result is lowercased,
    exactly as produced by :func:`create_category_slug` in
    ``src/file_utils.py``, to which this function delegates.

    Args:
        category_name: The raw category name to normalize.

    Returns:
        The normalized (slugified) category name.
    """
    # Delegate so every caller shares a single slugging implementation.
    return create_category_slug(category_name)
def _extract_parsed_at(result):
    """Return the parsing timestamp for *result* as an ISO-8601 string.

    Prefers the ``parsed_at`` value supplied by ``process_leaderboard``;
    falls back to the current local time when it is absent.
    """
    if result and "parsed_at" in result:
        return result["parsed_at"]
    # Fallback to current time if not provided by process_leaderboard
    return datetime.datetime.now().isoformat()


def _has_valid_results(result):
    """Check whether *result* carries a usable parsed leaderboard.

    Valid means: ``result["results"]`` is a dict with a non-empty
    ``top_models`` list whose entries each have at least a truthy ``rank``
    and ``name``, plus a non-empty ``evaluation_criteria`` value.

    Args:
        result: The raw response dict from ``process_leaderboard`` (may be
            ``None``).

    Returns:
        ``True`` when the result can be approved, ``False`` otherwise.
    """
    if not (result and result.get("results")):
        print("Missing or empty results in agent response")
        return False
    results = result["results"]
    if not isinstance(results, dict):
        print(f"Invalid results format: {type(results).__name__}, expected dict")
        return False
    # Guard with `or []` so a present-but-None "top_models" cannot raise.
    top_models = results.get("top_models") or []
    if not top_models:
        return False
    # Each model must have at least rank and name.
    if any(not m.get("rank") or not m.get("name") for m in top_models):
        return False
    return bool(results.get("evaluation_criteria"))


def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None):
    """
    Process a single leaderboard and update the results.

    Fetches/parses the leaderboard with up to ``LEADERBOARD_MAX_RETRIES``
    attempts (env var, default 3), validates the parsed payload, then either
    replaces an existing entry with the same combined UID in *all_results*
    or appends a new one. *all_results* is mutated in place and also
    returned.

    Args:
        uid: The UID of the leaderboard to process
        host: The URL of the leaderboard
        model: The model to use
        index: The index of the leaderboard
        all_results: The list of all results
        additional_rules: Additional specific rules for this leaderboard
        category: The category of the leaderboard (for combined identifier)

    Returns:
        The updated list of results
    """
    print(f"\n\nProcessing leaderboard: {uid} - {host}")
    if additional_rules:
        print(f"Additional rules for this leaderboard: {additional_rules}")

    if category:
        normalized_category = normalize_category(category)
        print(f"Category: {category} (normalized: {normalized_category})")
    else:
        normalized_category = None

    # Get the maximum number of retries from environment variables.
    max_retries = int(os.getenv("LEADERBOARD_MAX_RETRIES", "3"))
    print(f"Maximum number of retries configured: {max_retries}")

    # BUGFIX: initialize `result` so a non-positive retry count can never
    # leave it unbound below (previously a NameError).
    result = None
    for attempt in range(1, max_retries + 1):
        if attempt > 1:
            print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}")
        # Process the leaderboard.
        result = process_leaderboard(host, model, index, uid, additional_rules)
        # Stop on success or once the last allowed attempt has run.
        if result.get("parsing_status") == "success" or attempt >= max_retries:
            break
        if result.get("parsing_status") == "error":
            print(f"Error during attempt {attempt}: {result.get('parsing_message', 'Unknown error')}")

    parsed_at = _extract_parsed_at(result)

    # Create combined ID if category is provided.  The category is already
    # normalized (slugified); the underscore "_" is the ONLY separator
    # between the category and the UID.
    result_uid = f"{normalized_category}_{uid}" if normalized_category else uid

    # Base result object; status flips to "approved" only after validation.
    leaderboard_result = {
        "uid": result_uid,
        "original_uid": uid,
        "category": normalized_category,
        "host": host,
        "parsing_status": "rejected",  # Default to rejected
        "parsed_at": parsed_at,
    }

    if _has_valid_results(result):
        leaderboard_result["parsing_status"] = "approved"
        leaderboard_result["evaluation_criteria"] = result["results"]["evaluation_criteria"]
        # Keep only the fields downstream consumers need from each model.
        leaderboard_result["top_models"] = [
            {
                "rank": model_info.get("rank"),
                "name": model_info.get("name"),
                "url": model_info.get("url", None),
            }
            for model_info in result["results"]["top_models"]
        ]
    else:
        print(f"Leaderboard rejected: {uid} - Incomplete or invalid information")

    # Replace an existing result that has the same combined UID, if any.
    for i, existing_result in enumerate(all_results):
        if existing_result["uid"] == result_uid:
            all_results[i] = leaderboard_result
            print(f"Result updated for UID: {result_uid}")
            return all_results

    # ADDITIONAL CHECK: Make sure there's no confusion with other categories
    # for the same original_uid.
    for existing_result in all_results:
        if existing_result["original_uid"] == uid and existing_result["category"] != normalized_category:
            print(f"WARNING: A result already exists for original_uid {uid} but with a different category:")
            print(f" - Existing category: {existing_result['category']}, UID: {existing_result['uid']}")
            print(f" - New category: {normalized_category}, UID: {result_uid}")
            # We continue anyway: the same leaderboard legitimately appears
            # in different categories.

    # If we get here, this is a new result.
    all_results.append(leaderboard_result)
    print(f"New result added for UID: {result_uid}")
    return all_results