File size: 6,539 Bytes
0821095
 
 
 
 
b0bf659
0821095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
Leaderboard processing.
"""
import datetime
import os
from src.agents.parser_agent import process_leaderboard
from src.file_utils import create_category_slug, split_combined_id


def normalize_category(category_name):
    """
    Turn a human-readable category name into its slug form.

    This is a thin wrapper that hands the work to ``create_category_slug``
    from ``src.file_utils`` so that every caller in this module goes through
    a single normalization entry point.

    NOTE(review): the slug is expected to be lowercase with spaces and
    underscores replaced by hyphens; the exact rules live in
    ``create_category_slug`` - confirm there.

    Args:
        category_name: The category name to normalize

    Returns:
        Whatever ``create_category_slug`` returns for ``category_name``
    """
    slug = create_category_slug(category_name)
    return slug


def _attempt_status(result):
    """Return the ``parsing_status`` of an agent response, or None when the response is not a dict."""
    return result.get("parsing_status") if isinstance(result, dict) else None


def _has_valid_results(result):
    """
    Tell whether an agent response carries a complete leaderboard payload.

    A payload is complete when ``result["results"]`` is a dict that has a
    non-empty ``top_models`` sequence in which every entry is a dict with a
    truthy ``rank`` and ``name``, plus a truthy ``evaluation_criteria``.
    Malformed shapes (non-dict response, ``top_models`` of the wrong type,
    non-dict model entries) count as invalid instead of raising.

    Args:
        result: The value returned by ``process_leaderboard``

    Returns:
        True if the payload is complete, False otherwise
    """
    results = result.get("results") if isinstance(result, dict) else None
    if not results:
        print("Missing or empty results in agent response")
        return False
    if not isinstance(results, dict):
        print(f"Invalid results format: {type(results).__name__}, expected dict")
        return False

    top_models = results.get("top_models")
    if not isinstance(top_models, (list, tuple)) or not top_models:
        return False

    # Each model must have at least a rank and a name (truthiness check, so a
    # rank of 0 or an empty name is treated as missing).
    models_ok = all(
        isinstance(model_info, dict) and model_info.get("rank") and model_info.get("name")
        for model_info in top_models
    )
    return bool(models_ok and results.get("evaluation_criteria"))


def process_single_leaderboard(uid, host, model, index, all_results, additional_rules=None, category=None):
    """
    Process a single leaderboard and update the results.

    The leaderboard is parsed with ``process_leaderboard``; parsing is repeated
    until it reports ``parsing_status == "success"`` or the attempt budget is
    used up. The budget is read from the ``LEADERBOARD_MAX_RETRIES`` environment
    variable (default 3) and counts total attempts; values below 1 are raised
    to 1 so that at least one attempt is always made.

    The outcome is stored in ``all_results`` as a dict with the keys ``uid``,
    ``original_uid``, ``category``, ``host``, ``parsing_status`` ("approved" or
    "rejected") and ``parsed_at``; approved entries additionally carry
    ``top_models`` and ``evaluation_criteria``. An entry with the same ``uid``
    is replaced in place, otherwise the new entry is appended.

    Args:
        uid: The UID of the leaderboard to process
        host: The URL of the leaderboard
        model: The model to use
        index: The index of the leaderboard
        all_results: The list of all results (mutated in place)
        additional_rules: Additional specific rules for this leaderboard
        category: The category of the leaderboard (for combined identifier)

    Returns:
        The updated list of results (the same list object as ``all_results``)

    Raises:
        ValueError: If ``LEADERBOARD_MAX_RETRIES`` is set to a non-integer value
    """
    print(f"\n\nProcessing leaderboard: {uid} - {host}")
    if additional_rules:
        print(f"Additional rules for this leaderboard: {additional_rules}")
    if category:
        normalized_category = normalize_category(category)
        print(f"Category: {category} (normalized: {normalized_category})")
    else:
        normalized_category = None

    # Get the maximum number of attempts from environment variables
    max_retries = int(os.getenv("LEADERBOARD_MAX_RETRIES", "3"))
    print(f"Maximum number of retries configured: {max_retries}")
    if max_retries < 1:
        # Without this clamp the loop below would never run and no result
        # would exist to build the entry from.
        print("LEADERBOARD_MAX_RETRIES is below 1; running a single attempt")
        max_retries = 1

    result = None
    for attempt in range(1, max_retries + 1):
        if attempt > 1:
            print(f"Retry attempt {attempt}/{max_retries} for leaderboard {uid} - {host}")

        result = process_leaderboard(host, model, index, uid, additional_rules)
        status = _attempt_status(result)

        if status == "success":
            break

        # Report the error only when another attempt will follow
        if status == "error" and attempt < max_retries:
            error_message = result.get("parsing_message", "Unknown error")
            print(f"Error during attempt {attempt}: {error_message}")

    # Anything that is not a dict is handled like an empty response
    payload = result if isinstance(result, dict) else {}

    # Get parsing date from result or fall back to the current local time
    if "parsed_at" in payload:
        parsed_at = payload["parsed_at"]
    else:
        parsed_at = datetime.datetime.now().isoformat()

    # Format of the combined UID: category_uid
    # The category is already normalized (slugified) by normalize_category
    # The underscore "_" is the ONLY separator between the category and the UID
    result_uid = f"{normalized_category}_{uid}" if normalized_category else uid

    leaderboard_result = {
        "uid": result_uid,
        "original_uid": uid,
        "category": normalized_category,
        "host": host,
        "parsing_status": "rejected",  # Default to rejected
        "parsed_at": parsed_at
    }

    if _has_valid_results(payload):
        results = payload["results"]
        leaderboard_result["parsing_status"] = "approved"
        leaderboard_result["top_models"] = [
            {
                "rank": model_info.get("rank"),
                "name": model_info.get("name"),
                "url": model_info.get("url"),
            }
            for model_info in results["top_models"]
        ]
        leaderboard_result["evaluation_criteria"] = results["evaluation_criteria"]
    else:
        print(f"Leaderboard rejected: {uid} - Incomplete or invalid information")

    # Replace the existing entry if this UID is already present
    for i, existing_result in enumerate(all_results):
        if existing_result.get("uid") == result_uid:
            all_results[i] = leaderboard_result
            print(f"Result updated for UID: {result_uid}")
            return all_results

    # ADDITIONAL CHECK: Make sure there's no confusion with other categories
    # for the same original_uid. ``.get`` is used so entries that lack these
    # keys do not raise.
    for existing_result in all_results:
        existing_category = existing_result.get("category")
        if existing_result.get("original_uid") == uid and existing_category != normalized_category:
            print(f"WARNING: A result already exists for original_uid {uid} but with a different category:")
            print(f"  - Existing category: {existing_category}, UID: {existing_result.get('uid')}")
            print(f"  - New category: {normalized_category}, UID: {result_uid}")
            # We continue anyway, as it's a valid case to have the same leaderboard in different categories

    # If we get here, this is a new result
    all_results.append(leaderboard_result)
    print(f"New result added for UID: {result_uid}")
    return all_results