"""
Utilities for managing leaderboard results files: saving, loading, validating, and updating JSON results.
"""
import json
import os
import datetime
import shutil
import tempfile
import logging
from filelock import FileLock

logger = logging.getLogger("leaderboard-parser")

def save_results(results, file_path):
    """
    Save results to a JSON file.
    
    Args:
        results: The results to save
        file_path: The path to the file
    """
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)


def create_category_slug(category_name):
    """
    Creates a slug from a category name.
    The slug uses only hyphens as separators (no underscore).
    
    Args:
        category_name: The category name
        
    Returns:
        The category slug
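
    Example:
        >>> create_category_slug("Text Generation")
        'text-generation'
        >>> create_category_slug("vision_tasks")
        'vision-tasks'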
    """
    if not category_name:
        return ""
    # Convert to lowercase and replace spaces with hyphens
    # Ensure no underscores are used in the category slug
    return category_name.lower().replace(" ", "-").replace("_", "-")


def create_combined_id(category, uid):
    """
    Creates a normalized combined identifier from a category and UID.
    First normalizes the category using create_category_slug.
    
    Args:
        category: The category name
        uid: The UID of the leaderboard
        
    Returns:
        The combined identifier in the format category_slug_uid
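
    Example:
        >>> create_combined_id("Text Generation", "42")
        'text-generation_42'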
    """
    normalized_category = create_category_slug(category)
    return f"{normalized_category}_{uid}"


def validate_leaderboard_result(result):
    """
    Validates a leaderboard result and corrects it if necessary to ensure identifier consistency.
    
    This function checks:
    1. That 'uid' is present and correctly formatted (category_original_uid)
    2. That 'original_uid' is present
    3. That 'category' is present and normalized
    4. That 'uid' corresponds to the combination of category and original_uid
    
    Args:
        result: The leaderboard result to validate (dict)
        
    Returns:
        The validated and corrected result, or None if validation is impossible
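
    Example (the category is normalized and the combined uid is recomputed):
        >>> validate_leaderboard_result(
        ...     {"original_uid": "42", "category": "Text Generation"}
        ... )
        {'original_uid': '42', 'category': 'text-generation', 'uid': 'text-generation_42'}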
    """
    if not isinstance(result, dict):
        logger.error("Validation error: the result is not a dictionary")
        return None

    # Check if required fields are present
    if "original_uid" not in result:
        logger.error("Validation error: original_uid missing from result")
        return None

    if "category" not in result:
        logger.error("Validation error: category missing from result")
        return None
    
    original_uid = result["original_uid"]
    category = result["category"]
    
    # Normalize the category if necessary
    normalized_category = create_category_slug(category)
    if normalized_category != category:
        logger.warning(f"Category not normalized: '{category}' -> '{normalized_category}'")
        result["category"] = normalized_category
    
    # Recalculate the correct combined uid
    correct_uid = create_combined_id(normalized_category, original_uid)
    
    # Check if existing uid is correct
    if "uid" not in result:
        logger.warning(f"uid missing, adding calculated uid: {correct_uid}")
        result["uid"] = correct_uid
    elif result["uid"] != correct_uid:
        logger.warning(f"uid inconsistent: '{result['uid']}' does not match '{correct_uid}', correction applied")
        result["uid"] = correct_uid
    
    return result


def load_and_validate_results(file_path):
    """
    Loads results from a file without strict validation.

    Accepts either a JSON list of results or a dict with a "leaderboards"
    mapping (uid -> result), which is flattened into a list.

    Args:
        file_path: Path to the results file

    Returns:
        List of results sorted by (category, original_uid), or an empty list in case of error
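
    Example of an accepted dict layout (illustrative values) and the flattened form it produces:
        {"leaderboards": {"nlp_42": {"category": "nlp", "original_uid": "42"}}}
        [{"category": "nlp", "original_uid": "42", "uid": "nlp_42"}]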
    """
    try:
        # Load results from the file
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                results_data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            logger.warning(f"Unable to load file {file_path}: {str(e)}")
            return []
            
        # Convert from dict with "leaderboards" to array if necessary
        if isinstance(results_data, dict) and "leaderboards" in results_data:
            array_results = []
            for uid, item in results_data["leaderboards"].items():
                item_copy = item.copy()
                item_copy["uid"] = uid
                array_results.append(item_copy)
            results_data = array_results
        
        # Ensure results_data is a list
        if not isinstance(results_data, list):
            logger.warning(f"Invalid data format in {file_path}, initializing empty list")
            return []
        
        # Sort results
        results_data.sort(key=lambda x: (x.get("category", ""), x.get("original_uid", "")))
        
        logger.info(f"Load successful: {len(results_data)} results")
        return results_data
        
    except Exception as e:
        logger.error(f"Error loading results: {str(e)}")
        return []


def update_leaderboard_result(leaderboard_result, file_path, max_wait_seconds=30):
    """
    Updates a leaderboard result in the specified file.
    If an entry with the same uid already exists, it is updated.
    Otherwise, a new entry is added.
    
    Args:
        leaderboard_result: The leaderboard result to update (must contain a uid)
        file_path: Path to the results file
        max_wait_seconds: Maximum wait time for file lock (in seconds)
        
    Returns:
        Updated results list or None in case of error
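
    Example (illustrative; the path and the "score" field are hypothetical):
        >>> update_leaderboard_result(
        ...     {"uid": "nlp_42", "category": "nlp", "original_uid": "42", "score": 0.91},
        ...     "output/results.json",
        ... )  # doctest: +SKIP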
    """
    if not leaderboard_result or "uid" not in leaderboard_result:
        logger.error("Unable to update: invalid or missing leaderboard result or uid")
        return None
        
    # Create the parent directory if necessary (skip when the path has no directory part)
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    
    # Use a lock to avoid concurrent writes
    lock_path = f"{file_path}.lock"
    lock = FileLock(lock_path, timeout=max_wait_seconds)
    
    try:
        with lock:
            # Load existing results
            current_results = load_and_validate_results(file_path)
            
            # Index by uid for easy update
            results_by_uid = {r.get("uid", ""): r for r in current_results if "uid" in r}
            
            # Update or add result
            uid = leaderboard_result["uid"]
            if uid in results_by_uid:
                # Update existing result
                results_by_uid[uid].update(leaderboard_result)
                logger.info(f"Result updated for uid: {uid}")
            else:
                # Add new result
                results_by_uid[uid] = leaderboard_result
                logger.info(f"New result added for uid: {uid}")
                
            # Convert to list for writing
            updated_results = list(results_by_uid.values())
            
            # Sort results
            updated_results.sort(key=lambda x: (x.get("category", ""), x.get("original_uid", "")))
            
            # Write to temporary file then rename for atomicity
            fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(file_path))
            try:
                with os.fdopen(fd, 'w', encoding='utf-8') as f:
                    json.dump(updated_results, f, indent=2, ensure_ascii=False)
                    
                # Replace original file with temporary file
                shutil.move(temp_path, file_path)
                logger.info(f"File updated successfully: {file_path}")
                
                return updated_results
            except Exception:
                # Clean up the temporary file in case of error, then re-raise
                if os.path.exists(temp_path):
                    os.unlink(temp_path)
                raise
                
    except Exception as e:
        logger.error(f"Error updating file {file_path}: {str(e)}")
        return None


def split_combined_id(combined_id):
    """
    Splits a combined identifier (category_uid) into its components.
    Uses only the first underscore "_" as separator.
    
    Args:
        combined_id: The combined identifier (category_uid)
        
    Returns:
        A tuple (category, uid); (None, combined_id) if there is no underscore,
        or (None, None) if combined_id is empty
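
    Example:
        >>> split_combined_id("text-generation_42")
        ('text-generation', '42')
        >>> split_combined_id("42")
        (None, '42')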
    """
    if not combined_id:
        return None, None
    
    # Search for the first underscore to separate category and uid
    parts = combined_id.split("_", 1)
    if len(parts) == 2:
        return parts[0], parts[1]
    else:
        # If no underscore, consider it as just a uid without category
        return None, combined_id


def format_datetime(dt_str):
    """
    Format a datetime string to a human readable format.
    
    Args:
        dt_str: The datetime string to format
        
    Returns:
        A formatted datetime string (DD/MM/YYYY at HH:MM:SS), or the original value if it cannot be parsed
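
    Example:
        >>> format_datetime("2024-03-15T14:30:00")
        '15/03/2024 at 14:30:00'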
    """
    try:
        # Check if input is already a datetime object
        if isinstance(dt_str, datetime.datetime):
            dt = dt_str
        else:
            # Convert ISO format to datetime object
            # Handle different formats of ISO dates including fractional seconds and timezone
            try:
                dt = datetime.datetime.fromisoformat(dt_str)
            except ValueError:
                # Handle other common formats
                formats = [
                    "%Y-%m-%dT%H:%M:%S.%f%z",
                    "%Y-%m-%dT%H:%M:%S.%f",
                    "%Y-%m-%dT%H:%M:%S%z",
                    "%Y-%m-%dT%H:%M:%S",
                    "%Y-%m-%d %H:%M:%S",
                    "%Y-%m-%d"
                ]
                
                for fmt in formats:
                    try:
                        dt = datetime.datetime.strptime(dt_str, fmt)
                        break
                    except ValueError:
                        continue
                else:
                    # If no format matches
                    return dt_str
                    
        # Format the datetime object
        return dt.strftime("%d/%m/%Y at %H:%M:%S")
    except (ValueError, TypeError) as e:
        logger.warning(f"Error formatting date {dt_str}: {e}")
        return dt_str


def clean_output_files(results_file):
    """
    Clean the output files, but keep a backup of the original.
    
    Args:
        results_file: The results file to clean
    """
    # If results file exists, make a backup
    if os.path.exists(results_file):
        backup_file = f"{results_file}.backup"
        shutil.copy2(results_file, backup_file)
        print(f"Backup of {results_file} created in {backup_file}")
        
        # Create an empty results file
        with open(results_file, "w") as f:
            json.dump([], f, indent=2)
        print(f"File {results_file} cleaned")