Spaces:
Running
Running
Shiyu Zhao
committed on
Commit
·
5bf37f4
1
Parent(s):
5f78509
Update space
Browse files
app.py
CHANGED
|
@@ -242,11 +242,9 @@ def read_json_from_hub(api: HfApi, repo_id: str, file_path: str) -> dict:
|
|
| 242 |
|
| 243 |
def scan_submissions_directory():
|
| 244 |
"""
|
| 245 |
-
Scans the submissions directory and updates the
|
| 246 |
-
|
| 247 |
"""
|
| 248 |
-
global df_synthesized_full, df_synthesized_10, df_human_generated
|
| 249 |
-
|
| 250 |
try:
|
| 251 |
# Initialize HuggingFace API
|
| 252 |
api = HfApi()
|
|
@@ -264,117 +262,39 @@ def scan_submissions_directory():
|
|
| 264 |
repo_id=REPO_ID,
|
| 265 |
repo_type="space"
|
| 266 |
)
|
| 267 |
-
# Filter for files in submissions directory
|
| 268 |
repo_files = [f for f in all_files if f.startswith('submissions/')]
|
| 269 |
except Exception as e:
|
| 270 |
print(f"Error listing repository contents: {str(e)}")
|
| 271 |
return submissions_by_split
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
print("No submissions directory found or empty")
|
| 275 |
-
return submissions_by_split
|
| 276 |
-
|
| 277 |
-
# Group files by team folders
|
| 278 |
-
folder_files = {}
|
| 279 |
for filepath in repo_files:
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
folder_files[folder_name] = []
|
| 287 |
-
folder_files[folder_name].append(filepath)
|
| 288 |
-
|
| 289 |
-
# Process each team folder
|
| 290 |
-
for folder_name, files in folder_files.items():
|
| 291 |
-
try:
|
| 292 |
-
# Find latest.json in this folder
|
| 293 |
-
latest_file = next((f for f in files if f.endswith('latest.json')), None)
|
| 294 |
-
if not latest_file:
|
| 295 |
-
print(f"No latest.json found in {folder_name}")
|
| 296 |
-
continue
|
| 297 |
-
|
| 298 |
-
# Read latest.json
|
| 299 |
-
latest_info = read_json_from_hub(api, REPO_ID, latest_file)
|
| 300 |
-
if not latest_info:
|
| 301 |
-
print(f"Failed to read latest.json for {folder_name}")
|
| 302 |
-
continue
|
| 303 |
-
|
| 304 |
-
# Check submission status
|
| 305 |
-
if latest_info.get('status') != 'approved':
|
| 306 |
-
print(f"Skipping unapproved submission in {folder_name}")
|
| 307 |
-
continue
|
| 308 |
-
|
| 309 |
-
timestamp = latest_info.get('latest_submission')
|
| 310 |
-
if not timestamp:
|
| 311 |
-
print(f"No timestamp found in latest.json for {folder_name}")
|
| 312 |
-
continue
|
| 313 |
-
|
| 314 |
-
# Find metadata file
|
| 315 |
-
metadata_file = next(
|
| 316 |
-
(f for f in files if f.endswith(f'metadata_{timestamp}.json')),
|
| 317 |
-
None
|
| 318 |
-
)
|
| 319 |
-
if not metadata_file:
|
| 320 |
-
print(f"No matching metadata file found for {folder_name} timestamp {timestamp}")
|
| 321 |
-
continue
|
| 322 |
-
|
| 323 |
-
# Read metadata file
|
| 324 |
-
submission_data = read_json_from_hub(api, REPO_ID, metadata_file)
|
| 325 |
-
if not submission_data:
|
| 326 |
-
print(f"Failed to read metadata for {folder_name}")
|
| 327 |
-
continue
|
| 328 |
-
|
| 329 |
-
# Map the split name if necessary
|
| 330 |
-
split = submission_data.get('Split')
|
| 331 |
-
if split in submissions_by_split:
|
| 332 |
-
submissions_by_split[split].append(submission_data)
|
| 333 |
-
|
| 334 |
-
# Update the appropriate DataFrame based on the split
|
| 335 |
-
if split == 'test':
|
| 336 |
-
df_to_update = df_synthesized_full
|
| 337 |
-
elif split == 'test-0.1':
|
| 338 |
-
df_to_update = df_synthesized_10
|
| 339 |
-
else: # human_generated_eval
|
| 340 |
-
df_to_update = df_human_generated
|
| 341 |
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
else:
|
| 357 |
-
df_to_update.loc[len(df_to_update)] = new_row
|
| 358 |
-
|
| 359 |
-
print(f"Successfully added submission from {folder_name} to {split} leaderboard")
|
| 360 |
-
else:
|
| 361 |
-
print(f"Invalid split '{split}' found in {folder_name}")
|
| 362 |
-
|
| 363 |
-
except Exception as e:
|
| 364 |
-
print(f"Error processing folder {folder_name}: {str(e)}")
|
| 365 |
-
continue
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
df.sort_values(by=mrr_cols[0], ascending=False, inplace=True)
|
| 372 |
-
|
| 373 |
-
# Print summary
|
| 374 |
-
print("\nLeaderboard initialization summary:")
|
| 375 |
-
for split, submissions in submissions_by_split.items():
|
| 376 |
-
print(f"{split}: {len(submissions)} submissions")
|
| 377 |
-
|
| 378 |
return submissions_by_split
|
| 379 |
|
| 380 |
except Exception as e:
|
|
@@ -579,7 +499,38 @@ def format_evaluation_results(results):
|
|
| 579 |
"""
|
| 580 |
result_lines = [f"{metric}: {value}" for metric, value in results.items()]
|
| 581 |
return "\n".join(result_lines)
|
| 582 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 583 |
def process_submission(
|
| 584 |
method_name, team_name, dataset, split, contact_email,
|
| 585 |
code_repo, csv_file, model_description, hardware, paper_link, model_type
|
|
@@ -591,15 +542,10 @@ def process_submission(
|
|
| 591 |
if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
|
| 592 |
return "Error: Please fill in all required fields"
|
| 593 |
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
method_exists = any(method_name in models for models in model_types.values())
|
| 599 |
-
if not method_exists and model_type != 'Others':
|
| 600 |
-
return "Error: New models must be submitted under 'Others' category"
|
| 601 |
-
elif not method_exists and model_type == 'Others':
|
| 602 |
-
model_types['Others'].append(method_name)
|
| 603 |
|
| 604 |
# Create metadata
|
| 605 |
meta_data = {
|
|
@@ -774,6 +720,13 @@ def format_dataframe(df, dataset):
|
|
| 774 |
return filtered_df
|
| 775 |
|
| 776 |
def update_tables(selected_types):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 777 |
filtered_df_full = filter_by_model_type(df_synthesized_full, selected_types)
|
| 778 |
filtered_df_10 = filter_by_model_type(df_synthesized_10, selected_types)
|
| 779 |
filtered_df_human = filter_by_model_type(df_human_generated, selected_types)
|
|
@@ -785,7 +738,6 @@ def update_tables(selected_types):
|
|
| 785 |
|
| 786 |
return outputs
|
| 787 |
|
| 788 |
-
|
| 789 |
css = """
|
| 790 |
table > thead {
|
| 791 |
white-space: normal
|
|
@@ -878,26 +830,27 @@ with gr.Blocks(css=css) as demo:
|
|
| 878 |
label="Contact Email(s)*",
|
| 879 |
placeholder="[email protected]; [email protected]"
|
| 880 |
)
|
| 881 |
-
|
| 882 |
-
with gr.Column():
|
| 883 |
model_type = gr.Dropdown(
|
| 884 |
choices=list(model_types.keys()),
|
| 885 |
label="Model Type*",
|
| 886 |
-
value="Others"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 887 |
)
|
| 888 |
code_repo = gr.Textbox(
|
| 889 |
label="Code Repository*",
|
| 890 |
placeholder="https://github.com/snap-stanford/stark-leaderboard"
|
| 891 |
)
|
|
|
|
|
|
|
| 892 |
csv_file = gr.File(
|
| 893 |
label="Prediction CSV*",
|
| 894 |
file_types=[".csv"],
|
| 895 |
-
type="filepath"
|
| 896 |
-
)
|
| 897 |
-
model_description = gr.Textbox(
|
| 898 |
-
label="Model Description*",
|
| 899 |
-
lines=3,
|
| 900 |
-
placeholder="Briefly describe how your retriever model works..."
|
| 901 |
)
|
| 902 |
hardware = gr.Textbox(
|
| 903 |
label="Hardware Specifications*",
|
|
|
|
| 242 |
|
| 243 |
def scan_submissions_directory():
|
| 244 |
"""
|
| 245 |
+
Scans the submissions directory and updates the model types dictionary
|
| 246 |
+
with submitted models.
|
| 247 |
"""
|
|
|
|
|
|
|
| 248 |
try:
|
| 249 |
# Initialize HuggingFace API
|
| 250 |
api = HfApi()
|
|
|
|
| 262 |
repo_id=REPO_ID,
|
| 263 |
repo_type="space"
|
| 264 |
)
|
|
|
|
| 265 |
repo_files = [f for f in all_files if f.startswith('submissions/')]
|
| 266 |
except Exception as e:
|
| 267 |
print(f"Error listing repository contents: {str(e)}")
|
| 268 |
return submissions_by_split
|
| 269 |
+
|
| 270 |
+
# Process submissions and update model types
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
for filepath in repo_files:
|
| 272 |
+
if filepath.endswith('metadata.json'):
|
| 273 |
+
try:
|
| 274 |
+
submission_data = read_json_from_hub(api, REPO_ID, filepath)
|
| 275 |
+
if submission_data:
|
| 276 |
+
method_name = submission_data.get('Method Name')
|
| 277 |
+
model_type = submission_data.get('Model Type')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
+
# If model type is specified in metadata, use it
|
| 280 |
+
if method_name and model_type:
|
| 281 |
+
# Check if method exists in any other category
|
| 282 |
+
existing_type = get_model_type_for_method(method_name)
|
| 283 |
+
|
| 284 |
+
# If method doesn't exist in any category, add it to the specified category
|
| 285 |
+
if existing_type == 'Others' and model_type in model_types:
|
| 286 |
+
if method_name not in model_types[model_type]:
|
| 287 |
+
model_types[model_type].append(method_name)
|
| 288 |
+
|
| 289 |
+
# Add submission to appropriate split
|
| 290 |
+
split = submission_data.get('Split')
|
| 291 |
+
if split in submissions_by_split:
|
| 292 |
+
submissions_by_split[split].append(submission_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
+
except Exception as e:
|
| 295 |
+
print(f"Error processing metadata file {filepath}: {str(e)}")
|
| 296 |
+
continue
|
| 297 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
return submissions_by_split
|
| 299 |
|
| 300 |
except Exception as e:
|
|
|
|
| 499 |
"""
|
| 500 |
result_lines = [f"{metric}: {value}" for metric, value in results.items()]
|
| 501 |
return "\n".join(result_lines)
|
| 502 |
+
|
| 503 |
+
def get_model_type_for_method(method_name):
    """
    Look up the model-type category that lists ``method_name``.

    Categories in ``model_types`` are examined in the mapping's iteration
    order and the first one whose method collection contains the name wins.
    Falls back to 'Others' when no category contains the name.
    """
    # Lazy generator: membership tests stop at the first matching category.
    matching_categories = (
        category
        for category, registered_methods in model_types.items()
        if method_name in registered_methods
    )
    return next(matching_categories, 'Others')
|
| 512 |
+
|
| 513 |
+
def validate_model_type(method_name, selected_type):
    """
    Decide whether ``selected_type`` is an acceptable category for ``method_name``.

    A method name already listed in ``model_types`` has to be submitted under
    the first category that lists it; a name that is not listed anywhere may
    be submitted under any category.

    Returns an ``(is_valid, message)`` tuple.
    """
    # First category (in mapping order) that already lists this method, if any.
    registered_under = next(
        (
            category
            for category, registered_methods in model_types.items()
            if method_name in registered_methods
        ),
        None,
    )

    # Only a known method submitted under a different category is rejected.
    if registered_under and registered_under != selected_type:
        return False, f"This method name is already registered under '{registered_under}'. Please use the correct category."

    # Either the category matches the registered one or the method is new.
    return True, "Valid model type"
|
| 533 |
+
|
| 534 |
def process_submission(
|
| 535 |
method_name, team_name, dataset, split, contact_email,
|
| 536 |
code_repo, csv_file, model_description, hardware, paper_link, model_type
|
|
|
|
| 542 |
if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
|
| 543 |
return "Error: Please fill in all required fields"
|
| 544 |
|
| 545 |
+
# Validate model type
|
| 546 |
+
is_valid, message = validate_model_type(method_name, model_type)
|
| 547 |
+
if not is_valid:
|
| 548 |
+
return f"Error: {message}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
|
| 550 |
# Create metadata
|
| 551 |
meta_data = {
|
|
|
|
| 720 |
return filtered_df
|
| 721 |
|
| 722 |
def update_tables(selected_types):
|
| 723 |
+
"""
|
| 724 |
+
Update tables based on selected model types.
|
| 725 |
+
Include all models from selected categories.
|
| 726 |
+
"""
|
| 727 |
+
if not selected_types:
|
| 728 |
+
return [df.head(0) for df in [df_synthesized_full, df_synthesized_10, df_human_generated]]
|
| 729 |
+
|
| 730 |
filtered_df_full = filter_by_model_type(df_synthesized_full, selected_types)
|
| 731 |
filtered_df_10 = filter_by_model_type(df_synthesized_10, selected_types)
|
| 732 |
filtered_df_human = filter_by_model_type(df_human_generated, selected_types)
|
|
|
|
| 738 |
|
| 739 |
return outputs
|
| 740 |
|
|
|
|
| 741 |
css = """
|
| 742 |
table > thead {
|
| 743 |
white-space: normal
|
|
|
|
| 830 |
label="Contact Email(s)*",
|
| 831 |
placeholder="[email protected]; [email protected]"
|
| 832 |
)
|
|
|
|
|
|
|
| 833 |
model_type = gr.Dropdown(
|
| 834 |
choices=list(model_types.keys()),
|
| 835 |
label="Model Type*",
|
| 836 |
+
value="Others",
|
| 837 |
+
info="Select the appropriate category for your model"
|
| 838 |
+
)
|
| 839 |
+
model_description = gr.Textbox(
|
| 840 |
+
label="Model Description*",
|
| 841 |
+
lines=3,
|
| 842 |
+
placeholder="Briefly describe how your retriever model works..."
|
| 843 |
)
|
| 844 |
code_repo = gr.Textbox(
|
| 845 |
label="Code Repository*",
|
| 846 |
placeholder="https://github.com/snap-stanford/stark-leaderboard"
|
| 847 |
)
|
| 848 |
+
|
| 849 |
+
with gr.Column():
|
| 850 |
csv_file = gr.File(
|
| 851 |
label="Prediction CSV*",
|
| 852 |
file_types=[".csv"],
|
| 853 |
+
type="filepath"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 854 |
)
|
| 855 |
hardware = gr.Textbox(
|
| 856 |
label="Hardware Specifications*",
|