stark-leaderboard

Running

App Files Community

Shiyu Zhao committed on Oct 24, 2024

Commit

5bf37f4

1 Parent(s): 5f78509

Update space

Browse files

Files changed (1) hide show

app.py +81 -128

app.py CHANGED Viewed

@@ -242,11 +242,9 @@ def read_json_from_hub(api: HfApi, repo_id: str, file_path: str) -> dict:
 def scan_submissions_directory():
     """
-    Scans the submissions directory and updates the leaderboard tables with all submitted results.
-    Returns a dictionary mapping split names to lists of submissions.
     """
-    global df_synthesized_full, df_synthesized_10, df_human_generated
     try:
         # Initialize HuggingFace API
         api = HfApi()
@@ -264,117 +262,39 @@ def scan_submissions_directory():
                 repo_id=REPO_ID,
                 repo_type="space"
             )
-            # Filter for files in submissions directory
             repo_files = [f for f in all_files if f.startswith('submissions/')]
         except Exception as e:
             print(f"Error listing repository contents: {str(e)}")
             return submissions_by_split
-        if not repo_files:
-            print("No submissions directory found or empty")
-            return submissions_by_split
-        # Group files by team folders
-        folder_files = {}
         for filepath in repo_files:
-            parts = filepath.split('/')
-            if len(parts) < 3:  # Need at least submissions/team_folder/file
-                continue
-            folder_name = parts[1]  # team_folder name
-            if folder_name not in folder_files:
-                folder_files[folder_name] = []
-            folder_files[folder_name].append(filepath)
-        # Process each team folder
-        for folder_name, files in folder_files.items():
-            try:
-                # Find latest.json in this folder
-                latest_file = next((f for f in files if f.endswith('latest.json')), None)
-                if not latest_file:
-                    print(f"No latest.json found in {folder_name}")
-                    continue
-                # Read latest.json
-                latest_info = read_json_from_hub(api, REPO_ID, latest_file)
-                if not latest_info:
-                    print(f"Failed to read latest.json for {folder_name}")
-                    continue
-                # Check submission status
-                if latest_info.get('status') != 'approved':
-                    print(f"Skipping unapproved submission in {folder_name}")
-                    continue
-                timestamp = latest_info.get('latest_submission')
-                if not timestamp:
-                    print(f"No timestamp found in latest.json for {folder_name}")
-                    continue
-                # Find metadata file
-                metadata_file = next(
-                    (f for f in files if f.endswith(f'metadata_{timestamp}.json')),
-                    None
-                )
-                if not metadata_file:
-                    print(f"No matching metadata file found for {folder_name} timestamp {timestamp}")
-                    continue
-                # Read metadata file
-                submission_data = read_json_from_hub(api, REPO_ID, metadata_file)
-                if not submission_data:
-                    print(f"Failed to read metadata for {folder_name}")
-                    continue
-                # Map the split name if necessary
-                split = submission_data.get('Split')
-                if split in submissions_by_split:
-                    submissions_by_split[split].append(submission_data)
-                    # Update the appropriate DataFrame based on the split
-                    if split == 'test':
-                        df_to_update = df_synthesized_full
-                    elif split == 'test-0.1':
-                        df_to_update = df_synthesized_10
-                    else:  # human_generated_eval
-                        df_to_update = df_human_generated
-                    # Add row to DataFrame
-                    new_row = {
-                        'Method': submission_data['Method Name'],
-                        f'STARK-{submission_data["Dataset"].upper()}_Hit@1': submission_data['results']['hit@1'],
-                        f'STARK-{submission_data["Dataset"].upper()}_Hit@5': submission_data['results']['hit@5'],
-                        f'STARK-{submission_data["Dataset"].upper()}_R@20': submission_data['results']['recall@20'],
-                        f'STARK-{submission_data["Dataset"].upper()}_MRR': submission_data['results']['mrr']
-                    }
-                    # Update existing row or add new one
-                    method_mask = df_to_update['Method'] == submission_data['Method Name']
-                    if method_mask.any():
-                        for col in new_row:
-                            df_to_update.loc[method_mask, col] = new_row[col]
-                    else:
-                        df_to_update.loc[len(df_to_update)] = new_row
-                    print(f"Successfully added submission from {folder_name} to {split} leaderboard")
-                else:
-                    print(f"Invalid split '{split}' found in {folder_name}")
-            except Exception as e:
-                print(f"Error processing folder {folder_name}: {str(e)}")
-                continue
-        # Sort each DataFrame by MRR score
-        for df in [df_synthesized_full, df_synthesized_10, df_human_generated]:
-            mrr_cols = [col for col in df.columns if col.endswith('_MRR')]
-            if mrr_cols:
-                df.sort_values(by=mrr_cols[0], ascending=False, inplace=True)
-        # Print summary
-        print("\nLeaderboard initialization summary:")
-        for split, submissions in submissions_by_split.items():
-            print(f"{split}: {len(submissions)} submissions")
         return submissions_by_split
     except Exception as e:
@@ -579,7 +499,38 @@ def format_evaluation_results(results):
     """
     result_lines = [f"{metric}: {value}" for metric, value in results.items()]
     return "\n".join(result_lines)
 def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link, model_type
@@ -591,15 +542,10 @@ def process_submission(
         if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
             return "Error: Please fill in all required fields"
-        if model_type not in model_types:
-            return "Error: Invalid model type selected"
-        # Add model to 'Others' category if it's a new model
-        method_exists = any(method_name in models for models in model_types.values())
-        if not method_exists and model_type != 'Others':
-            return "Error: New models must be submitted under 'Others' category"
-        elif not method_exists and model_type == 'Others':
-            model_types['Others'].append(method_name)
         # Create metadata
         meta_data = {
@@ -774,6 +720,13 @@ def format_dataframe(df, dataset):
     return filtered_df
 def update_tables(selected_types):
     filtered_df_full = filter_by_model_type(df_synthesized_full, selected_types)
     filtered_df_10 = filter_by_model_type(df_synthesized_10, selected_types)
     filtered_df_human = filter_by_model_type(df_human_generated, selected_types)
@@ -785,7 +738,6 @@ def update_tables(selected_types):
     return outputs
 css = """
 table > thead {
     white-space: normal
@@ -878,26 +830,27 @@ with gr.Blocks(css=css) as demo:
                 label="Contact Email(s)*",
                 placeholder="[email protected]; [email protected]"
             )
-        with gr.Column():
             model_type = gr.Dropdown(
                 choices=list(model_types.keys()),
                 label="Model Type*",
-                value="Others"
             )
             code_repo = gr.Textbox(
                 label="Code Repository*",
                 placeholder="https://github.com/snap-stanford/stark-leaderboard"
             )
             csv_file = gr.File(
                 label="Prediction CSV*",
                 file_types=[".csv"],
-                type="filepath"  # Important: specify type as filepath
-            )
-            model_description = gr.Textbox(
-                label="Model Description*",
-                lines=3,
-                placeholder="Briefly describe how your retriever model works..."
             )
             hardware = gr.Textbox(
                 label="Hardware Specifications*",

 def scan_submissions_directory():
     """
+    Scans the submissions directory and updates the model types dictionary
+    with submitted models.
     """
     try:
         # Initialize HuggingFace API
         api = HfApi()
                 repo_id=REPO_ID,
                 repo_type="space"
             )
             repo_files = [f for f in all_files if f.startswith('submissions/')]
         except Exception as e:
             print(f"Error listing repository contents: {str(e)}")
             return submissions_by_split
+        # Process submissions and update model types
         for filepath in repo_files:
+            if filepath.endswith('metadata.json'):
+                try:
+                    submission_data = read_json_from_hub(api, REPO_ID, filepath)
+                    if submission_data:
+                        method_name = submission_data.get('Method Name')
+                        model_type = submission_data.get('Model Type')
+                        # If model type is specified in metadata, use it
+                        if method_name and model_type:
+                            # Check if method exists in any other category
+                            existing_type = get_model_type_for_method(method_name)
+                            # If method doesn't exist in any category, add it to the specified category
+                            if existing_type == 'Others' and model_type in model_types:
+                                if method_name not in model_types[model_type]:
+                                    model_types[model_type].append(method_name)
+                            # Add submission to appropriate split
+                            split = submission_data.get('Split')
+                            if split in submissions_by_split:
+                                submissions_by_split[split].append(submission_data)
+                except Exception as e:
+                    print(f"Error processing metadata file {filepath}: {str(e)}")
+                    continue
         return submissions_by_split
     except Exception as e:
     """
     result_lines = [f"{metric}: {value}" for metric, value in results.items()]
     return "\n".join(result_lines)
+def get_model_type_for_method(method_name):
+    """
+    Find the model type category for a given method name.
+    Returns 'Others' if not found in predefined categories.
+    """
+    for type_name, methods in model_types.items():
+        if method_name in methods:
+            return type_name
+    return 'Others'
+def validate_model_type(method_name, selected_type):
+    """
+    Validate if the selected model type is appropriate for the method name.
+    Returns (is_valid, message).
+    """
+    # Check if method exists in any category
+    existing_type = None
+    for type_name, methods in model_types.items():
+        if method_name in methods:
+            existing_type = type_name
+            break
+    # If method exists, it must be submitted under its predefined category
+    if existing_type:
+        if existing_type != selected_type:
+            return False, f"This method name is already registered under '{existing_type}'. Please use the correct category."
+        return True, "Valid model type"
+    # For new methods, any category is valid
+    return True, "Valid model type"
 def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link, model_type
         if not all([method_name, team_name, dataset, split, contact_email, code_repo, csv_file, model_type]):
             return "Error: Please fill in all required fields"
+        # Validate model type
+        is_valid, message = validate_model_type(method_name, model_type)
+        if not is_valid:
+            return f"Error: {message}"
         # Create metadata
         meta_data = {
     return filtered_df
 def update_tables(selected_types):
+    """
+    Update tables based on selected model types.
+    Include all models from selected categories.
+    """
+    if not selected_types:
+        return [df.head(0) for df in [df_synthesized_full, df_synthesized_10, df_human_generated]]
     filtered_df_full = filter_by_model_type(df_synthesized_full, selected_types)
     filtered_df_10 = filter_by_model_type(df_synthesized_10, selected_types)
     filtered_df_human = filter_by_model_type(df_human_generated, selected_types)
     return outputs
 css = """
 table > thead {
     white-space: normal
                 label="Contact Email(s)*",
                 placeholder="[email protected]; [email protected]"
             )
             model_type = gr.Dropdown(
                 choices=list(model_types.keys()),
                 label="Model Type*",
+                value="Others",
+                info="Select the appropriate category for your model"
+            )
+            model_description = gr.Textbox(
+                label="Model Description*",
+                lines=3,
+                placeholder="Briefly describe how your retriever model works..."
             )
             code_repo = gr.Textbox(
                 label="Code Repository*",
                 placeholder="https://github.com/snap-stanford/stark-leaderboard"
             )
+        with gr.Column():
             csv_file = gr.File(
                 label="Prediction CSV*",
                 file_types=[".csv"],
+                type="filepath"
             )
             hardware = gr.Textbox(
                 label="Hardware Specifications*",