stark-leaderboard

Running

App Files Community

Shiyu Zhao committed on Oct 22, 2024

Commit

b65f2ef

1 Parent(s): 6542cc6

Update space

Browse files

Files changed (1) hide show

app.py +142 -95

app.py CHANGED Viewed

@@ -11,10 +11,57 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
 import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from stark_qa import load_qa
 from stark_qa.evaluator import Evaluator
 def process_single_instance(args):
     idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
@@ -465,7 +512,7 @@ def send_submission_confirmation(meta_data, eval_results):
     except Exception as e:
         print(f"Failed to send submission confirmation: {e}")
 def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link
@@ -499,112 +546,112 @@ def process_submission(
             "(Optional) Paper link": paper_link
         }
-        # Save CSV file
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-        model_name_clean = sanitize_name(method_name)
-        team_name_clean = sanitize_name(team_name)
-        # Create directory structure in the HuggingFace space
-        base_dir = "submissions"  # This will be in the HF space root
-        submission_dir = os.path.join(base_dir, f"{model_name_clean}_{team_name_clean}")
-        os.makedirs(submission_dir, exist_ok=True)
         # Save CSV file
         csv_filename = f"predictions_{timestamp}.csv"
-        csv_path = os.path.join(submission_dir, csv_filename)
-        if hasattr(csv_file, 'name'):
-            with open(csv_file.name, 'rb') as source, open(csv_path, 'wb') as target:
-                target.write(source.read())
-        # Validate CSV file
-        csv_valid, csv_message = validate_csv(csv_file)
-        if not csv_valid:
-            error_message = f"Error with CSV file: {csv_message}"
-            send_error_notification(meta_data, error_message)
-            return error_message
-        # Process CSV file through evaluation pipeline
         try:
-            results = compute_metrics(
-                csv_file.name,
-                dataset=dataset.lower(),
-                split=split,
-                num_workers=4
             )
-            if isinstance(results, str) and results.startswith("Error"):
-                send_error_notification(meta_data, results)
-                return f"Evaluation error: {results}"
-            # Multiply results by 100 and round to 2 decimal places
-            processed_results = {
-                "hit@1": round(results['hit@1'] * 100, 2),
-                "hit@5": round(results['hit@5'] * 100, 2),
-                "recall@20": round(results['recall@20'] * 100, 2),
-                "mrr": round(results['mrr'] * 100, 2)
-            }
-            # Prepare submission data
-            submission_data = {
-                "method_name": method_name,
-                "team_name": team_name,
-                "dataset": dataset,
-                "split": split,
-                "contact_email": contact_email,
-                "code_repo": code_repo,
-                "model_description": model_description,
-                "hardware": hardware,
-                "paper_link": paper_link,
-                "results": processed_results,
-                "status": "pending_review",
-                "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
-                "csv_path": csv_path
-            }
-            # Save metadata
-            metadata_path = os.path.join(submission_dir, f"metadata_{timestamp}.json")
-            with open(metadata_path, 'w') as f:
-                json.dump(submission_data, f, indent=4)
-            # Save latest.json
-            latest_path = os.path.join(submission_dir, "latest.json")
-            with open(latest_path, 'w') as f:
-                json.dump({
-                    "latest_submission": timestamp,
-                    "status": "pending_review",
-                    "method_name": method_name
-                }, f, indent=4)
-            # Send email confirmation
-            send_submission_confirmation(meta_data, processed_results)
-            # Update leaderboard data
-            update_leaderboard_data(submission_data)
-            return f"""
-            Submission successful!
-            Evaluation Results:
-            Hit@1: {processed_results['hit@1']:.2f}%
-            Hit@5: {processed_results['hit@5']:.2f}%
-            Recall@20: {processed_results['recall@20']:.2f}%
-            MRR: {processed_results['mrr']:.2f}%
-            Your submission has been saved and is pending review.
-            A confirmation email has been sent to {contact_email}.
-            Once approved, your results will appear in the leaderboard under the method name: {method_name}
-            """
-        except Exception as e:
-            error_message = f"Error processing submission: {str(e)}"
-            send_error_notification(meta_data, error_message)
-            return error_message
     except Exception as e:
         error_message = f"Error processing submission: {str(e)}"
-        send_error_notification(meta_data, error_message)
         return error_message
 def filter_by_model_type(df, selected_types):
     if not selected_types:
         return df.head(0)

 import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
+from huggingface_hub import HfApi
+from tempfile import NamedTemporaryFile
+import shutil
 from stark_qa import load_qa
 from stark_qa.evaluator import Evaluator
+from utils.hub_storage import HubStorage
+from utils.token_handler import TokenHandler
+# Initialize storage once at startup
+try:
+    REPO_ID = "your-username/your-space-name"  # Replace with your space name
+    hub_storage = HubStorage(REPO_ID)
+except Exception as e:
+    raise RuntimeError(f"Failed to initialize HuggingFace Hub storage: {e}")
+def process_submission(
+    method_name, team_name, dataset, split, contact_email,
+    code_repo, csv_file, model_description, hardware, paper_link
+):
+    """Process and validate submission"""
+    try:
+        # Your existing validation code here...
+        # Save CSV file using hub_storage
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
+        csv_filename = f"predictions_{timestamp}.csv"
+        csv_path_in_repo = f"submissions/{folder_name}/{csv_filename}"
+        with NamedTemporaryFile(mode='wb', delete=False) as tmp_file:
+            shutil.copyfileobj(csv_file.file, tmp_file)
+        try:
+            hub_storage.save_to_hub(
+                file_content=tmp_file.name,
+                path_in_repo=csv_path_in_repo,
+                commit_message=f"Add submission: {method_name} by {team_name}"
+            )
+        finally:
+            os.unlink(tmp_file.name)
+        # Rest of your submission processing code...
+    except Exception as e:
+        error_message = f"Error processing submission: {str(e)}"
+        if 'meta_data' in locals():
+            send_error_notification(meta_data, error_message)
+        return error_message
 def process_single_instance(args):
     idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
     except Exception as e:
         print(f"Failed to send submission confirmation: {e}")
 def process_submission(
     method_name, team_name, dataset, split, contact_email,
     code_repo, csv_file, model_description, hardware, paper_link
             "(Optional) Paper link": paper_link
         }
+        # Save and process files
+        api = HfApi()
+        REPO_ID = "snap-stanford/stark-leaderboard"  # Replace with your space name
+        HF_TOKEN = os.getenv("HF_TOKEN")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
         # Save CSV file
         csv_filename = f"predictions_{timestamp}.csv"
+        csv_path_in_repo = f"submissions/{folder_name}/{csv_filename}"
+        with NamedTemporaryFile(mode='wb', delete=False) as tmp_file:
+            shutil.copyfileobj(csv_file.file, tmp_file)
         try:
+            # api.upload_file(
+            #     path_or_fileobj=tmp_file.name,
+            #     path_in_repo=csv_path_in_repo,
+            #     repo_id=REPO_ID,
+            #     repo_type="space",
+            #     token=HF_TOKEN
+            # )
+            hub_storage.save_to_hub(
+                file_content=tmp_file.name,
+                path_in_repo=csv_path_in_repo,
+                commit_message=f"Add submission: {method_name} by {team_name}"
             )
+        finally:
+            os.unlink(tmp_file.name)
+        # Process evaluation
+        results = compute_metrics(
+            csv_file.name,
+            dataset=dataset.lower(),
+            split=split,
+            num_workers=4
+        )
+        if isinstance(results, str):
+            send_error_notification(meta_data, results)
+            return f"Evaluation error: {results}"
+        # Process results (multiply by 100)
+        processed_results = {
+            "hit@1": round(results['hit@1'] * 100, 2),
+            "hit@5": round(results['hit@5'] * 100, 2),
+            "recall@20": round(results['recall@20'] * 100, 2),
+            "mrr": round(results['mrr'] * 100, 2)
+        }
+        # Save metadata
+        submission_data = {
+            **meta_data,
+            "results": processed_results,
+            "status": "pending_review",
+            "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "csv_path": csv_path_in_repo
+        }
+        metadata_path = f"submissions/{folder_name}/metadata_{timestamp}.json"
+        with NamedTemporaryFile(mode='w', delete=False) as tmp_file:
+            json.dump(submission_data, tmp_file, indent=4)
+        try:
+            # api.upload_file(
+            #     path_or_fileobj=tmp_file.name,
+            #     path_in_repo=metadata_path,
+            #     repo_id=REPO_ID,
+            #     repo_type="space",
+            #     token=HF_TOKEN
+            # )
+            hub_storage.save_to_hub(
+                file_content=tmp_file.name,
+                path_in_repo=metadata_path,
+                commit_message=f"Add metadata: {method_name} by {team_name}"
+            )
+        finally:
+            os.unlink(tmp_file.name)
+        # Send confirmation email and update leaderboard
+        send_submission_confirmation(meta_data, processed_results)
+        update_leaderboard_data(submission_data)
+        return f"""
+        Submission successful!
+        Evaluation Results:
+        Hit@1: {processed_results['hit@1']:.2f}%
+        Hit@5: {processed_results['hit@5']:.2f}%
+        Recall@20: {processed_results['recall@20']:.2f}%
+        MRR: {processed_results['mrr']:.2f}%
+        Your submission has been saved and a confirmation email has been sent to {contact_email}.
+        Once approved, your results will appear in the leaderboard under: {method_name}
+        You can find your submission at:
+        https://huggingface.co/spaces/{REPO_ID}/tree/main/submissions/{folder_name}
+        """
     except Exception as e:
         error_message = f"Error processing submission: {str(e)}"
+        if 'meta_data' in locals():
+            send_error_notification(meta_data, error_message)
         return error_message
 def filter_by_model_type(df, selected_types):
     if not selected_types:
         return df.head(0)