Spaces:

openadmet
/

OpenADMET-ExpansionRx-Challenge

Running

App Files Files Community

Maria Castellanos committed on Aug 28

Commit

6fc2c2d

1 Parent(s): 960994d

add latest submission

Browse files

Files changed (2) hide show

app.py +37 -29
evaluate.py +5 -2

app.py CHANGED Viewed

@@ -7,18 +7,30 @@ from evaluate import submit_data, evaluate_data
 from datasets import load_dataset
 from datetime import datetime
-from about import ENDPOINTS
 def get_leaderboard(endpoint):
     dset = load_dataset(results_repo, split='train', download_mode="force_redownload")
     full_df = dset.to_pandas()
     to_show = full_df.copy(deep=True)
     to_show = to_show[to_show['endpoint'] == endpoint]
     # The columns to display publicly
-    to_show = to_show[["user", "MAE", "R2", "Spearman R", "Kendall's Tau"]]
-    return to_show
 def gradio_interface():
     with gr.Blocks(title="OpenADMET ADMET Challenge") as demo:
@@ -91,38 +103,23 @@ def gradio_interface():
         # --- Gradio Interface ---
         with gr.Tabs(elem_classes="tab-buttons"):
             with gr.TabItem("📝About"):
                 gr.Markdown(welcome_md)
             with gr.TabItem("🚀Leaderboard"):
                 gr.Markdown("View the leaderboard for each ADMET endpoint by selecting the appropiate tab.")
-                df1 = pd.DataFrame({
-                    "user": ["User1", "User2", "User3"],
-                    "MAE": [0.1, 0.2, 0.15],
-                    "R2": [0.94, 0.92, 0.89],
-                    "Spearman R": [0.93, 0.91, 0.88],
-                    "Kendall's Tau": [0.90, 0.89, 0.85],
-                })
-                df2 = pd.DataFrame({
-                    "user": ["User1", "User2", "User3"],
-                    "MAE": [0.2, 0.3, 0.15],
-                    "R2": [0.2, 0.72, 0.89],
-                    "Spearman R": [0.91, 0.71, 0.68],
-                    "Kendall's Tau": [0.90, 0.4, 0.7],
-                })
-                # Make separate leaderboards in separate tabs
-                mock_data = [df1, df1, df2, df1, df2, df1, df1, df2, df1, df2]
-                for i, endpoint in enumerate(ENDPOINTS):
-                    df = mock_data[i]
                     with gr.TabItem(endpoint):
-                        Leaderboard(
                             value=get_leaderboard(endpoint),
-                            datatype=['str', 'number', 'number', 'number', 'number'],
-                            select_columns=["user", "MAE", "R2", "Spearman R", "Kendall's Tau"],
                             search_columns=["user"],
-                            every=60,
                         )
             with gr.TabItem("Submit Predictions"):
                 gr.Markdown(
@@ -141,10 +138,14 @@ def gradio_interface():
                         gr.Markdown(
                             """
                             ## Participant Information
-                            To participate, you must enter a Hugging Face username, or alias, which will be displayed on the leaderboard.
-                            Other information is optional but helps us track participation.
                             If you wish to be included in Challenge discussions, please provide your Discord username and email.
                             If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation.
                             """
                             )
                     #    endpoint_type = gr.CheckboxGroup(
@@ -177,13 +178,19 @@ def gradio_interface():
                             label="Affiliation",
                             placeholder="Enter your school/company affiliation (optional)",
                         )
                 with gr.Row():
                     with gr.Column():
                         gr.Markdown(
                             """
                             ## Submission Instructions
-                            Upload a single CSV file containing your predictions for all ligands in the test set.
                             You can download the ligand test set here (lik/to/download/smiles/csv).
                             """
                         )
@@ -201,9 +208,10 @@ def gradio_interface():
                 submit_btn = gr.Button("Submit Predictions")
                 message = gr.Textbox(label="Status", lines=1, visible=False)
                 submit_btn.click(
                     submit_data,
-                    inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation],
                     outputs=[message, filename],
                 ).success(
                     fn=lambda m: gr.update(value=m, visible=True),

 from datasets import load_dataset
 from datetime import datetime
+from about import ENDPOINTS, API
def make_user_clickable(name):
    """Return an HTML anchor that links a username to its Hugging Face profile.

    The username is percent-encoded where it becomes part of the URL and
    HTML-escaped where it becomes the visible label, so a value containing
    quotes or angle brackets cannot break out of the ``href`` attribute or
    inject markup into the rendered cell. Usernames made only of letters,
    digits, ``-``, ``_``, ``.`` and ``~`` produce exactly the same string as
    plain interpolation would.

    Args:
        name: Username to link; converted with ``str()`` before use.

    Returns:
        An ``<a>`` element as a string, opening the profile in a new tab.
    """
    # Local imports keep this helper self-contained.
    from html import escape
    from urllib.parse import quote

    name = str(name)
    # safe="" also encodes "/" so the name stays a single path segment.
    link = f'https://huggingface.co/{quote(name, safe="")}'
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{escape(name)}</a>'
def make_tag_clickable(tag):
    """Return an HTML anchor labelled "Report" that points at a report URL.

    The report link is supplied by participants, so it is treated as
    untrusted: only absolute ``http``/``https`` URLs are rendered, and the
    URL is HTML-escaped before being placed in the ``href`` attribute.

    Args:
        tag: Report URL as a string. Missing values (``None``, NaN, other
            non-strings) and blank strings are accepted.

    Returns:
        An ``<a>`` element as a string, or an empty string when ``tag`` is
        missing, malformed, or uses a scheme other than http/https.
    """
    # Local imports keep this helper self-contained.
    from html import escape
    from urllib.parse import urlsplit

    if not isinstance(tag, str):
        return ""
    url = tag.strip()
    try:
        parts = urlsplit(url)
    except ValueError:
        # urlsplit rejects some malformed inputs (e.g. a broken IPv6 host).
        return ""
    # Rejects javascript:, data:, relative paths and scheme-only strings.
    if parts.scheme not in ("http", "https") or not parts.netloc:
        return ""
    return f'<a target="_blank" href="{escape(url, quote=True)}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">Report</a>'
 def get_leaderboard(endpoint):
     dset = load_dataset(results_repo, split='train', download_mode="force_redownload")
     full_df = dset.to_pandas()
     to_show = full_df.copy(deep=True)
     to_show = to_show[to_show['endpoint'] == endpoint]
+    to_show['user'] = to_show['user'].apply(lambda x: make_user_clickable(x)).astype(str)
+    # Get the most recent submission
+    to_show["submission_time"] = pd.to_datetime(to_show["submission_time"])
+    latest_per_user = to_show.loc[to_show.groupby("user")["submission_time"].idxmax()].reset_index(drop=True)
+    latest_per_user.rename(columns={"submission_time": "submission time"}, inplace=True)
     # The columns to display publicly
+    df = latest_per_user[["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time"]]
+    # TODO: Also display the column with report link and make it clickable with make_tag_clickable
+    return df
 def gradio_interface():
     with gr.Blocks(title="OpenADMET ADMET Challenge") as demo:
         # --- Gradio Interface ---
         with gr.Tabs(elem_classes="tab-buttons"):
+            lboard_dict = {}
             with gr.TabItem("📝About"):
                 gr.Markdown(welcome_md)
             with gr.TabItem("🚀Leaderboard"):
                 gr.Markdown("View the leaderboard for each ADMET endpoint by selecting the appropiate tab.")
+                # Make separate leaderboards in separate tabs
+                for endpoint in ENDPOINTS:
                     with gr.TabItem(endpoint):
+                        lboard_dict[endpoint] = Leaderboard(
                             value=get_leaderboard(endpoint),
+                            datatype=['markdown', 'number', 'number', 'number', 'number'],
+                            select_columns=["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time"],
                             search_columns=["user"],
                         )
+                # TODO: Make aggregated leaderboard and display on first tab
             with gr.TabItem("Submit Predictions"):
                 gr.Markdown(
                         gr.Markdown(
                             """
                             ## Participant Information
+                            To participate, we **only** require a Hugging Face username, which will be displayed on the leaderboard.
+                            Other information is optional but helps us track participation.
                             If you wish to be included in Challenge discussions, please provide your Discord username and email.
                             If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation.
+                            We also ask you to provide a link to a report decribing your method. While not mandatory at the time of participation,
+                            you need to submit the link before the challenge deadline in order to be considered for the final leaderboard.
                             """
                             )
                     #    endpoint_type = gr.CheckboxGroup(
                             label="Affiliation",
                             placeholder="Enter your school/company affiliation (optional)",
                         )
+                        model_tag = gr.Textbox(
+                            label="Model Report",
+                            placeholder="Link to a report describing your method (optional)",
+                        )
                 with gr.Row():
                     with gr.Column():
                         gr.Markdown(
                             """
                             ## Submission Instructions
+                            Upload a single CSV file containing your predictions for all ligands in the test set.
+                            Only your latest submission will be considered.
                             You can download the ligand test set here (lik/to/download/smiles/csv).
                             """
                         )
                 submit_btn = gr.Button("Submit Predictions")
                 message = gr.Textbox(label="Status", lines=1, visible=False)
+                # TODO: Refresh leaderboard every time a submission is received
                 submit_btn.click(
                     submit_data,
+                    inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation, model_tag],
                     outputs=[message, filename],
                 ).success(
                     fn=lambda m: gr.update(value=m, visible=True),

evaluate.py CHANGED Viewed

@@ -18,7 +18,7 @@ class ParticipantRecord(pydantic.BaseModel):
     discord_username: Optional[str] = pydantic.Field(default=None, description="Discord username")
     email: Optional[str] = pydantic.Field(default=None, description="Email address")
     affiliation: Optional[str] = pydantic.Field(default=None, description="Affiliation")
-    #model_tag: Optional[str] = pydantic.Field(default=None, description="Model tag")
 class SubmissionMetadata(pydantic.BaseModel):
@@ -40,7 +40,8 @@ def submit_data(predictions_file: str,
                 participant_name: str = "",
                 discord_username: str = "",
                 email: str = "",
-                affiliation: str = ""
 ):
     if user_state is None:
@@ -88,6 +89,7 @@ def submit_data(predictions_file: str,
             discord_username=discord_username,
             email=email,
             affiliation=affiliation,
         )
     except pydantic.ValidationError as e:
         return f"❌ Error in participant information: {str(e)}"
@@ -165,6 +167,7 @@ def evaluate_data(filename: str) -> None:
     # Write results to results dataset
     results_df['user'] = username
     safe_user = _unsafify_username(username)
     destination_path = f"results/{safe_user}_{timestamp}_results.csv"
     tmp_name = None

     discord_username: Optional[str] = pydantic.Field(default=None, description="Discord username")
     email: Optional[str] = pydantic.Field(default=None, description="Email address")
     affiliation: Optional[str] = pydantic.Field(default=None, description="Affiliation")
+    model_tag: Optional[str] = pydantic.Field(default=None, description="Model tag")
 class SubmissionMetadata(pydantic.BaseModel):
                 participant_name: str = "",
                 discord_username: str = "",
                 email: str = "",
+                affiliation: str = "",
+                model_tag: str = "",
 ):
     if user_state is None:
             discord_username=discord_username,
             email=email,
             affiliation=affiliation,
+            model_tag=model_tag,
         )
     except pydantic.ValidationError as e:
         return f"❌ Error in participant information: {str(e)}"
     # Write results to results dataset
     results_df['user'] = username
+    results_df['submission_time'] = timestamp
     safe_user = _unsafify_username(username)
     destination_path = f"results/{safe_user}_{timestamp}_results.csv"
     tmp_name = None