Spaces:

openadmet
/

OpenADMET-ExpansionRx-Challenge

Running

App Files Files Community

improving HF space

by mariacm12 - opened Aug 25

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+67

-41

Files changed (4) hide show

about.py +1 -0
app.py +3 -4
evaluate.py +60 -36
requirements.txt +3 -1

about.py CHANGED Viewed

@@ -11,6 +11,7 @@ ENDPOINTS = ["LogD",
              "MBPB",
              "RLM CLint",
              "MGMB"]
 TOKEN = os.environ.get("HF_TOKEN")
 CACHE_PATH=os.getenv("HF_HOME", ".")
 API = HfApi(token=TOKEN)

              "MBPB",
              "RLM CLint",
              "MGMB"]
 TOKEN = os.environ.get("HF_TOKEN")
 CACHE_PATH=os.getenv("HF_HOME", ".")
 API = HfApi(token=TOKEN)

app.py CHANGED Viewed

@@ -200,21 +200,20 @@ def gradio_interface():
                 submit_btn = gr.Button("Submit Predictions")
                 message = gr.Textbox(label="Status", lines=1, visible=False)
-                '''
                 submit_btn.click(
                     submit_data,
                     inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation],
                     outputs=[message],
-                ).then(
                     fn=lambda m: gr.update(value=m, visible=True),
                     inputs=[message],
                     outputs=[message],
-                ).then(
                     fn=evaluate_data,
                     inputs=[filename],
                     outputs=[eval_state]
                 )
-                '''
     return demo
 if __name__ == "__main__":

                 submit_btn = gr.Button("Submit Predictions")
                 message = gr.Textbox(label="Status", lines=1, visible=False)
                 submit_btn.click(
                     submit_data,
                     inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation],
                     outputs=[message],
+                ).success(
                     fn=lambda m: gr.update(value=m, visible=True),
                     inputs=[message],
                     outputs=[message],
+                ).success(
                     fn=evaluate_data,
                     inputs=[filename],
                     outputs=[eval_state]
                 )
     return demo
 if __name__ == "__main__":

evaluate.py CHANGED Viewed

@@ -3,28 +3,40 @@ import pandas as pd
 from pathlib import Path
 from scipy.stats import spearmanr, kendalltau
 from sklearn.metrics import mean_absolute_error, r2_score
-from typing import List
 from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
 from huggingface_hub import hf_hub_download
 import datetime
 import io
 import json, tempfile
-def _compact_dict(d: dict) -> dict:
-    """Drop None/empty-string values; strip whitespace for strings."""
-    out = {}
-    for k, v in d.items():
-        if isinstance(v, str):
-            v = v.strip()
-        if v not in (None, "", []):
-            out[k] = v
-    return out
 def submit_data(predictions_file: str,
                 user_state,
-                *,
                 participant_name: str = "",
                 discord_username: str = "",
                 email: str = "",
@@ -46,14 +58,16 @@ def submit_data(predictions_file: str,
         return f"❌ Error reading results file: {str(e)}"
     if results_df.empty:
-        raise gr.Error("The uploaded file is empty.")
     if not set(ENDPOINTS).issubset(set(results_df.columns)):
-        raise gr.Error(f"The uploaded file must contain all endpoint predictions")
     # Build destination filename in the dataset
-    ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds").replace(":", "-")
-    safe_user = str(user_state.strip()).replace("/", "_").replace(" ", "_")
     destination_csv = f"submissions/{safe_user}_{ts}.csv"
     destination_json = destination_csv.replace(".csv", ".json")
     # Upload the CSV file
@@ -66,22 +80,31 @@ def submit_data(predictions_file: str,
     )
     # Optional participant record
-    participant_fields = _compact_dict({
-        "participant_name": participant_name,
-        "discord_username": discord_username,
-        "email": email,
-        "affiliation": affiliation,
-    })
-    # Metadata JSON
-    meta = {
-        "submission_time_utc": ts,
-        "user": user_state,
-        "original_filename": file_path.name,
-        "evaluated": False,
-        **participant_fields,  # merged here
-    }
-    meta_bytes = io.BytesIO(json.dumps(meta, indent=2).encode("utf-8"))
     API.upload_file(
         path_or_fileobj=meta_bytes,
         path_in_repo=destination_json,
@@ -132,15 +155,16 @@ def evaluate_data(filename: str) -> None:
                 filename=meta_filename,
             )
         with open(meta_path, "r", encoding="utf-8") as f:
-            meta = json.load(f)
-        username = meta.get("user")
-        timestamp = meta.get("submission_time_utc")
     except Exception as e:
         raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
     # Write results to results dataset
     results_df['user'] = username
-    safe_user = str(username).replace("/", "_").replace(" ", "_")
     destination_path = f"results/{safe_user}_{timestamp}_results.csv"
     tmp_name = None
     with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as tmp:

 from pathlib import Path
 from scipy.stats import spearmanr, kendalltau
 from sklearn.metrics import mean_absolute_error, r2_score
+from typing import Optional
 from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
 from huggingface_hub import hf_hub_download
 import datetime
 import io
 import json, tempfile
+import pydantic
+class ParticipantRecord(pydantic.BaseModel):
+    hf_username: Optional[str] = pydantic.Field(default=None, description="Hugging Face username")
+    participant_name: Optional[str] = pydantic.Field(default=None, description="Participant's real name")
+    discord_username: Optional[str] = pydantic.Field(default=None, description="Discord username")
+    email: Optional[str] = pydantic.Field(default=None, description="Email address")
+    affiliation: Optional[str] = pydantic.Field(default=None, description="Affiliation")
+    model_tag: Optional[str] = pydantic.Field(default=None, description="Model tag")
+class SubmissionMetadata(pydantic.BaseModel):
+    submission_time_utc: datetime.datetime
+    user: str
+    original_filename: str
+    evaluated: bool
+    participant: ParticipantRecord
+def _safeify_username(username: str) -> str:
+    return str(username.strip()).replace("/", "_").replace(" ", "_")
+def _unsafify_username(username: str) -> str:
+    return str(username.strip()).replace("/", "_").replace(" ", "_")
 def submit_data(predictions_file: str,
                 user_state,
                 participant_name: str = "",
                 discord_username: str = "",
                 email: str = "",
         return f"❌ Error reading results file: {str(e)}"
     if results_df.empty:
+        return gr.Error("The uploaded file is empty.")
     if not set(ENDPOINTS).issubset(set(results_df.columns)):
+        return gr.Error(f"The uploaded file must contain all endpoint predictions {ENDPOINTS} as columns.")
+    # TODO: add more validation logic once the format of the final data is settled
     # Build destination filename in the dataset
+    ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") # keep the default ISO 8601 format (colons intact) so the timestamp can be deserialized back into a datetime
+    safe_user = _safeify_username(user_state)
     destination_csv = f"submissions/{safe_user}_{ts}.csv"
     destination_json = destination_csv.replace(".csv", ".json")
     # Upload the CSV file
     )
     # Optional participant record
+    try:
+        participant_record = ParticipantRecord(
+            hf_username=user_state,
+            participant_name=participant_name,
+            discord_username=discord_username,
+            email=email,
+            affiliation=affiliation,
+        )
+    except pydantic.ValidationError as e:
+        return f"❌ Error in participant information: {str(e)}"
+    try:
+        meta = SubmissionMetadata(
+            submission_time_utc=ts,
+            original_filename=file_path.name,
+            evaluated=False,
+            participant=participant_record
+        )
+    except pydantic.ValidationError as e:
+        return f"❌ Error in metadata information: {str(e)}"
+    meta_bytes = io.BytesIO(json.dumps(meta.model_dump(), indent=2).encode("utf-8"))
     API.upload_file(
         path_or_fileobj=meta_bytes,
         path_in_repo=destination_json,
                 filename=meta_filename,
             )
         with open(meta_path, "r", encoding="utf-8") as f:
+            _meta = json.load(f)
+        meta = SubmissionMetadata(**_meta)
+        username = meta.participant.hf_username
+        timestamp = meta.submission_time_utc
     except Exception as e:
         raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
     # Write results to results dataset
     results_df['user'] = username
+    safe_user = _unsafify_username(username)
     destination_path = f"results/{safe_user}_{timestamp}_results.csv"
     tmp_name = None
     with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as tmp:

requirements.txt CHANGED Viewed

@@ -2,4 +2,6 @@ gradio
 datasets
 huggingface_hub
 gradio-leaderboard
-plotly

 datasets
 huggingface_hub
 gradio-leaderboard
+plotly
+scipy
+scikit-learn