hmacdope committed on
Commit
3687c40
·
1 Parent(s): b4b15c9

HMO updates

Browse files
Files changed (4) hide show
  1. about.py +1 -0
  2. app.py +3 -4
  3. evaluate.py +60 -36
  4. requirements.txt +3 -1
about.py CHANGED
@@ -11,6 +11,7 @@ ENDPOINTS = ["LogD",
11
  "MBPB",
12
  "RLM CLint",
13
  "MGMB"]
 
14
  TOKEN = os.environ.get("HF_TOKEN")
15
  CACHE_PATH=os.getenv("HF_HOME", ".")
16
  API = HfApi(token=TOKEN)
 
11
  "MBPB",
12
  "RLM CLint",
13
  "MGMB"]
14
+
15
  TOKEN = os.environ.get("HF_TOKEN")
16
  CACHE_PATH=os.getenv("HF_HOME", ".")
17
  API = HfApi(token=TOKEN)
app.py CHANGED
@@ -200,21 +200,20 @@ def gradio_interface():
200
 
201
  submit_btn = gr.Button("Submit Predictions")
202
  message = gr.Textbox(label="Status", lines=1, visible=False)
203
- '''
204
  submit_btn.click(
205
  submit_data,
206
  inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation],
207
  outputs=[message],
208
- ).then(
209
  fn=lambda m: gr.update(value=m, visible=True),
210
  inputs=[message],
211
  outputs=[message],
212
- ).then(
213
  fn=evaluate_data,
214
  inputs=[filename],
215
  outputs=[eval_state]
216
  )
217
- '''
218
  return demo
219
 
220
  if __name__ == "__main__":
 
200
 
201
  submit_btn = gr.Button("Submit Predictions")
202
  message = gr.Textbox(label="Status", lines=1, visible=False)
203
+
204
  submit_btn.click(
205
  submit_data,
206
  inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation],
207
  outputs=[message],
208
+ ).success(
209
  fn=lambda m: gr.update(value=m, visible=True),
210
  inputs=[message],
211
  outputs=[message],
212
+ ).success(
213
  fn=evaluate_data,
214
  inputs=[filename],
215
  outputs=[eval_state]
216
  )
 
217
  return demo
218
 
219
  if __name__ == "__main__":
evaluate.py CHANGED
@@ -3,28 +3,40 @@ import pandas as pd
3
  from pathlib import Path
4
  from scipy.stats import spearmanr, kendalltau
5
  from sklearn.metrics import mean_absolute_error, r2_score
6
- from typing import List
7
  from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
8
  from huggingface_hub import hf_hub_download
9
  import datetime
10
  import io
11
  import json, tempfile
 
12
 
13
 
14
- def _compact_dict(d: dict) -> dict:
15
- """Drop None/empty-string values; strip whitespace for strings."""
16
- out = {}
17
- for k, v in d.items():
18
- if isinstance(v, str):
19
- v = v.strip()
20
- if v not in (None, "", []):
21
- out[k] = v
22
- return out
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def submit_data(predictions_file: str,
26
  user_state,
27
- *,
28
  participant_name: str = "",
29
  discord_username: str = "",
30
  email: str = "",
@@ -46,14 +58,16 @@ def submit_data(predictions_file: str,
46
  return f"❌ Error reading results file: {str(e)}"
47
 
48
  if results_df.empty:
49
- raise gr.Error("The uploaded file is empty.")
50
  if not set(ENDPOINTS).issubset(set(results_df.columns)):
51
- raise gr.Error(f"The uploaded file must contain all endpoint predictions")
 
 
52
 
53
  # Build destination filename in the dataset
54
- ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds").replace(":", "-")
55
- safe_user = str(user_state.strip()).replace("/", "_").replace(" ", "_")
56
-
57
  destination_csv = f"submissions/{safe_user}_{ts}.csv"
58
  destination_json = destination_csv.replace(".csv", ".json")
59
  # Upload the CSV file
@@ -66,22 +80,31 @@ def submit_data(predictions_file: str,
66
  )
67
 
68
  # Optional participant record
69
- participant_fields = _compact_dict({
70
- "participant_name": participant_name,
71
- "discord_username": discord_username,
72
- "email": email,
73
- "affiliation": affiliation,
74
- })
75
- # Metadata JSON
76
- meta = {
77
- "submission_time_utc": ts,
78
- "user": user_state,
79
- "original_filename": file_path.name,
80
- "evaluated": False,
81
- **participant_fields, # merged here
82
- }
83
-
84
- meta_bytes = io.BytesIO(json.dumps(meta, indent=2).encode("utf-8"))
 
 
 
 
 
 
 
 
 
85
  API.upload_file(
86
  path_or_fileobj=meta_bytes,
87
  path_in_repo=destination_json,
@@ -132,15 +155,16 @@ def evaluate_data(filename: str) -> None:
132
  filename=meta_filename,
133
  )
134
  with open(meta_path, "r", encoding="utf-8") as f:
135
- meta = json.load(f)
136
- username = meta.get("user")
137
- timestamp = meta.get("submission_time_utc")
 
138
  except Exception as e:
139
  raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
140
 
141
  # Write results to results dataset
142
  results_df['user'] = username
143
- safe_user = str(username).replace("/", "_").replace(" ", "_")
144
  destination_path = f"results/{safe_user}_{timestamp}_results.csv"
145
  tmp_name = None
146
  with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as tmp:
 
3
  from pathlib import Path
4
  from scipy.stats import spearmanr, kendalltau
5
  from sklearn.metrics import mean_absolute_error, r2_score
6
+ from typing import Optional
7
  from about import ENDPOINTS, API, submissions_repo, results_repo, test_repo
8
  from huggingface_hub import hf_hub_download
9
  import datetime
10
  import io
11
  import json, tempfile
12
+ import pydantic
13
 
14
 
15
+ class ParticipantRecord(pydantic.BaseModel):
16
+ hf_username: Optional[str] = pydantic.Field(default=None, description="Hugging Face username")
17
+ participant_name: Optional[str] = pydantic.Field(default=None, description="Participant's real name")
18
+ discord_username: Optional[str] = pydantic.Field(default=None, description="Discord username")
19
+ email: Optional[str] = pydantic.Field(default=None, description="Email address")
20
+ affiliation: Optional[str] = pydantic.Field(default=None, description="Affiliation")
21
+ model_tag: Optional[str] = pydantic.Field(default=None, description="Model tag")
 
 
22
 
23
 
24
+ class SubmissionMetadata(pydantic.BaseModel):
25
+ submission_time_utc: datetime.datetime
26
+ user: str
27
+ original_filename: str
28
+ evaluated: bool
29
+ participant: ParticipantRecord
30
+
31
+
32
+ def _safeify_username(username: str) -> str:
33
+ return str(username.strip()).replace("/", "_").replace(" ", "_")
34
+
35
+ def _unsafify_username(username: str) -> str:
36
+ return str(username.strip()).replace("/", "_").replace(" ", "_")
37
+
38
  def submit_data(predictions_file: str,
39
  user_state,
 
40
  participant_name: str = "",
41
  discord_username: str = "",
42
  email: str = "",
 
58
  return f"❌ Error reading results file: {str(e)}"
59
 
60
  if results_df.empty:
61
+ return gr.Error("The uploaded file is empty.")
62
  if not set(ENDPOINTS).issubset(set(results_df.columns)):
63
+ return gr.Error(f"The uploaded file must contain all endpoint predictions {ENDPOINTS} as columns.")
64
+
65
+ # TODO, much more validation logic needed depending on the state of final data
66
 
67
  # Build destination filename in the dataset
68
+ ts = datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") # should keep default time so can be deserialized correctly
69
+ safe_user = _safeify_username(user_state)
70
+
71
  destination_csv = f"submissions/{safe_user}_{ts}.csv"
72
  destination_json = destination_csv.replace(".csv", ".json")
73
  # Upload the CSV file
 
80
  )
81
 
82
  # Optional participant record
83
+ try:
84
+
85
+ participant_record = ParticipantRecord(
86
+ hf_username=user_state,
87
+ participant_name=participant_name,
88
+ discord_username=discord_username,
89
+ email=email,
90
+ affiliation=affiliation,
91
+ )
92
+ except pydantic.ValidationError as e:
93
+ return f"❌ Error in participant information: {str(e)}"
94
+
95
+
96
+ try:
97
+ meta = SubmissionMetadata(
98
+ submission_time_utc=ts,
99
+ original_filename=file_path.name,
100
+ evaluated=False,
101
+ participant=participant_record
102
+ )
103
+ except pydantic.ValidationError as e:
104
+ return f"❌ Error in metadata information: {str(e)}"
105
+
106
+ meta_bytes = io.BytesIO(json.dumps(meta.model_dump(), indent=2).encode("utf-8"))
107
+
108
  API.upload_file(
109
  path_or_fileobj=meta_bytes,
110
  path_in_repo=destination_json,
 
155
  filename=meta_filename,
156
  )
157
  with open(meta_path, "r", encoding="utf-8") as f:
158
+ _meta = json.load(f)
159
+ meta = SubmissionMetadata(**_meta)
160
+ username = meta.participant.hf_username
161
+ timestamp = meta.submission_time_utc
162
  except Exception as e:
163
  raise gr.Error(f"Failed to load metadata file: {e}. No results written to results dataset.")
164
 
165
  # Write results to results dataset
166
  results_df['user'] = username
167
+ safe_user = _unsafify_username(username)
168
  destination_path = f"results/{safe_user}_{timestamp}_results.csv"
169
  tmp_name = None
170
  with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as tmp:
requirements.txt CHANGED
@@ -2,4 +2,6 @@ gradio
2
  datasets
3
  huggingface_hub
4
  gradio-leaderboard
5
- plotly
 
 
 
2
  datasets
3
  huggingface_hub
4
  gradio-leaderboard
5
+ plotly
6
+ scipy
7
+ scikit-learn