frimelle (HF Staff) committed
Commit 715bd5c · 1 Parent(s): cea76ed

experiment with app.py

Files changed (1):
  1. app.py +27 -185

app.py CHANGED
@@ -1,113 +1,47 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+from gradio_leaderboard import Leaderboard, SelectColumns
 import pandas as pd
-from apscheduler.schedulers.background import BackgroundScheduler
-from huggingface_hub import snapshot_download
-from pathlib import Path # ⬅️ for local JSON
+from pathlib import Path
 
 from src.about import (
     CITATION_BUTTON_LABEL,
     CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
 )
 from src.display.css_html_js import custom_css
-from src.display.utils import (
-    BENCHMARK_COLS,
-    COLS,
-    EVAL_COLS,
-    EVAL_TYPES,
-    AutoEvalColumn,
-    ModelType,
-    fields,
-    WeightType,
-    Precision
-)
-from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
-from src.populate import get_evaluation_queue_df, get_leaderboard_df
-from src.submission.submit import add_new_eval
-
 
-def restart_space():
-    API.restart_space(repo_id=REPO_ID)
-
-### Space initialisation (pull queue/results datasets like before)
-try:
-    print(EVAL_REQUESTS_PATH)
-    snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-try:
-    print(EVAL_RESULTS_PATH)
-    snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-    )
-except Exception:
-    restart_space()
-
-# Original leaderboard (unchanged)
-LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
-(
-    finished_eval_queue_df,
-    running_eval_queue_df,
-    pending_eval_queue_df,
-) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-
-def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-            ColumnFilter(
-                AutoEvalColumn.params.name,
-                type="slider",
-                min=0.01,
-                max=150,
-                label="Select the number of parameters (B)",
-            ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=False,
-    )
 
 # -----------------------------
-# NEW: Load your local JSON and a simple leaderboard (no AutoEvalColumn coupling)
+# Load your local JSON
 # -----------------------------
 USER_JSON = Path(__file__).parent / "leaderboard_data.json"
 try:
     USER_DF = pd.read_json(USER_JSON)
-except Exception as e:
-    # Create an empty DF with the expected columns if file missing, so Space still builds.
-    USER_DF = pd.DataFrame(columns=["Model", "Average", "Assistant Traits", "Relationship & Intimacy", "Emotional Investment", "User Vulnerabilities"])
+except Exception:
+    # Build with an empty frame if file missing so the Space still loads
+    USER_DF = pd.DataFrame(
+        columns=[
+            "Model",
+            "Average",
+            "Assistant Traits",
+            "Relationship & Intimacy",
+            "Emotional Investment",
+            "User Vulnerabilities",
+        ]
+    )
 
-# Ensure types (Model=str, others=float) and clean column order
+# Ensure types (Model=str, others=float)
 if "Model" in USER_DF.columns:
     USER_DF["Model"] = USER_DF["Model"].astype(str)
 for col in USER_DF.columns:
     if col != "Model":
         USER_DF[col] = pd.to_numeric(USER_DF[col], errors="coerce")
 
+
 def init_simple_leaderboard(df: pd.DataFrame):
-    # Show Model + up to first 6 metric columns by default
+    # Show Model + up to 6 metrics by default
     metrics = [c for c in df.columns if c != "Model"]
     default_cols = ["Model"] + metrics[:6] if "Model" in df.columns else list(df.columns)[:7]
     cant_hide = ["Model"] if "Model" in df.columns else []
@@ -120,11 +54,12 @@ def init_simple_leaderboard(df: pd.DataFrame):
             label="Select Columns to Display:",
         ),
         search_columns=["Model"] if "Model" in df.columns else [],
-        hide_columns=[], # keep everything visible
-        filter_columns=[], # no filters for now
+        hide_columns=[], # keep everything visible
+        filter_columns=[], # add later if you introduce typed columns to filter on
         interactive=False,
     )
 
+
 # -----------------------------
 # UI
 # -----------------------------
@@ -133,107 +68,16 @@ with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-        # NEW TAB: renders your leaderboard_data.json
-        with gr.TabItem("📊 INTIMA Leaderboard", elem_id="intima-leaderboard-tab", id=1):
+    with gr.Tabs(elem_classes="tab-buttons"):
+        with gr.TabItem("📊 INTIMA Leaderboard", elem_id="intima-leaderboard-tab", id=0):
             _ = init_simple_leaderboard(USER_DF)
 
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            with gr.Column():
-                with gr.Row():
-                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                with gr.Column():
-                    with gr.Accordion(
-                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            finished_eval_table = gr.components.Dataframe(
-                                value=finished_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-                    with gr.Accordion(
-                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            running_eval_table = gr.components.Dataframe(
-                                value=running_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-
-                    with gr.Accordion(
-                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                        open=False,
-                    ):
-                        with gr.Row():
-                            pending_eval_table = gr.components.Dataframe(
-                                value=pending_eval_queue_df,
-                                headers=EVAL_COLS,
-                                datatype=EVAL_TYPES,
-                                row_count=5,
-                            )
-            with gr.Row():
-                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Row():
-                with gr.Column():
-                    model_name_textbox = gr.Textbox(label="Model name")
-                    revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                    model_type = gr.Dropdown(
-                        choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                        label="Model type",
-                        multiselect=False,
-                        value=None,
-                        interactive=True,
-                    )
-
-                with gr.Column():
-                    precision = gr.Dropdown(
-                        choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                        label="Precision",
-                        multiselect=False,
-                        value="float16",
-                        interactive=True,
-                    )
-                    weight_type = gr.Dropdown(
-                        choices=[i.value.name for i in WeightType],
-                        label="Weights type",
-                        multiselect=False,
-                        value="Original",
-                        interactive=True,
-                    )
-                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-            submit_button = gr.Button("Submit Eval")
-            submission_result = gr.Markdown()
-            submit_button.click(
-                add_new_eval,
-                [
-                    model_name_textbox,
-                    base_model_name_textbox,
-                    revision_name_textbox,
-                    precision,
-                    weight_type,
-                    model_type,
-                ],
-                submission_result,
-            )
-
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
-            citation_button = gr.Textbox(
+            gr.Textbox(
                 value=CITATION_BUTTON_TEXT,
                 label=CITATION_BUTTON_LABEL,
                 lines=20,
@@ -241,7 +85,5 @@ with demo:
                 show_copy_button=True,
             )
 
-scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
-scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+if __name__ == "__main__":
+    demo.launch()
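
Note on the data contract introduced by this commit: the rewritten app only needs a leaderboard_data.json file next to app.py. Below is a minimal sketch of a script that would write a compatible file, assuming the records orientation that pd.read_json() parses into one row per JSON object; the model id and all score values are made-up placeholders, not real INTIMA results.

# Hypothetical helper, not part of the commit: writes a leaderboard_data.json
# that app.py can load. Column names mirror the fallback DataFrame above;
# every value below is a placeholder.
import pandas as pd

rows = [
    {
        "Model": "example-org/example-model",
        "Average": 0.50,
        "Assistant Traits": 0.50,
        "Relationship & Intimacy": 0.50,
        "Emotional Investment": 0.50,
        "User Vulnerabilities": 0.50,
    },
]
pd.DataFrame(rows).to_json("leaderboard_data.json", orient="records", indent=2)

If the file is missing or unreadable, the except branch falls back to an empty frame with the same columns, and the pd.to_numeric(..., errors="coerce") pass turns any non-numeric score into NaN rather than crashing the Space at startup.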