daiqi committed
Commit 96c4b54 · verified · 1 Parent(s): 5b280bc

Update app.py

Files changed (1):
  1. app.py +55 -34
app.py CHANGED
@@ -107,7 +107,7 @@ def init_leaderboard(dataframe):
         interactive=False,
     )
 
-
+all_submissions = []
 
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -121,7 +121,9 @@ with demo:
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
+
+
+
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
             with gr.Column():
                 with gr.Row():
@@ -151,40 +153,59 @@ with demo:
 
                 link_input = gr.Textbox(label="Link (str)", placeholder="Please enter a link")
 
-                submit_button = gr.Button("Submit Eval")
-                submission_result = gr.Markdown()
-
-                def submit_eval(score, name, base_model, env, target_research, subset, link):
-                    # Handle the submission here, e.g. save the info to a database or process it further
-                    result = {
-                        "Score": score,
-                        "Name": name,
-                        "BaseModel": base_model,
-                        "Env": env,
-                        "Target-research": target_research,
-                        "Subset": subset,
-                        "Link": link,
-                        "State": "Checking"
-                    }
-                    out_path = "test-output.json"
-                    with open(out_path, "w") as f:
-                        f.write(json.dumps(result))
-
-                    print("Uploading eval file")
-                    API.upload_file(
-                        path_or_fileobj=out_path,
-                        path_in_repo=out_path,
-                        repo_id="microsoft/MageBench-Leaderboard",
-                        repo_type="space",
-                        commit_message=f"Add {result} to checking queue",
-                    )
-                    return result
+                submit_button = gr.Button("Submit One Eval")
+                submit_all_button = gr.Button("Submit All")
+
+                submission_result = gr.Markdown("## Submitted results")
+
+                def submit_eval(score, name, base_model, env, target_research, subset, link):
+                    # Handle a single submission
+                    result = {
+                        "Score": score,
+                        "Name": name,
+                        "BaseModel": base_model,
+                        "Env": env,
+                        "Target-research": target_research,
+                        "Subset": subset,
+                        "Link": link,
+                        "State": "Checking"
+                    }
+                    # Append the result to the global list
+                    all_submissions.append(result)
+                    # Update what is shown on the page
+                    display_text = "\n".join([json.dumps(submission) for submission in all_submissions])
+                    return display_text
+
+                def submit_all():
+                    # Upload all accumulated results at once
+                    out_path = "test-output.json"
+                    with open(out_path, "w") as f:
+                        f.write(json.dumps(all_submissions))
 
-                submit_button.click(
-                    submit_eval,
-                    [score_input, name_input, base_model_input, env_dropdown, target_research_dropdown, subset_dropdown, link_input],
-                    submission_result
+                    print("Uploading eval file")
+                    API.upload_file(
+                        path_or_fileobj=out_path,
+                        path_in_repo=out_path,
+                        repo_id="microsoft/MageBench-Leaderboard",
+                        repo_type="space",
+                        commit_message=f"Add submissions to checking queue",
                     )
+                    all_submissions = []
+                    return "All submissions uploaded successfully!"
+
+                # Click event for submitting a single entry
+                submit_button.click(
+                    submit_eval,
+                    [score_input, name_input, base_model_input, env_dropdown, target_research_dropdown, subset_dropdown, link_input],
+                    submission_result
+                )
+
+                # Click event for submitting all entries
+                submit_all_button.click(
+                    submit_all,
+                    inputs=[],
+                    outputs=submission_result
+                )
                 # with gr.Column():
                 #     with gr.Accordion(
                 #         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",