Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -107,7 +107,7 @@ def init_leaderboard(dataframe):
|
|
107 |
interactive=False,
|
108 |
)
|
109 |
|
110 |
-
|
111 |
|
112 |
demo = gr.Blocks(css=custom_css)
|
113 |
with demo:
|
@@ -121,7 +121,9 @@ with demo:
|
|
121 |
|
122 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
123 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
124 |
-
|
|
|
|
|
125 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
126 |
with gr.Column():
|
127 |
with gr.Row():
|
@@ -151,40 +153,59 @@ with demo:
|
|
151 |
|
152 |
link_input = gr.Textbox(label="Link (str)", placeholder="请输入链接")
|
153 |
|
154 |
-
submit_button = gr.Button("Submit Eval")
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
|
|
|
|
|
|
187 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
# with gr.Column():
|
189 |
# with gr.Accordion(
|
190 |
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|
|
|
107 |
interactive=False,
|
108 |
)
|
109 |
|
110 |
+
all_submissions = []
|
111 |
|
112 |
demo = gr.Blocks(css=custom_css)
|
113 |
with demo:
|
|
|
121 |
|
122 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
|
123 |
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
|
124 |
+
|
125 |
+
|
126 |
+
|
127 |
with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
|
128 |
with gr.Column():
|
129 |
with gr.Row():
|
|
|
153 |
|
154 |
link_input = gr.Textbox(label="Link (str)", placeholder="请输入链接")
|
155 |
|
156 |
+
# Submission controls: queue one record locally, or upload the whole
# queue to the Space in a single commit.
submit_button = gr.Button("Submit One Eval")
submit_all_button = gr.Button("Submit All")

# Panel that echoes the queued submissions / upload status back to the user.
# FIX: corrected the user-facing typo "Submited" -> "Submitted".
submission_result = gr.Markdown("## Submitted results")
|
160 |
+
|
161 |
+
def submit_eval(score, name, base_model, env, target_research, subset, link):
    """Queue one evaluation submission and return the updated display text.

    Builds a record from the form fields, appends it to the shared
    ``all_submissions`` queue, and returns every queued record serialized
    as one JSON object per line (rendered in the results Markdown panel).
    """
    # Assemble a single submission record; new entries start in the
    # "Checking" state until they are uploaded for review.
    entry = {
        "Score": score,
        "Name": name,
        "BaseModel": base_model,
        "Env": env,
        "Target-research": target_research,
        "Subset": subset,
        "Link": link,
        "State": "Checking",
    }
    # Keep the record in the shared queue so "Submit All" can upload it later.
    all_submissions.append(entry)

    # Render the whole queue, one JSON-encoded record per line.
    rendered = []
    for record in all_submissions:
        rendered.append(json.dumps(record))
    return "\n".join(rendered)
|
178 |
+
|
179 |
+
def submit_all():
    """Upload every queued submission to the leaderboard Space.

    Serializes the shared ``all_submissions`` queue to a local JSON file,
    uploads that file to the Space repository via the Hub API client, then
    empties the queue. Returns a status string for the results panel.
    """
    # Snapshot the queue to a local JSON file for upload.
    out_path = "test-output.json"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(json.dumps(all_submissions))

    print("Uploading eval file")
    API.upload_file(
        path_or_fileobj=out_path,
        path_in_repo=out_path,
        repo_id="microsoft/MageBench-Leaderboard",
        repo_type="space",
        # NOTE: was a placeholder-free f-string; plain literal is identical.
        commit_message="Add submissions to checking queue",
    )
    # BUG FIX: the original rebound `all_submissions = []` with no `global`
    # declaration, which made the name local to this function and raised
    # UnboundLocalError on the json.dumps(all_submissions) read above.
    # Mutating in place empties the shared queue without rebinding it.
    all_submissions.clear()
    return "All submissions uploaded successfully!"
|
195 |
+
|
196 |
+
# 单条数据提交按钮点击事件
|
197 |
+
# Wire the submission buttons to their handlers.
# "Submit One Eval": collect the form fields into the local queue and
# refresh the results panel with the serialized queue.
submit_button.click(
    fn=submit_eval,
    inputs=[
        score_input,
        name_input,
        base_model_input,
        env_dropdown,
        target_research_dropdown,
        subset_dropdown,
        link_input,
    ],
    outputs=submission_result,
)

# "Submit All": upload the queued records to the Space and report status.
submit_all_button.click(fn=submit_all, inputs=[], outputs=submission_result)
|
209 |
# with gr.Column():
|
210 |
# with gr.Accordion(
|
211 |
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
|