xhluca
committed on
Commit
·
1b1be3e
1
Parent(s):
6a43f54
add demo header
Browse files
demo.py
CHANGED
@@ -485,6 +485,15 @@ base_screenshot_dir = Path(base_screenshot_dir)
|
|
485 |
hl_action_parser = _build_highlevel_action_parser()
|
486 |
|
487 |
with gr.Blocks(title="AgentRewardBench Demo") as demo, gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
with gr.Column(scale=4):
|
489 |
benchmark_default = "WebArena"
|
490 |
benchmark_dd = gr.Dropdown(
|
|
|
485 |
hl_action_parser = _build_highlevel_action_parser()
|
486 |
|
487 |
with gr.Blocks(title="AgentRewardBench Demo") as demo, gr.Row():
|
488 |
+
gr.Markdown(
|
489 |
+
"""
|
490 |
+
# AgentRewardBench Leaderboard
|
491 |
+
| [**🤗Dataset**](https://huggingface.co/datasets/McGill-NLP/agent-reward-bench) | **📄Paper (TBA)** | [**🌐Website**](https://agent-reward-bench.github.io) | [**🏆Leaderboard**](https://huggingface.co/spaces/McGill-NLP/agent-reward-bench-leaderboard) | [**💻Demo**](https://huggingface.co/spaces/McGill-NLP/agent-reward-bench-demo)
|
492 |
+
| :--: | :--: | :--: | :--: | :--: |
|
493 |
+
|
494 |
+
This is the leaderboard for the AgentRewardBench. The scores are based on the results of the agents on the benchmark. We report the *precision* score.
|
495 |
+
"""
|
496 |
+
)
|
497 |
with gr.Column(scale=4):
|
498 |
benchmark_default = "WebArena"
|
499 |
benchmark_dd = gr.Dropdown(
|