Spaces:

McGill-NLP
/

agent-reward-bench-demo

Running

xhluca committed on Apr 14

Commit

1b1be3e

1 Parent(s): 6a43f54

add demo header

Files changed (1) hide show

demo.py CHANGED Viewed

@@ -485,6 +485,15 @@ base_screenshot_dir = Path(base_screenshot_dir)
 hl_action_parser = _build_highlevel_action_parser()
 with gr.Blocks(title="AgentRewardBench Demo") as demo, gr.Row():
     with gr.Column(scale=4):
         benchmark_default = "WebArena"
         benchmark_dd = gr.Dropdown(

 hl_action_parser = _build_highlevel_action_parser()
 with gr.Blocks(title="AgentRewardBench Demo") as demo, gr.Row():
+    gr.Markdown(
+        """
+        # AgentRewardBench Leaderboard
+        | [**🤗Dataset**](https://huggingface.co/datasets/McGill-NLP/agent-reward-bench) | **📄Paper (TBA)** | [**🌐Website**](https://agent-reward-bench.github.io) | [**🏆Leaderboard**](https://huggingface.co/spaces/McGill-NLP/agent-reward-bench-leaderboard) | [**💻Demo**](https://huggingface.co/spaces/McGill-NLP/agent-reward-bench-demo)
+        | :--: | :--: | :--: | :--: | :--: |
+        This is the leaderboard for the AgentRewardBench. The scores are based on the results of the agents on the benchmark. We report the *precision* score.
+        """
+    )
     with gr.Column(scale=4):
         benchmark_default = "WebArena"
         benchmark_dd = gr.Dropdown(