File size: 4,219 Bytes
034ac91
5fc1f4b
 
034ac91
 
 
5fc1f4b
 
 
034ac91
79359ac
 
 
 
 
 
 
 
034ac91
 
 
 
 
 
 
 
 
 
727eb6f
79359ac
 
727eb6f
 
 
79359ac
727eb6f
 
 
 
 
 
 
 
79359ac
727eb6f
 
 
 
 
034ac91
 
79359ac
034ac91
 
 
 
 
 
 
 
79359ac
034ac91
 
 
 
 
 
 
 
 
 
 
 
5fc1f4b
034ac91
5fc1f4b
 
034ac91
 
 
 
 
5fc1f4b
034ac91
 
 
 
5fc1f4b
f477fda
 
 
5fc1f4b
 
034ac91
5fc1f4b
034ac91
 
 
 
 
 
 
5fc1f4b
 
 
 
034ac91
7014cfe
034ac91
 
5fc1f4b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import gradio as gr

from apscheduler.schedulers.background import BackgroundScheduler
from dabstep_benchmark.content import TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL
from dabstep_benchmark.leaderboard import *


def restart_space():
    """Ask ``HF_API`` to restart the space whose repo id is ``HF_LEADERBOARD``.

    Both names are provided by the star import from
    ``dabstep_benchmark.leaderboard``. The value returned by the API call is
    discarded, so this function always returns ``None``.
    """
    target_repo = HF_LEADERBOARD
    HF_API.restart_space(repo_id=target_repo)
    
def update_tables():
    """Produce the contents of both leaderboard tables.

    Calls ``generate_leaderboard_df()`` once and splits the resulting frame
    on its ``validated`` column. The ``validated`` column itself is removed
    from each half before returning.

    Returns:
        A ``(validated, unvalidated)`` tuple: first the rows whose
        ``validated`` value equals ``True``, then the rows whose value equals
        ``False``.
    """
    board = generate_leaderboard_df()
    flag = board["validated"]

    # Explicit equality tests (not truthiness or ``~``) are kept on purpose:
    # a row whose flag is neither True nor False lands in neither table.
    validated_rows = board[flag == True].drop(columns=["validated"])  # noqa: E712
    unvalidated_rows = board[flag == False].drop(columns=["validated"])  # noqa: E712
    return validated_rows, unvalidated_rows



# Script entry point: prepare local state, build the Gradio UI (two leaderboard
# tabs, a refresh button, a citation box and a submission form), schedule a
# periodic space restart, and launch the app.
if __name__ == "__main__":
    # Make sure the local "data/task_scores" directory exists before anything
    # else runs (no error if it is already there).
    os.makedirs("data/task_scores", exist_ok=True)
    # `refresh` comes from the star import of dabstep_benchmark.leaderboard.
    # NOTE(review): presumably a full refresh (not just the leaderboard) is
    # wanted at startup - confirm against refresh()'s implementation.
    refresh(only_leaderboard=False)

    demo = gr.Blocks()
    with demo:
        # Page header. Components are laid out in the order they are created
        # inside the `with` contexts, so statement order here is significant.
        gr.Markdown(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        
        # Generate initial leaderboard data so both tables have content at
        # build time, before the first page-load event fires.
        validated, unvalidated = update_tables()
        

        # Read-only table for validated entries. `datatype` lists eight column
        # types (markdown for the 1st and 5th); `column_widths` supplies a
        # single "20%" entry.
        with gr.Tab("Validated"):
            verified_table = gr.Dataframe(
                value=validated,
                datatype=["markdown", "str", "str", "str", "markdown", "str", "str", "str"],
                interactive=False,
                column_widths=["20%"],
                wrap=True,
        )
        
        # Read-only table for unvalidated entries, configured identically to
        # the validated one.
        with gr.Tab("Unvalidated"):
            unverified_table = gr.Dataframe(
                value=unvalidated,
                datatype=["markdown", "str", "str", "str", "markdown", "str", "str", "str"],
                interactive=False,
                column_widths=["20%"],
                wrap=True,
        )
        # Create a Gradio event listener that runs when the page is loaded:
        # update_tables() is called again and its two return values repopulate
        # the validated and unvalidated dataframes.
        demo.load(update_tables, inputs=None, outputs=[verified_table, unverified_table])

        # Manual refresh. The invisible checkbox (fixed at True) is the only
        # input handed to `refresh`.
        # NOTE(review): presumably it binds to `only_leaderboard`, and refresh()
        # is expected to return two values for the two output tables - confirm.
        refresh_button = gr.Button("Refresh")
        refresh_button.click(
            refresh,
            inputs=[
                gr.Checkbox(value=True, visible=False)
            ],
            outputs=[
                verified_table, unverified_table
            ],
        )
        # Collapsed-by-default citation box; its height is set to the number of
        # newline-separated lines in CITATION_BUTTON_TEXT.
        with gr.Row():
            with gr.Accordion("📙 Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=len(CITATION_BUTTON_TEXT.split("\n")),
                    elem_id="citation-button",
                )  # .style(show_copy_button=True)

        # Submission form: instructions, two columns of input fields, then a
        # login button next to the submit button.
        with gr.Accordion("Submit new agent answers for evaluation"):
            with gr.Row():
                gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    # Hidden selector whose only choice (and value) is "all".
                    split = gr.Radio(["all"], value="all", label="Split", visible=False)
                    agent_name_textbox = gr.Textbox(label="Agent name")
                    model_family_textbox = gr.Textbox(label="Model family")
                    # NOTE(review): this textbox is shown to the user but is not
                    # included in the inputs of submit_button.click below -
                    # confirm whether that omission is intentional.
                    system_prompt_textbox = gr.Textbox(label="System prompt example")
                    repo_url_textbox = gr.Textbox(label="Repo URL with agent code")
                with gr.Column():
                    organisation = gr.Textbox(label="Organisation")
                    mail = gr.Textbox(
                        label="Contact email (will be stored privately, & used if there is an issue with your submission)")
                    file_output = gr.File()

            with gr.Row():
                gr.LoginButton()
                submit_button = gr.Button("Submit answers")
            # Markdown area that receives whatever process_submission returns.
            submission_result = gr.Markdown()
            # The inputs are passed positionally to process_submission in the
            # order listed here.
            submit_button.click(
                process_submission,
                [
                    split,
                    agent_name_textbox,
                    model_family_textbox,
                    repo_url_textbox,
                    file_output,
                    organisation,
                    mail
                ],
                submission_result,
            )

    # Background job that calls restart_space() every 3600*24 seconds
    # (once every 24 hours) while the app is running.
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=3600*24)
    scheduler.start()
    # Start serving the UI with Gradio's debug mode enabled.
    demo.launch(debug=True)