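"""Gradio app for the PROBE benchmark leaderboard: displays baseline results
and accepts model representation files for evaluation."""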
__all__ = ['block', 'get_baseline_df', 'add_new_eval', 'refresh_data']

import gradio as gr
import pandas as pd

from src.about import *
from src.bin.PROBE import run_probe
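

# Load the results CSV and keep the "Method" column plus the currently selected
# benchmark columns. Note: checkbox_group is defined later, inside the Blocks
# context; this function is only called after the UI has been built.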
def get_baseline_df():
    df = pd.read_csv(CSV_RESULT_PATH)
    present_columns = ["Method"] + checkbox_group.value
    df = df[present_columns]
    return df
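

# Run the PROBE evaluation pipeline on the uploaded representation files.
# The revision name, when provided, overrides the model name as the entry name.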
def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type: list,
):
    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
    run_probe(benchmark_type, representation_name, human_file, skempi_file)
    # Status string shown in the submission_result markdown under the button.
    return "Submission received; evaluation has started."
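

# Build the Gradio interface: leaderboard, about, and submission tabs.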
block = gr.Blocks()
with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Leaderboard table tab.
        with gr.TabItem("PROBE Benchmark", elem_id="probe-benchmark-tab-table", id=1):
            # Column selection: the user picks which evaluation dimensions to display.
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                label="Benchmark Type",
                interactive=True,
            )

            baseline_value = get_baseline_df()
            baseline_header = ["Method"] + checkbox_group.value
            baseline_datatype = ['markdown'] + ['number'] * len(checkbox_group.value)
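            # Leaderboard table; the Refresh button below re-populates it in place.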
            data_component = gr.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

        # About tab.
        with gr.TabItem("About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        # Submission tab.
        with gr.TabItem("Submit here!", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    revision_name_textbox = gr.Textbox(label="Revision Model Name")
                    # Benchmark types (similarity, family, function, affinity)
                    # used to evaluate the submitted representations.
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                with gr.Column():
                    human_file = gr.File(label="Click to Upload the representation file (csv) for Human dataset", file_count="single", type='filepath')
                    skempi_file = gr.File(label="Click to Upload the representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

            submit_button = gr.Button("Submit Eval")
            submission_result = gr.Markdown()
            submit_button.click(
                add_new_eval,
                inputs=[
                    human_file,
                    skempi_file,
                    model_name_textbox,
                    revision_name_textbox,
                    benchmark_type,
                ],
                outputs=[submission_result],
            )
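
    # Re-read the results CSV so the table reflects newly finished evaluations
    # and the currently selected benchmark columns.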
    def refresh_data():
        return get_baseline_df()

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()