__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import os
import json
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import plotnine as p9

import sys
sys.path.append('./src')
sys.path.append('.')

from src.about import *
from src.saving_utils import *
from src.vis_utils import *
from src.bin.PROBE import run_probe
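
# NOTE (assumed provenance): the wildcard imports above (src.about, src.saving_utils,
# src.vis_utils) are expected to supply the names used below without an explicit import:
# text constants (LEADERBOARD_INTRODUCTION, LLM_BENCHMARKS_TEXT, EVALUATION_QUEUE_TEXT,
# CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL), option lists and paths (TASK_INFO,
# similarity_tasks_options, function_prediction_aspect_options,
# family_prediction_dataset_options, benchmark_specific_metrics, CSV_RESULT_PATH),
# and helpers (save_results, get_baseline_df, update_metric_choices, benchmark_plot).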


def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_types,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
    save,
):
    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox

    results = run_probe(benchmark_types, representation_name, human_file, skempi_file, similarity_tasks, function_prediction_aspect, function_prediction_dataset, family_prediction_dataset)
    print(results)

    if save:
        save_results(representation_name, benchmark_types, results)
        print("Results are saved!")

    return 0


# Function to update leaderboard dynamically based on user selection
def update_leaderboard(selected_methods, selected_metrics):
    return get_baseline_df(selected_methods, selected_metrics)
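
# Assumed contract for get_baseline_df (defined outside this file): it returns a pandas
# DataFrame with a 'method_name' column plus one column per metric, filtered to the
# selected methods/metrics; passing (None, None) is expected to return the unfiltered table.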


block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
            leaderboard = get_baseline_df(None, None)  # get baseline leaderboard without filtering
            method_names = leaderboard['method_name'].unique().tolist()
            # Columns of the full result table define the available metric options
            metric_names = pd.read_csv(CSV_RESULT_PATH).columns.tolist()
            metrics_with_method = metric_names.copy()
            metric_names.remove('method_name')  # Remove method_name from the metric options

            # Leaderboard section with method and metric selectors
            leaderboard_method_selector = gr.CheckboxGroup(
                choices=method_names, label="Select Methods for the Leaderboard", value=method_names, interactive=True
            )
            leaderboard_metric_selector = gr.CheckboxGroup(
                choices=metric_names, label="Select Metrics for the Leaderboard", value=metric_names, interactive=True
            )

            # Display the filtered leaderboard
            baseline_value = get_baseline_df(method_names, metric_names)
            baseline_header = ["method_name"] + metric_names
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)

            with gr.Row(show_progress=True, variant='panel'):
                data_component = gr.components.Dataframe(
                    value=baseline_value,
                    headers=baseline_header,
                    type="pandas",
                    datatype=baseline_datatype,
                    interactive=False,
                    visible=True,
                )
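
            # The 'markdown' datatype on the method_name column presumably lets it carry
            # rendered markdown (e.g. links produced by helpers such as make_clickable_model);
            # the metric columns are plain numbers.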

            # Update leaderboard when method/metric selection changes
            leaderboard_method_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
            leaderboard_metric_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )

            with gr.Row():
                gr.Markdown(
                    """
                    ## **Below, you can visualize the results displayed in the Leaderboard.**
                    ### Once you choose a benchmark type, the related options for metrics, datasets, and other parameters will become visible. Select the methods and metrics of interest from the options to generate visualizations.
                    """
                )

            # Dropdown for benchmark type
            benchmark_type_selector = gr.Dropdown(choices=list(benchmark_specific_metrics.keys()), label="Select Benchmark Type", value=None)

            with gr.Row():
                # Dynamic selectors
                x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric", visible=False)
                y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric", visible=False)
                aspect_type_selector = gr.Dropdown(choices=[], label="Select Aspect Type", visible=False)
                dataset_type_selector = gr.Dropdown(choices=[], label="Select Dataset Type", visible=False)
                dataset_selector = gr.Dropdown(choices=[], label="Select Dataset", visible=False)
                single_metric_selector = gr.Dropdown(choices=[], label="Select Metric", visible=False)

            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize", interactive=True, value=method_names)

            # Button to draw the plot for the selected benchmark
            plot_button = gr.Button("Plot")

            with gr.Row(show_progress=True, variant='panel'):
                plot_output = gr.Image(label="Plot")

            # Update selectors when benchmark type changes
            benchmark_type_selector.change(
                update_metric_choices,
                inputs=[benchmark_type_selector],
                outputs=[x_metric_selector, y_metric_selector, aspect_type_selector, dataset_type_selector, dataset_selector, single_metric_selector]
            )
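
            # Assumption about update_metric_choices (wired above, imported via src.vis_utils):
            # it is expected to return one gr.update(...) per output component, setting the
            # appropriate choices and toggling visibility for the selected benchmark type.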

            plot_button.click(
                benchmark_plot,
                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector, aspect_type_selector, dataset_type_selector, dataset_selector, single_metric_selector],
                outputs=plot_output
            )

        with gr.TabItem("About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("Submit here!", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Method name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Method Name",
                    )
                    benchmark_types = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Types",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Similarity Tasks",
                        interactive=True,
                    )
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Function Prediction Aspects",
                        interactive=True,
                    )
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Family Prediction Datasets",
                        interactive=True,
                    )
                    function_dataset = gr.Textbox(
                        label="Function Prediction Datasets",
                        visible=False,
                        value="All_Data_Sets"
                    )
                    save_checkbox = gr.Checkbox(
                        label="Save results for leaderboard and visualization",
                        value=True
                    )

            # with gr.Column():
            with gr.Row():
                human_file = gr.components.File(label="The representation file (csv) for Human dataset", file_count="single", type='filepath')
                skempi_file = gr.components.File(label="The representation file (csv) for SKEMPI dataset", file_count="single", type='filepath')

            submit_button = gr.Button("Submit Eval")
            submission_result = gr.Markdown()

            submit_button.click(
                add_new_eval,
                inputs=[
                    human_file,
                    skempi_file,
                    model_name_textbox,
                    revision_name_textbox,
                    benchmark_types,
                    similarity_tasks,
                    function_prediction_aspect,
                    function_dataset,
                    family_prediction_dataset,
                    save_checkbox,
                ],
            )
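
            # Gradio passes input component values to the callback positionally, so this list
            # must stay aligned with add_new_eval's parameter order; the hidden function_dataset
            # textbox supplies the function_prediction_dataset argument.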

    def refresh_data():
        value = get_baseline_df(method_names, metric_names)
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()
