__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']

import gradio as gr
import pandas as pd
import re
import os
import json
import yaml
import matplotlib.pyplot as plt
import seaborn as sns
import plotnine as p9

from src.about import *
from src.bin.PROBE import run_probe
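
# NOTE: names such as CSV_RESULT_PATH, color_dict, TASK_INFO, benchmark_specific_metrics,
# LEADERBOARD_INTRODUCTION, LLM_BENCHMARKS_TEXT, EVALUATION_QUEUE_TEXT,
# CITATION_BUTTON_TEXT / CITATION_BUTTON_LABEL and the *_options lists used below are
# assumed to be provided by the wildcard import from src.about.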

global data_component, filter_component


def get_method_color(method):
    return color_dict.get(method, 'black')  # If method is not in color_dict, use black


def draw_scatter_plot_similarity(methods_selected, x_metric, y_metric, title):
    df = pd.read_csv(CSV_RESULT_PATH)

    # Filter the dataframe based on selected methods
    filtered_df = df[df['method_name'].isin(methods_selected)].copy()  # copy so the color column can be added safely

    def get_method_color(method):
        return color_dict.get(method.upper(), 'black')

    # Add a new column to the dataframe for the color
    filtered_df['color'] = filtered_df['method_name'].apply(get_method_color)

    adjust_text_dict = {
        'expand_text': (1.15, 1.4), 'expand_points': (1.15, 1.25), 'expand_objects': (1.05, 1.5),
        'expand_align': (1.05, 1.2), 'autoalign': 'xy', 'va': 'center', 'ha': 'center',
        'force_text': (.0, 1.), 'force_objects': (.0, 1.),
        'lim': 500000, 'precision': 1., 'avoid_points': True, 'avoid_text': True
    }
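    # NOTE: adjust_text_dict appears to mirror keyword arguments of the adjustText package
    # (plotnine's geom_text can accept such a dict via its adjust_text parameter), but it is
    # not passed to any layer below, so it currently has no effect on the figure.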

    # Create the scatter plot using plotnine (ggplot)
    g = (p9.ggplot(data=filtered_df,
                   mapping=p9.aes(x=x_metric,     # Use the selected x_metric
                                  y=y_metric,     # Use the selected y_metric
                                  color='color',  # Use the dynamically generated color
                                  label='method_name'))  # Label each point by the method name
         + p9.geom_point(size=3)  # Add points with no jitter, set point size
         + p9.geom_text(nudge_y=0.02, size=8)  # Add method names as labels, nudge slightly above the points
         + p9.labs(title=title, x=f"{x_metric}", y=f"{y_metric}")  # Dynamic labels for X and Y axes
         + p9.scale_color_identity()  # Use colors directly from the dataframe
         + p9.theme(legend_position='none',
                    figure_size=(8, 8),  # Set figure size
                    axis_text=p9.element_text(size=10),
                    axis_title_x=p9.element_text(size=12),
                    axis_title_y=p9.element_text(size=12))
         )

    # Save the plot as an image
    save_path = "./plot_images"  # Ensure this folder exists or adjust the path
    os.makedirs(save_path, exist_ok=True)  # Create directory if it doesn't exist
    filename = os.path.join(save_path, title.replace(" ", "_") + "_Similarity_Scatter.png")
    g.save(filename=filename, dpi=400)

    return filename


def benchmark_plot(benchmark_type, methods_selected, x_metric, y_metric):
    if benchmark_type == 'flexible':
        # Use general visualizer logic
        return general_visualizer(methods_selected, x_metric=x_metric, y_metric=y_metric)
    elif benchmark_type == 'similarity':
        title = f"{x_metric} vs {y_metric}"
        return draw_scatter_plot_similarity(methods_selected, x_metric, y_metric, title)
    elif benchmark_type == 'Benchmark 3':
        return benchmark_3_plot(x_metric, y_metric)
    elif benchmark_type == 'Benchmark 4':
        return benchmark_4_plot(x_metric, y_metric)
    else:
        return "Invalid benchmark type selected."
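
# NOTE: benchmark_3_plot and benchmark_4_plot are referenced above but not defined in this
# file; they are assumed to be provided elsewhere (e.g. via the src.about wildcard import).
# A minimal, hypothetical fallback, kept commented out so it never shadows the real
# implementations, could look like:
#
# if 'benchmark_3_plot' not in globals():
#     def benchmark_3_plot(x_metric, y_metric):
#         return "Benchmark 3 visualization is not implemented yet."
#
# if 'benchmark_4_plot' not in globals():
#     def benchmark_4_plot(x_metric, y_metric):
#         return "Benchmark 4 visualization is not implemented yet."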


def get_baseline_df(selected_methods, selected_metrics):
    df = pd.read_csv(CSV_RESULT_PATH)
    present_columns = ["method_name"] + selected_metrics
    df = df[df['method_name'].isin(selected_methods)][present_columns]
    return df


def general_visualizer(methods_selected, x_metric, y_metric):
    df = pd.read_csv(CSV_RESULT_PATH)
    filtered_df = df[df['method_name'].isin(methods_selected)]

    # Create a Seaborn lineplot with method as hue
    plt.figure(figsize=(10, 8))  # Increase figure size
    sns.lineplot(
        data=filtered_df,
        x=x_metric,
        y=y_metric,
        hue="method_name",  # Different colors for different methods
        marker="o",         # Add markers to the line plot
    )

    # Add labels and title
    plt.xlabel(x_metric)
    plt.ylabel(y_metric)
    plt.title(f'{y_metric} vs {x_metric} for selected methods')
    plt.grid(True)

    # Save the plot to display it in Gradio
    plot_path = "plot.png"
    plt.savefig(plot_path)
    plt.close()

    return plot_path
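
# NOTE: the flexible visualizer writes to a fixed "plot.png" in the current working
# directory, so each call overwrites the previous plot; draw_scatter_plot_similarity
# instead saves uniquely named files under ./plot_images.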


def add_new_eval(
    human_file,
    skempi_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    benchmark_type,
    similarity_tasks,
    function_prediction_aspect,
    function_prediction_dataset,
    family_prediction_dataset,
):
    representation_name = model_name_textbox if revision_name_textbox == '' else revision_name_textbox
    results = run_probe(benchmark_type, representation_name, human_file, skempi_file,
                        similarity_tasks, function_prediction_aspect,
                        function_prediction_dataset, family_prediction_dataset)
    return None


# Function to update leaderboard dynamically based on user selection
def update_leaderboard(selected_methods, selected_metrics):
    return get_baseline_df(selected_methods, selected_metrics)


block = gr.Blocks()

with block:
    gr.Markdown(LEADERBOARD_INTRODUCTION)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Leaderboard table
        with gr.TabItem("🏅 PROBE Leaderboard", elem_id="probe-benchmark-tab-table", id=1):
            method_names = pd.read_csv(CSV_RESULT_PATH)['method_name'].unique().tolist()
            metric_names = pd.read_csv(CSV_RESULT_PATH).columns.tolist()
            metrics_with_method = metric_names.copy()
            metric_names.remove('method_name')  # Remove method_name from the metric options

            # Leaderboard section with method and metric selectors
            with gr.Row():
                # Add method and metric selectors for the leaderboard
                leaderboard_method_selector = gr.CheckboxGroup(
                    choices=method_names, label="Select Methods for the Leaderboard",
                    value=method_names, interactive=True
                )
                leaderboard_metric_selector = gr.CheckboxGroup(
                    choices=metric_names, label="Select Metrics for the Leaderboard",
                    value=metric_names, interactive=True
                )

            # Display the filtered leaderboard
            baseline_value = get_baseline_df(method_names, metric_names)
            baseline_header = ["method_name"] + metric_names
            baseline_datatype = ['markdown'] + ['number'] * len(metric_names)

            data_component = gr.components.Dataframe(
                value=baseline_value,
                headers=baseline_header,
                type="pandas",
                datatype=baseline_datatype,
                interactive=False,
                visible=True,
            )

            # Update leaderboard when method/metric selection changes
            leaderboard_method_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )
            leaderboard_metric_selector.change(
                update_leaderboard,
                inputs=[leaderboard_method_selector, leaderboard_metric_selector],
                outputs=data_component
            )

            # Dropdown for benchmark type
            benchmark_types = TASK_INFO + ['flexible']
            benchmark_type_selector = gr.Dropdown(choices=benchmark_types,
                                                  label="Select Benchmark Type for Visualization",
                                                  value="flexible")

            # Dynamic metric selectors (will be updated based on benchmark type)
            x_metric_selector = gr.Dropdown(choices=[], label="Select X-axis Metric")
            y_metric_selector = gr.Dropdown(choices=[], label="Select Y-axis Metric")
            method_selector = gr.CheckboxGroup(choices=method_names, label="Select methods to visualize",
                                               interactive=True, value=method_names)

            # Button to draw the plot for the selected benchmark
            plot_button = gr.Button("Plot Visualization")
            plot_output = gr.Image(label="Plot")

            # Update metric selectors when benchmark type is chosen
            def update_metric_choices(benchmark_type):
                if benchmark_type == 'flexible':
                    # Show all leaderboard metrics for the flexible visualizer
                    return (gr.update(choices=metric_names, value=metric_names[0]),
                            gr.update(choices=metric_names, value=metric_names[1]))
                elif benchmark_type in benchmark_specific_metrics:
                    metrics = benchmark_specific_metrics[benchmark_type]
                    return gr.update(choices=metrics, value=metrics[0]), gr.update(choices=metrics)
                return gr.update(choices=[]), gr.update(choices=[])

            benchmark_type_selector.change(
                update_metric_choices,
                inputs=[benchmark_type_selector],
                outputs=[x_metric_selector, y_metric_selector]
            )

            # Generate the plot based on user input
            plot_button.click(
                benchmark_plot,
                inputs=[benchmark_type_selector, method_selector, x_metric_selector, y_metric_selector],
                outputs=plot_output
            )

        with gr.TabItem("📝 About", elem_id="probe-benchmark-tab-table", id=2):
            with gr.Row():
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="probe-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model's representation files here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name",
                    )
                    benchmark_type = gr.CheckboxGroup(
                        choices=TASK_INFO,
                        label="Benchmark Type",
                        interactive=True,
                    )
                    similarity_tasks = gr.CheckboxGroup(
                        choices=similarity_tasks_options,
                        label="Select Similarity Tasks",
                        interactive=True,
                    )
                    function_prediction_aspect = gr.Radio(
                        choices=function_prediction_aspect_options,
                        label="Select Function Prediction Aspect",
                        interactive=True,
                    )
                    function_prediction_dataset = gr.Radio(
                        choices=function_prediction_dataset_options,
                        label="Select Function Prediction Dataset",
                        interactive=True,
                    )
                    family_prediction_dataset = gr.CheckboxGroup(
                        choices=family_prediction_dataset_options,
                        label="Select Family Prediction Dataset",
                        interactive=True,
                    )

                with gr.Column():
                    human_file = gr.components.File(
                        label="Click to upload the representation file (CSV) for the human dataset",
                        file_count="single", type='filepath')
                    skempi_file = gr.components.File(
                        label="Click to upload the representation file (CSV) for the SKEMPI dataset",
                        file_count="single", type='filepath')

                    submit_button = gr.Button("Submit Eval")
                    submission_result = gr.Markdown()
                    submit_button.click(
                        add_new_eval,
                        inputs=[
                            human_file,
                            skempi_file,
                            model_name_textbox,
                            revision_name_textbox,
                            benchmark_type,
                            similarity_tasks,
                            function_prediction_aspect,
                            function_prediction_dataset,
                            family_prediction_dataset,
                        ],
                    )

    def refresh_data():
        value = get_baseline_df(method_names, metric_names)
        return value

    with gr.Row():
        data_run = gr.Button("Refresh")
        data_run.click(refresh_data, outputs=[data_component])

    with gr.Accordion("Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )

block.launch()