Spaces:
Running
Running
| import os | |
| import base64 | |
| import gradio as gr | |
| import pandas as pd | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| import numpy as np | |
| from src.about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| ) | |
| from src.display.css_html_js import custom_css | |
| from src.envs import API, REPO_ID | |
# Directory that holds this script; bundled assets are resolved against it.
current_dir = os.path.dirname(os.path.realpath(__file__))

# Read the logo once at import time and keep it as base64 text so it can be
# embedded directly in an HTML data URI.
logo_path = os.path.join(current_dir, "images/pb_logo.png")
with open(logo_path, "rb") as logo_file:
    logo_bytes = logo_file.read()
main_logo = base64.b64encode(logo_bytes).decode('utf-8')
def restart_space() -> None:
    """Restart the Space identified by ``REPO_ID`` through the ``API`` object.

    Both ``API`` and ``REPO_ID`` come from ``src.envs``; the call's result is
    discarded.
    """
    API.restart_space(repo_id=REPO_ID)
# Markdown heading for the page; passed to gr.Markdown(TITLE) in the layout.
TITLE="""
# ProteinBench: A Holistic Evaluation of Protein Foundation Models"""
# Introductory text (Markdown with inline HTML) passed to gr.Markdown(INTRO_TEXT);
# the last line links to the paper and the project website.
INTRO_TEXT="""
Recent years have witnessed a surge in the development of protein foundation models,
significantly improving performance in protein prediction and generative tasks
ranging from 3D structure prediction and protein design to conformational dynamics.
However, the capabilities and limitations associated with these models remain poorly understood due to the absence of a unified evaluation framework.
To fill this gap, we introduce <b>ProteinBench</b>,
a holistic evaluation framework designed to enhance the transparency of protein foundation models.
Our approach consists of three key components:
(i) A taxonomic classification of tasks that broadly encompass the main challenges in the protein domain,
based on the relationships between different protein modalities;
(ii) A multi-metric evaluation approach that assesses performance across four key dimensions: quality, novelty, diversity, and robustness;
and (iii) In-depth analyses from various user objectives, providing a holistic view of model performance.
Our comprehensive evaluation of protein foundation models reveals several key findings that shed light on their current capabilities and limitations.
To promote transparency and facilitate further research, we release the evaluation dataset, code, and a public leaderboard publicly for further analysis
and a general modular toolkit. We intend for ProteinBench to be a living benchmark for establishing a standardized,
in-depth evaluation framework for protein foundation models, driving their development and application while fostering collaboration within the field.
## [Paper](https://www.arxiv.org/pdf/2409.06744) | [Website](https://proteinbench.github.io/)
"""
def convert_to_float(df):
    """Cast every column except the first to float and return ``df``.

    The frame is modified in place: each column after the first is replaced
    by its ``astype('float')`` version, and the very same object that was
    passed in is returned. The first column is left untouched.
    """
    value_columns = df.columns[1:]
    for name in value_columns:
        as_float = df[name].astype('float')
        df[name] = as_float
    return df
# ### Space initialisation

# One (label, elem_id, csv_path) entry per leaderboard tab, in display order.
# The position of an entry in this tuple is used as the tab id.
# NOTE(review): the leading "π" in the labels looks like a mis-decoded emoji;
# confirm against the deployed Space before changing it.
LEADERBOARD_TABS = (
    ("π Inverse Folding Leaderboard", 'inverse-folding-table', 'data/inverse_folding.csv'),
    ("π Structure Design Leaderboard", 'structure-design-table', 'data/structure_design.csv'),
    ("π Sequence Design Leaderboard", 'sequence-design-table', 'data/sequence_design.csv'),
    ("π Sequence-Structure Co-Design Leaderboard", 'co-design-table', 'data/co_design.csv'),
    ("π Motif Scaffolding Leaderboard", 'motif-scaffolding-table', 'data/motif_scaffolding.csv'),
    ("π Antibody Design Leaderboard", 'antibody-design-table', 'data/antibody_design.csv'),
    ("π Protein Folding Leaderboard", 'protein-folding-table', 'data/protein_folding.csv'),
    ("π Multi-State Prediction Leaderboard", 'multi-state-prediction-table', 'data/multi_state_prediction.csv'),
    ("π Conformation Prediction Leaderboard", 'conformation-prediction-table', 'data/conformation_prediction.csv'),
)


def _add_leaderboard_tab(label, elem_id, tab_id, csv_path):
    """Create one leaderboard tab holding a read-only table loaded from a CSV.

    Must be called while a ``gr.Tabs`` context is open so the new tab is
    attached to it.

    Args:
        label: Text shown on the tab.
        elem_id: Element id given to the tab item.
        tab_id: Id given to the tab item.
        csv_path: Path of the CSV file to display. Its first column is
            rendered as markdown and every other column as a number.

    Returns:
        The ``gr.components.DataFrame`` created inside the tab.
    """
    with gr.TabItem(label, elem_id=elem_id, id=tab_id):
        with gr.Row():
            # Convert once and reuse the result for both values and headers.
            scores = convert_to_float(pd.read_csv(csv_path))
            table = gr.components.DataFrame(
                value=scores.values,
                # NOTE(review): presumably a very large height so that all
                # rows are shown without an inner scrollbar; confirm.
                height=99999,
                interactive=False,
                headers=scores.columns.to_list(),
                datatype=['markdown'] + (len(scores.columns) - 1) * ['number'],
            )
    return table


demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(TITLE)
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(INTRO_TEXT)
        with gr.Column(scale=1):
            # The logo file is a PNG, so the data URI declares image/png.
            gr.HTML(f'<img src="data:image/png;base64,{main_logo}" style="width:16em;vertical-align: middle"/>')

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Table components keyed by their tab's elem_id.
        leaderboard_tables = {}
        for tab_id, (label, elem_id, csv_path) in enumerate(LEADERBOARD_TABS):
            leaderboard_tables[elem_id] = _add_leaderboard_tab(label, elem_id, tab_id, csv_path)

    with gr.Row():
        with gr.Accordion("π Citation", open=True):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=9,
                elem_id="citation-button",
                show_copy_button=True,
            )
# Run restart_space as a background interval job every 30 minutes (1800 s).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=30 * 60)
scheduler.start()

# Enable the request queue with a default concurrency limit of 40, then launch the app.
demo.queue(default_concurrency_limit=40).launch()