|
|
import html
from datetime import datetime

import gradio as gr
import pandas as pd
from datasets import load_dataset
from gradio_leaderboard import Leaderboard, ColumnFilter

from about import ENDPOINTS, API
from about import submissions_repo, results_repo
from evaluate import submit_data, evaluate_data
|
|
|
|
|
def make_user_clickable(name: str) -> str:
    """Return an HTML anchor linking *name* to its Hugging Face profile page.

    The name is typed in by participants, so it is HTML-escaped before being
    placed in the ``href`` attribute and in the link text. Names without
    HTML-special characters produce exactly the same markup as before.

    Args:
        name: Hugging Face username. Non-string values are formatted with
            ``str()`` first, as the f-string formatting did previously.

    Returns:
        An ``<a>`` element (opening in a new tab) whose text is the username.
    """
    # quote=True also escapes " and ', so the value cannot close the attribute.
    safe_name = html.escape(str(name), quote=True)
    link = f'https://huggingface.co/{safe_name}'
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{safe_name}</a>'
|
|
def make_tag_clickable(tag: str) -> str:
    """Return an HTML anchor with the fixed text ``link`` that points at *tag*.

    The URL is supplied by participants, so it is HTML-escaped before being
    placed in the ``href`` attribute; a stray quote can no longer terminate
    the attribute and inject extra markup.

    Args:
        tag: Target URL. Non-string values are formatted with ``str()``
            first, as the f-string formatting did previously.

    Returns:
        An ``<a>`` element (opening in a new tab) labelled ``link``.
    """
    # quote=True escapes " and ' in addition to &, < and >.
    safe_href = html.escape(str(tag), quote=True)
    return f'<a target="_blank" href="{safe_href}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
|
|
|
|
|
def get_leaderboard(endpoint: str) -> pd.DataFrame:
    """Build the leaderboard table for a single endpoint.

    Downloads the ``train`` split of ``results_repo`` (forcing a fresh
    download on every call), keeps the rows whose ``endpoint`` column equals
    *endpoint*, drops rows whose ``user`` is ``'test'``, and keeps only the
    most recent submission of each user.

    Args:
        endpoint: Value to match against the ``endpoint`` column.

    Returns:
        A DataFrame with the columns ``user``, ``MAE``, ``R2``,
        ``Spearman R``, ``Kendall's Tau``, ``submission time`` and
        ``model details``. ``user`` and ``model details`` hold HTML links.
        The frame is empty if nothing matches.
    """
    display_columns = ["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details"]

    results = load_dataset(results_repo, split='train', download_mode="force_redownload")
    full_df = results.to_pandas()

    # One combined mask plus an explicit copy: the column assignments below
    # then write to an independent frame instead of a filtered slice, which
    # is what raised SettingWithCopyWarning.
    keep = (full_df['endpoint'] == endpoint) & (full_df['user'] != 'test')
    to_show = full_df.loc[keep].copy()

    if to_show.empty:
        # No submissions for this endpoint: return an empty table that still
        # has the display columns, without running the group-by on nothing.
        return pd.DataFrame(columns=display_columns)

    to_show['user'] = to_show['user'].apply(make_user_clickable).astype(str)
    to_show['model details'] = to_show['model_report'].apply(make_tag_clickable).astype(str)
    to_show["submission_time"] = pd.to_datetime(to_show["submission_time"])

    # idxmax yields, per user, the index label of the newest submission.
    latest_idx = to_show.groupby("user")["submission_time"].idxmax()
    latest_per_user = to_show.loc[latest_idx].reset_index(drop=True)
    latest_per_user = latest_per_user.rename(columns={"submission_time": "submission time"})

    return latest_per_user[display_columns]
|
|
|
|
|
|
|
|
# Column layout shared by every leaderboard tab; the two lists are positional
# counterparts (one datatype per column).
_LEADERBOARD_COLUMNS = ["user", "MAE", "R2", "Spearman R", "Kendall's Tau", "submission time", "model details"]
_LEADERBOARD_DATATYPES = ['markdown', 'number', 'number', 'number', 'number', 'str', 'markdown']

# Markdown shown on the "About" tab.
# NOTE(review): the emoji in this text and in the tab labels were mis-encoded
# in the source (they showed up as stray Thai characters, two of them followed
# by a spurious line break). The glyphs below are a best-effort restoration;
# confirm them against the intended design.
_WELCOME_MD = """
# 💊 OpenADMET + XXX
## Computational Blind Challenge in ADMET

Welcome to the **XXX**, hosted by **OpenADMET** in collaboration with **XXX**.
This is a community-driven initiative to benchmark predictive models for ADMET properties in drug discovery.

Your task is to develop and submit predictive models for key ADMET properties on a blinded test set of real world drug discovery data.

## ADMET Properties:
*Absorption*, *Distribution*, *Metabolism*, *Excretion*, *Toxicology*--or **ADMET**--endpoints sit in the middle of the assay cascade and can make or break preclinical candidate molecules.
For this blind challenge we selected several crucial endpoints for the community to predict:
- LogD
- Kinetic Solubility **KSOL**: uM
- Mouse Liver Microsomal (**MLM**) *CLint*: mL/min/kg
- Human Liver Microsomal (**HLM**) *Clint*: mL/min/kg
- Caco-2 Efflux Ratio
- Caco-2 Papp A>B (10^-6 cm/s)
- Mouse Plasma Protein Binding (**MPPB**): % Unbound
- Mouse Brain Protein Binding (**MBPB**): % Unbound
- Rat Liver Microsomal (**RLM**) *Clint*: mL/min/kg
- Mouse Gastrocnemius Muscle Binding (**MGMB**): % Unbound

## ✅ How to Participate
1. **Register**: Create an account with Hugging Face.
2. **Download the Public Dataset**: Clone the XXX dataset [link]
3. **Train Your Model**: Use the provided training data for each ADMET property of your choice.
4. **Submit Predictions**: Follow the instructions in the *Submit* tab to upload your predictions.
5. Join the discussion on the [Challenge Discord](link)!

## 📊 Data:

The training set will have the following variables:

| Column | Unit | data type | Description |
|:-----------------------------|-----------|-----------|:-------------|
| Molecule Name | | str | Identifier for the molecule |
| Smiles | | str | Text representation of the 2D molecular structure |
| LogD | | float | LogD calculation |
| KSol | uM | float | Kinetic Solubility |
| MLM CLint | mL/min/kg | float | Mouse Liver Microsomal |
| HLM CLint | mL/min/kg | float | Human Liver Microsomal |
| Caco-2 Permeability Efflux | | float | Caco-2 Permeability Efflux |
| Caco-2 Permeability Papp A>B | 10^-6 cm/s| float | Caco-2 Permeability Papp A>B |
| MPPB | % Unbound | float | Mouse Plasma Protein Binding |
| MBPB | % Unbound | float | Mouse Brain Protein Binding |
| RLM CLint | mL/min/kg | float | Rat Liver Microsomal Stability |
| MGMB. | % Unbound | float | Mouse Gastrocnemius Muscle Binding |

At test time, we will only provide the Molecule Name and Smiles. Make sure your submission file has the same columns!

## 📏 Evaluation
The challenge will be judged based on the judging criteria outlined here.

- TBD

📅 **Timeline**:
- TBD

---

"""


def _normalize_username(raw):
    """Return *raw* without surrounding whitespace, or None when it is blank."""
    return (raw or "").strip() or None


def _build_leaderboard(endpoint):
    """Create the Leaderboard component for *endpoint* in the active Blocks context."""
    return Leaderboard(
        value=get_leaderboard(endpoint),
        # Fresh list copies so components never share a mutable list.
        datatype=list(_LEADERBOARD_DATATYPES),
        select_columns=list(_LEADERBOARD_COLUMNS),
        search_columns=["user"],
        render=True,
    )


def gradio_interface():
    """Assemble the challenge web UI and return it without launching it.

    The interface has three top-level tabs:

    * About: the static welcome/description markdown.
    * Leaderboard: an "OVERALL" sub-tab (endpoint ``'Average'``) followed by
      one sub-tab per entry of ``ENDPOINTS``. Each table is filled by calling
      ``get_leaderboard`` once, while the interface is being built.
    * Submit Predictions: participant details, a CSV upload and a submit
      button. A click runs ``submit_data``; on success the status box is
      revealed, and then ``evaluate_data`` runs on the returned filename.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    with gr.Blocks(title="OpenADMET ADMET Challenge") as demo:

        gr.Markdown("## Welcome to the OpenADMET + XXX Blind Challenge!")

        gr.HTML("""
        <style>
        /* bold only the "Overall" tab label */
        #lb_subtabs [role="tab"][aria-controls="all_tab"] {
        font-weight: 700 !important;
        }
        </style>
        """)

        with gr.Tabs(elem_classes="tab-buttons"):
            # Leaderboard components keyed by endpoint name ('Average' = overall).
            lboard_dict = {}

            with gr.TabItem("📖About"):
                gr.Markdown(_WELCOME_MD)

            with gr.TabItem("🚀Leaderboard", elem_id="lb_subtabs"):
                gr.Markdown("View the leaderboard for each ADMET endpoint by selecting the appropriate tab.")

                with gr.TabItem('OVERALL', elem_id="all_tab"):
                    lboard_dict['Average'] = _build_leaderboard('Average')

                for endpoint in ENDPOINTS:
                    with gr.TabItem(endpoint):
                        lboard_dict[endpoint] = _build_leaderboard(endpoint)

            with gr.TabItem("Submit Predictions"):
                gr.Markdown(
                    """
                    # ADMET Endpoints Submission
                    Upload your prediction files here as a csv file.
                    """
                )
                # Values carried between the chained submit/evaluate callbacks.
                filename = gr.State(value=None)
                eval_state = gr.State(value=None)
                user_state = gr.State(value=None)

                with gr.Row():

                    with gr.Column():
                        gr.Markdown(
                            """
                            ## Participant Information
                            To participate, we **only** require a Hugging Face username, which will be displayed on the leaderboard.
                            Other information is optional but helps us track participation.
                            If you wish to be included in Challenge discussions, please provide your Discord username and email.
                            If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation.

                            We also ask you to provide a link to a report describing your method. While not mandatory at the time of participation,
                            you need to submit the link before the challenge deadline in order to be considered for the final leaderboard.

                            """
                        )

                        username_input = gr.Textbox(
                            label="Username",
                            placeholder="Enter your Hugging Face username",
                            info="This will be displayed on the leaderboard."
                        )
                    with gr.Column():

                        participant_name = gr.Textbox(
                            label="Participant Name",
                            placeholder="Enter your name (optional)",
                            info="This will not be displayed on the leaderboard but will be used for tracking participation."
                        )
                        discord_username = gr.Textbox(
                            label="Discord Username",
                            placeholder="Enter your Discord username (optional)",
                            info="Enter the username you will use for the Discord channel (if you are planning to engage in the discussion)."
                        )
                        email = gr.Textbox(
                            label="Email",
                            placeholder="Enter your email (optional)",
                        )
                        affiliation = gr.Textbox(
                            label="Affiliation",
                            placeholder="Enter your school/company affiliation (optional)",
                        )
                        model_tag = gr.Textbox(
                            label="Model Report",
                            placeholder="Link to a report describing your method (optional)",
                        )

                with gr.Row():
                    with gr.Column():
                        gr.Markdown(
                            """
                            ## Submission Instructions
                            Upload a single CSV file containing your predictions for all ligands in the test set.
                            Only your latest submission will be considered.

                            You can download the ligand test set here (lik/to/download/smiles/csv).
                            """
                        )
                    with gr.Column():
                        predictions_file = gr.File(
                            label="Single file with ADMET predictions (.csv)",
                            file_types=[".csv"],
                            file_count="single",
                        )

                # Mirror the textbox into user_state. The value is stripped so
                # that surrounding whitespace never reaches the stored username;
                # a blank box stores None.
                username_input.change(
                    fn=_normalize_username,
                    inputs=username_input,
                    outputs=user_state
                )

                submit_btn = gr.Button("Submit Predictions")
                # Hidden until a submission succeeds.
                message = gr.Textbox(label="Status", lines=1, visible=False)

                submit_btn.click(
                    submit_data,
                    inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation, model_tag],
                    outputs=[message, filename],
                ).success(
                    # Reveal the status box with the message written by submit_data.
                    fn=lambda m: gr.update(value=m, visible=True),
                    inputs=[message],
                    outputs=[message],
                ).success(
                    fn=evaluate_data,
                    inputs=[filename],
                    outputs=[eval_state]
                )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: build the Blocks app, then start serving it.
    app = gradio_interface()
    app.launch()