|
import gradio as gr |
|
from gradio_leaderboard import Leaderboard, ColumnFilter |
|
import pandas as pd |
|
|
|
from about import submissions_repo, results_repo |
|
from evaluate import submit_data, evaluate_data |
|
|
|
from datasets import load_dataset |
|
from datetime import datetime |
|
from about import ENDPOINTS |
|
|
|
|
|
def get_leaderboard(dset):
    """Download the published results and return the leaderboard table.

    The ``train`` split of ``results_repo`` is re-downloaded on every call
    (``download_mode="force_redownload"``). The ``dset`` argument is part of
    the signature, but its value is never read.

    Returns:
        A DataFrame restricted to the columns ``user``, ``Model``, ``MAE``,
        ``R2``, ``Spearman R`` and ``Kendall's Tau``, without the rows whose
        ``user`` equals ``'test'``.
    """
    leaderboard_columns = ["user", "Model", "MAE", "R2", "Spearman R", "Kendall's Tau"]

    results = load_dataset(results_repo, split='train', download_mode="force_redownload")
    results_df = pd.DataFrame(results)

    # Drop the 'test' user's rows and keep only the displayed columns.
    is_real_user = results_df['user'] != 'test'
    return results_df.loc[is_real_user, leaderboard_columns]
|
|
|
# Markdown rendered on the "About" tab. Kept at column 0 so the rendered
# output does not depend on how the Markdown component treats indentation.
_WELCOME_MD = """
# OpenADMET + XXX
## Computational Blind Challenge in ADMET

Welcome to the **XXX**, hosted by **OpenADMET** in collaboration with **XXX**.
This is a community-driven initiative to benchmark predictive models for ADMET properties in drug discovery.

Your task is to develop and submit predictive models for key ADMET properties on a blinded test set of real world drug discovery data.

## ADMET Properties:
*Absorption*, *Distribution*, *Metabolism*, *Excretion*, *Toxicology*--or **ADMET**--endpoints sit in the middle of the assay cascade and can make or break preclinical candidate molecules.
For this blind challenge we selected several crucial endpoints for the community to predict:
- LogD
- Kinetic Solubility **KSOL**: uM
- Mouse Liver Microsomal (**MLM**) *CLint*: mL/min/kg
- Human Liver Microsomal (**HLM**) *Clint*: mL/min/kg
- Caco-2 Efflux Ratio
- Caco-2 Papp A>B (10^-6 cm/s)
- Mouse Plasma Protein Binding (**MPPB**): % Unbound
- Mouse Brain Protein Binding (**MBPB**): % Unbound
- Rat Liver Microsomal (**RLM**) *Clint*: mL/min/kg
- Mouse Gastrocnemius Muscle Binding (**MGMB**): % Unbound

## How to Participate
1. **Register**: Create an account with Hugging Face.
2. **Download the Public Dataset**: Clone the XXX dataset [link]
3. **Train Your Model**: Use the provided training data for each ADMET property of your choice.
4. **Submit Predictions**: Follow the instructions in the *Submit* tab to upload your predictions.
5. Join the discussion on the [Challenge Discord](link)!

## Data:

The training set will have the following variables:

| Column                       | Unit      | data type | Description |
|:-----------------------------|-----------|-----------|:-------------|
| Molecule Name                |           | str       | Identifier for the molecule |
| Smiles                       |           | str       | Text representation of the 2D molecular structure |
| LogD                         |           | float     | LogD calculation |
| KSol                         | uM        | float     | Kinetic Solubility |
| MLM CLint                    | mL/min/kg | float     | Mouse Liver Microsomal |
| HLM CLint                    | mL/min/kg | float     | Human Liver Microsomal |
| Caco-2 Permeability Efflux   |           | float     | Caco-2 Permeability Efflux |
| Caco-2 Permeability Papp A>B | 10^-6 cm/s| float     | Caco-2 Permeability Papp A>B |
| MPPB                         | % Unbound | float     | Mouse Plasma Protein Binding |
| MBPB                         | % Unbound | float     | Mouse Brain Protein Binding |
| RLM CLint                    | mL/min/kg | float     | Rat Liver Microsomal Stability |
| MGMB                         | % Unbound | float     | Mouse Gastrocnemius Muscle Binding |

At test time, we will only provide the Molecule Name and Smiles. Make sure your submission file has the same columns!

## Evaluation
The challenge will be judged based on the judging criteria outlined here.

- TBD

**Timeline**:
- TBD

---

"""

# Heading of the "Submit Predictions" tab.
_SUBMIT_INTRO_MD = """
# ADME Endpoints Submission
Upload your prediction files here as a csv file.
"""

# Explains which participant fields are required and which are optional.
_PARTICIPANT_INFO_MD = """
## Participant Information
To participate, you must enter a Hugging Face username, or alias, which will be displayed on the leaderboard.
Other information is optional but helps us track participation.
If you wish to be included in Challenge discussions, please provide your Discord username and email.
If you wish to be included in a future publication with the Challenge results, please provide your name and affiliation.
"""

# Explains what file to upload.
_SUBMISSION_INSTRUCTIONS_MD = """
## Submission Instructions
Upload a single CSV file containing your predictions for all ligands in the test set.
You can download the ligand test set here (lik/to/download/smiles/csv).
"""


def _blank_to_none(text):
    """Return *text* unchanged, or ``None`` when it is missing or only whitespace."""
    return text if text and text.strip() else None


def _mock_leaderboard_frames():
    """Build the list of mock score tables displayed on the endpoint tabs."""
    scores_a = pd.DataFrame({
        "user": ["User1", "User2", "User3"],
        "MAE": [0.1, 0.2, 0.15],
        "R2": [0.94, 0.92, 0.89],
        "Spearman R": [0.93, 0.91, 0.88],
        "Kendall's Tau": [0.90, 0.89, 0.85],
    })
    scores_b = pd.DataFrame({
        "user": ["User1", "User2", "User3"],
        "MAE": [0.2, 0.3, 0.15],
        "R2": [0.2, 0.72, 0.89],
        "Spearman R": [0.91, 0.71, 0.68],
        "Kendall's Tau": [0.90, 0.4, 0.7],
    })
    return [
        scores_a, scores_a, scores_b, scores_a, scores_b,
        scores_a, scores_a, scores_b, scores_a, scores_b,
    ]


def _build_leaderboard_tab():
    """Populate the leaderboard tab with one sub-tab per entry of ``ENDPOINTS``."""
    gr.Markdown("View the leaderboard for each ADMET endpoint by selecting the appropriate tab.")

    mock_frames = _mock_leaderboard_frames()
    for index, endpoint in enumerate(ENDPOINTS):
        # Wrap around so that ENDPOINTS holding more entries than there are
        # mock tables does not raise IndexError.
        frame = mock_frames[index % len(mock_frames)]
        with gr.TabItem(endpoint):
            # NOTE(review): `value` is a static DataFrame; `every` presumably
            # only triggers refreshes when `value` is callable -- confirm
            # against the Leaderboard component documentation.
            Leaderboard(
                value=frame,
                datatype=['str', 'number', 'number', 'number', 'number'],
                select_columns=["user", "MAE", "R2", "Spearman R", "Kendall's Tau"],
                search_columns=["user"],
                every=60,
            )


def _build_submission_tab():
    """Populate the submission tab: participant form, file upload and submit wiring."""
    gr.Markdown(_SUBMIT_INTRO_MD)

    # Per-session values passed between the event handlers below.
    filename = gr.State(value=None)
    eval_state = gr.State(value=None)
    user_state = gr.State(value=None)

    with gr.Row():
        with gr.Column():
            gr.Markdown(_PARTICIPANT_INFO_MD)
            username_input = gr.Textbox(
                label="Username",
                placeholder="Enter your Hugging Face username",
                info="This will be displayed on the leaderboard.",
            )
        with gr.Column():
            participant_name = gr.Textbox(
                label="Participant Name",
                placeholder="Enter your name (optional)",
                info="This will not be displayed on the leaderboard but will be used for tracking participation.",
            )
            discord_username = gr.Textbox(
                label="Discord Username",
                placeholder="Enter your Discord username (optional)",
                info="Enter the username you will use for the Discord channel (if you are planning to engage in the discussion).",
            )
            email = gr.Textbox(
                label="Email",
                placeholder="Enter your email (optional)",
            )
            affiliation = gr.Textbox(
                label="Affiliation",
                placeholder="Enter your school/company affiliation (optional)",
            )

    with gr.Row():
        with gr.Column():
            gr.Markdown(_SUBMISSION_INSTRUCTIONS_MD)
        with gr.Column():
            predictions_file = gr.File(
                label="Single file with ADME predictions (.csv)",
                file_types=[".csv"],
                file_count="single",
            )

    # Mirror the textbox into `user_state`, storing None for blank input.
    username_input.change(
        fn=_blank_to_none,
        inputs=username_input,
        outputs=user_state,
    )

    submit_btn = gr.Button("Submit Predictions")
    # Hidden until a submission has produced a status message.
    message = gr.Textbox(label="Status", lines=1, visible=False)

    # NOTE(review): no handler visible here ever writes to the `filename`
    # state, so `evaluate_data` receives its initial value (None). Confirm
    # whether `submit_data` is meant to output to `filename` as well.
    submit_btn.click(
        submit_data,
        inputs=[predictions_file, user_state, participant_name, discord_username, email, affiliation],
        outputs=[message],
    ).success(
        # Reveal the status box with the message written by submit_data.
        fn=lambda m: gr.update(value=m, visible=True),
        inputs=[message],
        outputs=[message],
    ).success(
        fn=evaluate_data,
        inputs=[filename],
        outputs=[eval_state],
    )


def gradio_interface():
    """Assemble the challenge web application and return it without launching it.

    The app consists of a welcome banner and three tabs: an "About" page, a
    leaderboard with one sub-tab per entry of ``ENDPOINTS``, and a submission
    form wired to ``submit_data`` and ``evaluate_data``.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for ``launch()``.
    """
    with gr.Blocks(title="OpenADMET ADMET Challenge") as demo:
        gr.Markdown("## Welcome to the OpenADMET + XXX Blind Challenge!")

        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("About"):
                gr.Markdown(_WELCOME_MD)

            with gr.TabItem("Leaderboard"):
                _build_leaderboard_tab()

            with gr.TabItem("Submit Predictions"):
                _build_submission_tab()

    return demo
|
|
|
if __name__ == "__main__":
    # Script entry point: build the interface first, then launch it.
    app = gradio_interface()
    app.launch()