Spaces:
Sleeping
Sleeping
File size: 7,240 Bytes
799e642 77578b5 7985b8f 799e642 7985b8f 799e642 77578b5 799e642 7985b8f 799e642 cc14b7c 799e642 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
import uuid
import gradio as gr
import torch
import os
import pandas as pd
from rdkit import Chem
from scripts.pla_net_inference import main
from utils.args import ArgsInit
# Startup diagnostics: run nvidia-smi and report whether torch can see CUDA.
os.system("nvidia-smi")
print("TORCH_CUDA", torch.cuda.is_available())

# URL of the PLA-Net article.
PROJECT_URL = "https://www.nature.com/articles/s41598-022-12180-x"

# Base directory from which data, checkpoint and temporary file paths are built.
DEFAULT_PATH_DOCKER = "/home/user/app"

# Target protein IDs the app accepts (offered in the dropdown and checked
# again before inference).
ENABLED_MODELS = [
    "aa2ar", "abl1", "ace", "aces", "ada", "ada17",
    "adrb1", "adrb2", "akt1", "akt2", "aldr", "ampc",
    "andr", "aofb", "bace1", "braf", "cah2", "casp3",
    "cdk2", "comt", "cp2c9", "cp3a4", "csf1r", "cxcr4",
    "def", "dhi1", "dpp4", "drd3", "dyr", "egfr",
    "esr1", "esr2", "fa10", "fa7", "fabp4", "fak1",
    "fgfr1", "fkb1a", "fnta", "fpps", "gcr", "glcm",
    "gria2", "grik1", "hdac2", "hdac8", "hivint", "hivpr",
    "hivrt", "hmdh", "hs90a", "hxk4", "igf1r", "inha",
    "ital", "jak2", "kif11", "kit", "kith", "kpcb",
    "lck", "lkha4", "mapk2", "mcr", "met", "mk01",
    "mk10", "mk14", "mmp13", "mp2k1", "nos1", "nram",
    "pa2ga", "parp1", "pde5a", "pgh1", "pgh2", "plk1",
    "pnph", "ppara", "ppard", "pparg", "prgr", "ptn1",
    "pur2", "pygm", "pyrd", "reni", "rock1", "rxra",
    "sahh", "src", "tgfr1", "thb", "thrb", "try1",
    "tryb1", "tysy", "urok", "vgfr2", "wee1", "xiap",
]
def load_and_filter_data(protein_id, ligand_smiles):
    """Validate a request, run PLA-Net inference and return the predictions.

    Args:
        protein_id: Target protein identifier; must be in ``ENABLED_MODELS``.
        ligand_smiles: One or more SMILES strings separated by ``':'``.

    Returns:
        The ``pandas.DataFrame`` read back from the predictions CSV at the
        path passed to ``main`` as ``args.output_file``.

    Raises:
        gr.Error: If no SMILES string is supplied, if ``protein_id`` is not
            an enabled target, or if RDKit cannot parse one of the SMILES.
    """
    # Short random id used to name this request's temporary CSV files.
    random_id = str(uuid.uuid4())[:8]
    print("Inference ID: ", random_id)

    # Split on ':' and drop blank entries *before* validating. Checking only
    # the raw string lets inputs such as ":" or "CCO:" through, after which
    # blank entries reach the SMILES parser and an empty CSV can be handed
    # to inference.
    smiles_list = [
        entry.strip()
        for entry in (ligand_smiles or "").split(":")
        if entry.strip()
    ]
    if not smiles_list:
        raise gr.Error("!SMILES string is required 💥", duration=5)

    if protein_id not in ENABLED_MODELS:
        error_msg = f"!Invalid 💥 target protein ID, the available options are: {ENABLED_MODELS}. To do inference other proteins, you can run the model locally an train the model for each target protein."
        raise gr.Error(error_msg, duration=5)

    print("Smiles to predict: ", smiles_list)
    print("Target Protein ID: ", protein_id)

    # RDKit returns None for strings it cannot parse; report all of them at once.
    invalid_smiles = [
        smiles for smiles in smiles_list if Chem.MolFromSmiles(smiles) is None
    ]
    if invalid_smiles:
        error_msg = f"!Invalid 💥 SMILES string(s) : {', '.join(invalid_smiles)}"
        raise gr.Error(error_msg, duration=5)

    # Per-request input/output files live under the app's tmp folder.
    # NOTE(review): these files are never removed here; confirm whether
    # something else cleans the folder or whether they should be deleted.
    tmp_dir = f"{DEFAULT_PATH_DOCKER}/example/tmp"
    os.makedirs(tmp_dir, exist_ok=True)
    input_csv = f"{tmp_dir}/{random_id}_input_smiles.csv"
    output_csv = f"{tmp_dir}/{random_id}_output_predictions.csv"

    # Save SMILES to CSV for the inference script.
    pd.DataFrame({"smiles": smiles_list}).to_csv(input_csv, index=False)

    # Configure and run inference.
    args = ArgsInit().args
    args.nclasses = 2
    args.batch_size = 10
    args.use_prot = True
    args.freeze_molecule = True
    args.conv_encode_edge = True
    args.learn_t = True
    args.binary = True
    # NOTE(review): forced on regardless of torch.cuda.is_available();
    # confirm the inference script copes with CPU-only hosts.
    args.use_gpu = True
    args.target = protein_id
    args.target_list = f"{DEFAULT_PATH_DOCKER}/data/datasets/AD/Targets_Fasta.csv"
    args.target_checkpoint_path = f"{DEFAULT_PATH_DOCKER}/checkpoints/PLA-Net/BINARY_{protein_id}"
    args.input_file_smiles = input_csv
    args.output_file = output_csv
    print("Args: ", args)
    main(args)

    # Load the predictions written by the inference run.
    df = pd.read_csv(output_csv)
    print("Prediction Results output: ", df)
    return df
def load_description(fp):
    """Return the full text of the UTF-8 encoded file at ``fp``."""
    with open(fp, encoding='utf-8') as handle:
        return handle.read()
def run_inference(protein_id, ligand_smile):
    """Click handler: return the prediction table for the given inputs.

    Passes both arguments straight to ``load_and_filter_data`` and returns
    its result unchanged.
    """
    return load_and_filter_data(protein_id, ligand_smile)
def create_interface():
    """Build the Gradio Blocks UI for PLA-Net inference and return it.

    The page shows a title loaded from ``gradio/title.md``, a target-protein
    dropdown, a SMILES textbox with examples, a Run button wired to
    ``run_inference``, a results table, and two closing Markdown notes.

    Returns:
        The ``gr.Blocks`` instance; the caller is responsible for launching it.
    """
    # Long texts are kept flush-left so the rendered Markdown is unchanged.
    targets_note = " Check the available target proteins [here](https://github.com/juliocesar-io/PLA-Net/blob/main/data/targets.md). The corresponding protein sequences are available in [here](https://github.com/juliocesar-io/PLA-Net/blob/main/data/datasets/AD/Targets_Fasta.csv)."
    model_summary = """
PLA-Net model for predicting interactions
between small organic molecules and one of the 102 target proteins in the AD dataset. Graph representations
of the molecule and a given target protein are generated from SMILES and FASTA sequences and are used as
input to the Ligand Module (LM) and Protein Module (PM), respectively. Each module comprises a deep GCN
followed by an average pooling layer, which extracts relevant features of their corresponding input graph. Both
representations are finally concatenated and combined through a fully connected layer to predict the target–
ligand interaction probability.
"""
    citation = """
Ruiz Puentes, P., Rueda-Gensini, L., Valderrama, N. et al.
Predicting target–ligand interactions with graph convolutional networks
for interpretable pharmaceutical discovery. Sci Rep 12, 8434 (2022).
[https://doi.org/10.1038/s41598-022-12180-x](https://doi.org/10.1038/s41598-022-12180-x)
"""
    example_smiles = [
        "Cn4c(CCC(=O)Nc3ccc2ccn(CC[C@H](CO)n1cnc(C(N)=O)c1)c2c3)nc5ccccc45",
        "OCCCCCn1cnc2C(O)CN=CNc12",
        "Nc4nc(c1ccco1)c3ncn(C(=O)NCCc2ccccc2)c3n4",
    ]

    with gr.Blocks(title="PLA-Net Web Inference") as demo:
        gr.HTML(load_description("gradio/title.md"))

        # --- Inputs: target protein on the left, ligand on the right ---
        gr.Markdown("### Input")
        with gr.Row():
            with gr.Column():
                gr.Markdown("#### Target Protein")
                protein_id = gr.Dropdown(
                    label="Target Protein ID",
                    info="Select the target protein from the dropdown menu.",
                    choices=ENABLED_MODELS,
                    value="ada",
                )
                gr.Markdown(targets_note)
            with gr.Column():
                gr.Markdown("#### Ligand")
                ligand_smile = gr.Textbox(
                    label="SMILES string(s)",
                    placeholder="SMILES input",
                    info="Provide SMILES input (separate multiple SMILES with ':' )",
                )
                gr.Examples(
                    examples=example_smiles,
                    inputs=ligand_smile,
                    label="Example SMILES",
                )

        run_button = gr.Button("Run")

        # --- Output table, filled by the click handler ---
        gr.Markdown("### Output")
        predictions_table = gr.Dataframe(
            headers=["target", "smiles", "interaction_probability", "interaction_class"],
            datatype=["str", "str", "number", "number"],
            label="Prediction Results",
        )
        run_button.click(
            fn=run_inference,
            inputs=[protein_id, ligand_smile],
            outputs=predictions_table,
        )

        # --- Model description and citation ---
        gr.Markdown(model_summary)
        gr.Markdown(citation)

    return demo
if __name__ == "__main__":
    # Build the Gradio app and start serving it when run as a script.
    # (A stray trailing "|" after launch() made this line a syntax error.)
    interface = create_interface()
    interface.launch()