Upload 103 files
This view is limited to 50 files because it contains too many changes; see the raw diff for the full upload.
- Dockerfile +27 -0
- README.md +12 -5
- app.py +219 -0
- case_study/A0A067FT93.pdb +0 -0
- case_study/A0A126P745.pdb +0 -0
- case_study/A0A1H1XG33.pdb +0 -0
- case_study/A0A242M8J4.pdb +0 -0
- case_study/A0A2P2GK84.pdb +0 -0
- case_study/A0A2U7QU15.pdb +0 -0
- case_study/A0A2U7R6V5.pdb +0 -0
- case_study/A0A384E138.pdb +0 -0
- case_study/A0A384E143.pdb +0 -0
- case_study/A0A3Q0KJ78.pdb +1187 -0
- case_study/A0A516RTC5.pdb +0 -0
- case_study/A0A5B8NBE6.pdb +0 -0
- case_study/A0A5B8NBN0.pdb +0 -0
- case_study/A0A7J6F8C5.pdb +0 -0
- case_study/B1KN79.pdb +0 -0
- case_study/C1DMX5.pdb +0 -0
- case_study/C4R826.pdb +0 -0
- case_study/G4VQX9.pdb +0 -0
- case_study/J9PY59.pdb +0 -0
- case_study/J9VGQ7.pdb +0 -0
- case_study/J9VVW8.pdb +0 -0
- case_study/M9PF61.pdb +0 -0
- case_study/O53504.pdb +0 -0
- case_study/Q0RWC9.pdb +0 -0
- case_study/Q1NEJ0.pdb +0 -0
- case_study/Q39VG1.pdb +0 -0
- case_study/Q6F4N4.pdb +0 -0
- case_study/Q72K04.pdb +0 -0
- case_study/Q93UV7.pdb +0 -0
- case_study/Q9AGK2.pdb +0 -0
- case_study/Q9AI62.pdb +0 -0
- case_study/Q9KJF3.pdb +0 -0
- case_study/Q9XZ48.pdb +0 -0
- constants.py +39 -0
- esm/__init__.py +0 -0
- esm/layers/attention.py +70 -0
- esm/layers/blocks.py +153 -0
- esm/layers/codebook.py +88 -0
- esm/layers/ffn.py +29 -0
- esm/layers/geom_attention.py +151 -0
- esm/layers/regression_head.py +24 -0
- esm/layers/rotary.py +221 -0
- esm/layers/structure_proj.py +68 -0
- esm/layers/transformer_stack.py +94 -0
- esm/models/esm3.py +798 -0
- esm/models/function_decoder.py +338 -0
- esm/models/vqvae.py +450 -0
Dockerfile
ADDED
@@ -0,0 +1,27 @@
FROM continuumio/miniconda3

WORKDIR /code
COPY ./requirements.txt /code/requirements.txt

RUN conda create -n m3site python=3.11 dssp -c ostrokach -y
RUN conda run -n m3site pip install --no-cache-dir --upgrade -r /code/requirements.txt

RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

CMD ["conda", "run", "--no-capture-output", "-n", "m3site", "python", "app.py"]
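The image creates an `m3site` conda environment that bundles `dssp` from the `ostrokach` channel, presumably so the app can derive structural features from the uploaded PDB file. Below is a minimal sketch, not part of the repository, for checking that a DSSP executable is reachable inside that environment; the executable names tried (`mkdssp`, `dssp`) are assumptions about the conda package.

```python
# Hypothetical sanity check, not part of the upload: confirm the DSSP binary installed by
# the Dockerfile's conda environment is on PATH before launching the app.
import shutil

def find_dssp() -> str | None:
    """Return the path of the first DSSP executable found on PATH, if any."""
    for name in ("mkdssp", "dssp"):  # possible executable names; an assumption
        path = shutil.which(name)
        if path is not None:
            return path
    return None

if __name__ == "__main__":
    exe = find_dssp()
    if exe is None:
        raise SystemExit("DSSP not found; run this inside the 'm3site' conda environment.")
    print(f"DSSP found at {exe}")
```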
README.md
CHANGED
@@ -1,10 +1,17 @@
 ---
 title: M3Site
-emoji:
-colorFrom:
-colorTo:
+emoji: 📉
+colorFrom: indigo
+colorTo: indigo
 sdk: docker
-pinned:
+pinned: true
+license: cc-by-4.0
+short_description: 'An interactive demo for M3Site.'
 ---

-
+# M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification
+
+This is an interactive demo for inference; the source code of the paper can be found on [GitHub](). We provide some example `.pdb` files in the `case_study` folder, which you can use to try the demo. Taking `A0A384E143.pdb` as an example, the protein active sites predicted by M<sup>3</sup>Site are shown below:
+![image1](img/image1.png)
+In addition, you can visualize and analyze the predicted results interactively, as shown below:
+![image2](img/image2.png)
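For readers who want the same prediction outside the web UI, here is a minimal sketch built on the `model_predict` call that `app.py` makes (`model_predict(model, struct, text)` returning per-class site indices, per-residue confidences, and the sequence). Treating the structure argument as a plain file path, and the availability of `model_list` and `no_cat_dict` in `constants.py`, are assumptions inferred from `app.py`, not a documented API.

```python
# Hypothetical scripted use of the demo's predictor; argument and return conventions are
# inferred from how app.py calls model_predict and are not an official API.
from predict import model_predict
from constants import model_list, no_cat_dict

pdb_path = "case_study/A0A384E143.pdb"          # one of the bundled examples
prompt = "I don't know the function of this protein."

predicted_sites, confs, sequence = model_predict(model_list[0], pdb_path, prompt)

for class_key, residues in predicted_sites.items():
    for idx in residues:                         # 1-based residue numbers, as in app.py
        print(f"{no_cat_dict[class_key]}: {sequence[idx - 1]}{idx} (confidence {confs[idx - 1]:.3f})")
```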
app.py
ADDED
@@ -0,0 +1,219 @@
import copy
import gradio as gr
from gradio_molecule3d import Molecule3D
import Bio
import Bio.SeqUtils

from utils.util_functions import merge_ranges
from predict import model_predict
from constants import *


def update_reps_based_on_radio(*args):
    struct, text = args[0], args[1]
    background, model, active_sites = args[2:4], args[4], args[5:]

    predicted_sites, confs, sequence = model_predict(model, struct, text)
    merged_sites = merge_ranges(predicted_sites, max_value=len(sequence))

    confidence_details = []
    new_reps = []

    # 1. compute the summary text
    summary_text = []
    for k, v in predicted_sites.items():
        if len(v) > 0:
            summary_text.append(f"{len(v)} {no_cat_dict[k]} site(s)")
    if len(summary_text) == 0:
        summary_text = ["No active sites identified."]
    summary_text = '; '.join(summary_text)

    # 2. compute the detailed result table
    detail_predicted_sites = {'b': [], '0': [], '1': [], '2': [], '3': [], '4': [], '5': []}
    ass = []
    for k, v in predicted_sites.items():
        for vv in v:
            detail_predicted_sites[k].append(
                {'residue_type': sequence[vv-1], 'number': vv, 'confidence': confs[vv-1]}
            )
            ass.append(vv)
    for i in range(len(sequence)):
        if i+1 not in ass:
            detail_predicted_sites['b'].append(
                {'residue_type': sequence[i], 'number': i+1, 'confidence': confs[i]}
            )
    # 2.1 background residues
    backgrounds = detail_predicted_sites.get('b', [])
    for r in backgrounds:
        confidence_details.append([
            'Background',
            Bio.SeqUtils.seq3(r['residue_type']).upper(),
            r['number'],
            r.get('confidence', 'N/A')
        ])
    # 2.2 active-site residues
    for i in range(0, len(active_sites), 2):
        x, y = active_sites[i], active_sites[i+1]
        site_key = str(i//2)
        sites = detail_predicted_sites.get(site_key, [])
        for s in sites:
            confidence_details.append([
                no_cat_dict[site_key],
                Bio.SeqUtils.seq3(s['residue_type']).upper(),
                s['number'],
                s.get('confidence', 'N/A')
            ])

    # 3. compute the viewer representations
    # 3.1 background
    ranges = merged_sites['b']
    for r in ranges:
        old_reps = copy.deepcopy(default_reps)[0]
        old_reps['style'] = background[0][0].lower() + background[0][1:]
        old_reps['color'] = background[1][0].lower() + background[1][1:] + "Carbon"
        old_reps['residue_range'] = r
        new_reps.append(old_reps)
    # 3.2 active sites
    for i in range(0, len(active_sites), 2):
        x, y = active_sites[i], active_sites[i+1]
        ranges = merged_sites[str(i//2)]
        for r in ranges:
            old_reps = copy.deepcopy(default_reps)[0]
            old_reps['style'] = x[0].lower() + x[1:]
            old_reps['color'] = y[0].lower() + y[1:] + "Carbon"
            old_reps['residue_range'] = r
            new_reps.append(old_reps)

    return summary_text, confidence_details, Molecule3D(label="Identified Functional Sites", reps=new_reps)


def disable_fn(*x):
    return [gr.update(interactive=False)] * len(x)


def able_tip():
    return gr.update(visible=True)


def check_input(input):
    if input is not None:
        return gr.update(interactive=True)
    return gr.update(interactive=False)


with gr.Blocks(title="M3Site-app", theme=gr.themes.Default()) as demo:
    gr.Markdown("# M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification")
    gr.Markdown("""
    ## Overview
    **M<sup>3</sup>Site** is an advanced tool designed to accurately identify and classify protein active sites using a multi-modal learning approach. By integrating protein sequences, structural data, and functional annotations, M<sup>3</sup>Site provides comprehensive insights into protein functionality, aiding in drug design, synthetic biology, and understanding protein mechanisms.
    """)
    gr.Markdown("""
    ## How to Use
    1. **Select the Model**: Choose the pre-trained model for site prediction from the dropdown list.
    2. **Adjust Visual Settings**: Customize the visual style and color for background and active sites.
    3. **Upload Protein Structure**: Provide the 3D structure of the protein. You can upload from local or download from PDB Assym. Unit, PDB BioAssembly, AlphaFold DB, or ESMFold DB.
    4. **Enter Function Prompt**: Optionally provide a text description of the protein's function. If unsure, leave it blank.
    5. **Click "Predict"**: Hit the 'Predict' button to initiate the prediction. The predicted active sites will be highlighted in the structure visualization.
    6. **View Results**: The detailed results will be displayed below, including the identified active sites, their types, and confidence scores.
    """)

    with gr.Accordion("General Settings (Set before prediction)"):
        with gr.Row():
            model_drop = gr.Dropdown(model_list, label="Model Selection", value=model_list[0])
            gr.Markdown("")
            gr.Markdown("")
        with gr.Row():
            with gr.Row():
                style_dropb = gr.Dropdown(style_list, label="Style (Background)", value=style_list[0], min_width=1)
                color_dropb = gr.Dropdown(color_list, label="Color (Background)", value=color_list[0], min_width=1)
            with gr.Row():
                style_drop1 = gr.Dropdown(style_list, label="Style (CRI)", value=style_list[1], min_width=1)
                color_drop1 = gr.Dropdown(color_list, label="Color (CRI)", value=color_list[1], min_width=1)
            with gr.Row():
                style_drop2 = gr.Dropdown(style_list, label="Style (SCI)", value=style_list[1], min_width=1)
                color_drop2 = gr.Dropdown(color_list, label="Color (SCI)", value=color_list[2], min_width=1)
            with gr.Row():
                style_drop3 = gr.Dropdown(style_list, label="Style (PI)", value=style_list[1], min_width=1)
                color_drop3 = gr.Dropdown(color_list, label="Color (PI)", value=color_list[3], min_width=1)
        with gr.Row():
            with gr.Row():
                style_drop4 = gr.Dropdown(style_list, label="Style (PTCR)", value=style_list[1], min_width=1)
                color_drop4 = gr.Dropdown(color_list, label="Color (PTCR)", value=color_list[4], min_width=1)
            with gr.Row():
                style_drop5 = gr.Dropdown(style_list, label="Style (IA)", value=style_list[1], min_width=1)
                color_drop5 = gr.Dropdown(color_list, label="Color (IA)", value=color_list[5], min_width=1)
            with gr.Row():
                style_drop6 = gr.Dropdown(style_list, label="Style (SSA)", value=style_list[1], min_width=1)
                color_drop6 = gr.Dropdown(color_list, label="Color (SSA)", value=color_list[6], min_width=1)
            with gr.Row():
                gr.Markdown("")

    gr.Markdown('''
    *NOTE:* CRI indicates Covalent Reaction Intermediates, SCI indicates Sulfur-containing Covalent Intermediates, PI indicates Phosphorylated Intermediates,
    PTCR indicates Proton Transfer & Charge Relay Systems, IA indicates Isomerization Activity, SSA indicates Substrate-specific Activities.
    ''')

    with gr.Row():
        gr.Markdown("<center><font size=5><b>Input Structure</b></font></center>")
        gr.Markdown("<center><font size=5><b>Output Predictions</b></font></center>")

    with gr.Row(equal_height=True):
        input_struct = Molecule3D(label="Input Protein Structure (Default Style)", reps=reps1)
        output_struct = Molecule3D(label="Output Protein Structure", reps=[])

    with gr.Row(equal_height=True):
        input_text = gr.Textbox(lines=1, label="Function Prompt", scale=16, min_width=1, placeholder="I don't know the function of this protein.")
        btn = gr.Button("Predict", variant="primary", scale=1, min_width=1, interactive=False)
        summary_output = gr.Label(label="", scale=18, min_width=1, show_label=False, elem_classes="info")

    gr.Markdown("### Result Details")
    confidence_output = gr.DataFrame(headers=["Active Site Type", "Residue Type", "Residue Number", "Confidence"])

    option_list = [
        style_dropb, color_dropb, model_drop,
        style_drop1, color_drop1,
        style_drop2, color_drop2,
        style_drop3, color_drop3,
        style_drop4, color_drop4,
        style_drop5, color_drop5,
        style_drop6, color_drop6
    ]

    tips = gr.Markdown("### *Tips: Please refresh the page to make a new prediction.*", visible=False)
    # gr.Markdown("## Citation")
    # gr.Markdown("If you find this tool helpful, please consider citing the following papers:")
    # with gr.Accordion("Citations", open=False):
    #     gr.Markdown('''```
    #     @inproceedings{ouyangmmsite,
    #       title={MMSite: A Multi-modal Framework for the Identification of Active Sites in Proteins},
    #       author={Ouyang, Song and Cai, Huiyu and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
    #       booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}
    #     }
    #     @article{ouyangm3site,
    #       title={M3Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification},
    #       author={Ouyang, Song and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
    #       journal={xxxx},
    #       year={xxxx},
    #     }
    #     ```''')

    # bind events
    input_struct.change(check_input, inputs=input_struct, outputs=btn)
    btn.click(
        fn=able_tip,
        inputs=[],
        outputs=tips
    ).then(
        fn=disable_fn,
        inputs=option_list,
        outputs=option_list
    ).then(
        fn=update_reps_based_on_radio,
        inputs=[input_struct, input_text] + option_list,
        outputs=[summary_output, confidence_output, output_struct]
    ).then(
        fn=lambda x: x,
        inputs=[input_struct],
        outputs=[output_struct]
    )


if __name__ == "__main__":
    demo.launch(share=True, debug=True)
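`merge_ranges` comes from `utils/util_functions.py`, which is not among the 50 files rendered in this view. Judging only from how `app.py` consumes its output (a dict keyed by class, plus `'b'` for background, whose values are fed one at a time into a rep's `residue_range`), a plausible stand-in might look like the sketch below; the exact range format expected by `gradio_molecule3d` is an assumption.

```python
# Hypothetical stand-in for utils.util_functions.merge_ranges, inferred from its use in
# app.py: collapse per-class residue indices into contiguous ranges and add a background
# entry 'b' covering every residue not assigned to any class.
def merge_ranges(predicted_sites: dict[str, list[int]], max_value: int) -> dict[str, list[list[int]]]:
    def collapse(indices: list[int]) -> list[list[int]]:
        ranges, run = [], []
        for i in sorted(set(indices)):
            if run and i == run[-1] + 1:
                run.append(i)
            else:
                if run:
                    ranges.append([run[0], run[-1]])
                run = [i]
        if run:
            ranges.append([run[0], run[-1]])
        return ranges

    assigned = {i for residues in predicted_sites.values() for i in residues}
    merged = {key: collapse(residues) for key, residues in predicted_sites.items()}
    merged["b"] = collapse([i for i in range(1, max_value + 1) if i not in assigned])
    return merged


# Example: residues 3, 4, 5 and 10 predicted as class '0' in a 12-residue protein.
print(merge_ranges({"0": [3, 4, 5, 10]}, max_value=12))
# {'0': [[3, 5], [10, 10]], 'b': [[1, 2], [6, 9], [11, 12]]}
```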
case_study/A0A067FT93.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A126P745.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A1H1XG33.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A242M8J4.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A2P2GK84.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A2U7QU15.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A2U7R6V5.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A384E138.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A384E143.pdb
ADDED
The diff for this file is too large to render.
case_study/A0A3Q0KJ78.pdb
ADDED
@@ -0,0 +1,1187 @@
HEADER 01-JUN-22
TITLE ALPHAFOLD MONOMER V2.0 PREDICTION FOR NUCLEOSIDE DIPHOSPHATE KINASE
TITLE 2 (A0A3Q0KJ78)
COMPND MOL_ID: 1;
COMPND 2 MOLECULE: NUCLEOSIDE DIPHOSPHATE KINASE;
COMPND 3 CHAIN: A
SOURCE MOL_ID: 1;
SOURCE 2 ORGANISM_SCIENTIFIC: SCHISTOSOMA MANSONI;
SOURCE 3 ORGANISM_TAXID: 6183
REMARK 1
REMARK 1 REFERENCE 1
REMARK 1 AUTH JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN,
REMARK 1 AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL,
REMARK 1 AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND,
REMARK 1 AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD,
REMARK 1 AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV,
REMARK 1 AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN,
REMARK 1 AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI,
REMARK 1 AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER,
REMARK 1 AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR,
REMARK 1 AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS
REMARK 1 TITL HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD
REMARK 1 REF NATURE V. 596 583 2021
REMARK 1 REFN ISSN 0028-0836
REMARK 1 PMID 34265844
REMARK 1 DOI 10.1038/s41586-021-03819-2
REMARK 1
REMARK 1 DISCLAIMERS
REMARK 1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE
REMARK 1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD
REMARK 1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY
REMARK 1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT
REMARK 1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD
REMARK 1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR
REMARK 1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT
REMARK 1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR
REMARK 1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE.
DBREF XXXX A 1 143 UNP A0A3Q0KJ78 A0A3Q0KJ78_SCHMA 1 143
SEQRES 1 A 143 MET VAL LYS PRO ASP GLY VAL GLN ARG GLY LEU VAL GLY
SEQRES 2 A 143 GLU VAL ILE GLN ARG PHE GLU ARG ARG GLY TYR LYS LEU
SEQRES 3 A 143 VAL ALA ILE LYS MET MET HIS ALA SER GLU GLN LEU LEU
SEQRES 4 A 143 GLN THR HIS TYR GLU ALA LEU LYS SER LEU SER PHE PHE
SEQRES 5 A 143 PRO LYS LEU VAL ALA TYR MET SER SER GLY PRO VAL VAL
SEQRES 6 A 143 PRO MET VAL PHE GLU GLY ARG LYS VAL VAL GLU ASN GLY
SEQRES 7 A 143 ARG THR MET LEU GLY ALA THR LYS PRO GLU ALA SER CYS
SEQRES 8 A 143 PRO GLY SER ILE ARG GLY ASP TYR CYS GLN ASP VAL GLY
SEQRES 9 A 143 ARG ASN VAL VAL HIS GLY SER ASP SER THR GLU SER ALA
SEQRES 10 A 143 ASN ARG GLU ILE ASN LEU TRP PHE SER PRO GLN GLU LEU
SEQRES 11 A 143 CYS GLN TYR LYS GLN ALA VAL ASP PRO TRP ILE HIS GLU
CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
ORIGX1 1.000000 0.000000 0.000000 0.00000
ORIGX2 0.000000 1.000000 0.000000 0.00000
ORIGX3 0.000000 0.000000 1.000000 0.00000
SCALE1 1.000000 0.000000 0.000000 0.00000
SCALE2 0.000000 1.000000 0.000000 0.00000
SCALE3 0.000000 0.000000 1.000000 0.00000
MODEL 1
ATOM 1 N MET A 1 0.643 4.346 0.767 1.00 98.62 N
ATOM 2 CA MET A 1 0.545 5.526 -0.124 1.00 98.62 C
ATOM 3 C MET A 1 0.325 5.076 -1.559 1.00 98.62 C
ATOM 4 CB MET A 1 1.804 6.406 -0.018 1.00 98.62 C
ATOM 5 O MET A 1 1.081 4.230 -2.029 1.00 98.62 O
ATOM 6 CG MET A 1 1.853 7.539 -1.054 1.00 98.62 C
ATOM 7 SD MET A 1 3.214 8.711 -0.842 1.00 98.62 S
ATOM 8 CE MET A 1 4.672 7.639 -0.954 1.00 98.62 C
[ATOM coordinate records for residues 2-143 continue; this rendered view is truncated at file line 622 (ATOM 564, CA of ARG A 72) of 1,187 lines.]
622 |
+
ATOM 565 C ARG A 72 -2.372 -6.869 12.581 1.00 96.75 C
|
623 |
+
ATOM 566 CB ARG A 72 -4.080 -7.772 14.252 1.00 96.75 C
|
624 |
+
ATOM 567 O ARG A 72 -2.832 -7.195 11.484 1.00 96.75 O
|
625 |
+
ATOM 568 CG ARG A 72 -3.176 -8.881 14.825 1.00 96.75 C
|
626 |
+
ATOM 569 CD ARG A 72 -4.024 -10.042 15.372 1.00 96.75 C
|
627 |
+
ATOM 570 NE ARG A 72 -3.206 -11.121 15.971 1.00 96.75 N
|
628 |
+
ATOM 571 NH1 ARG A 72 -4.932 -12.602 16.416 1.00 96.75 N
|
629 |
+
ATOM 572 NH2 ARG A 72 -2.860 -13.136 16.998 1.00 96.75 N
|
630 |
+
ATOM 573 CZ ARG A 72 -3.669 -12.273 16.450 1.00 96.75 C
|
631 |
+
ATOM 574 N LYS A 73 -1.056 -6.799 12.828 1.00 97.45 N
|
632 |
+
ATOM 575 CA LYS A 73 0.005 -7.067 11.838 1.00 97.45 C
|
633 |
+
ATOM 576 C LYS A 73 -0.148 -6.273 10.526 1.00 97.45 C
|
634 |
+
ATOM 577 CB LYS A 73 0.118 -8.582 11.575 1.00 97.45 C
|
635 |
+
ATOM 578 O LYS A 73 0.269 -6.749 9.474 1.00 97.45 O
|
636 |
+
ATOM 579 CG LYS A 73 0.577 -9.417 12.777 1.00 97.45 C
|
637 |
+
ATOM 580 CD LYS A 73 0.674 -10.886 12.335 1.00 97.45 C
|
638 |
+
ATOM 581 CE LYS A 73 1.676 -11.683 13.176 1.00 97.45 C
|
639 |
+
ATOM 582 NZ LYS A 73 2.590 -12.436 12.280 1.00 97.45 N
|
640 |
+
ATOM 583 N VAL A 74 -0.742 -5.075 10.560 1.00 98.54 N
|
641 |
+
ATOM 584 CA VAL A 74 -1.056 -4.299 9.343 1.00 98.54 C
|
642 |
+
ATOM 585 C VAL A 74 0.172 -3.983 8.482 1.00 98.54 C
|
643 |
+
ATOM 586 CB VAL A 74 -1.846 -3.024 9.698 1.00 98.54 C
|
644 |
+
ATOM 587 O VAL A 74 0.063 -3.975 7.261 1.00 98.54 O
|
645 |
+
ATOM 588 CG1 VAL A 74 -1.028 -2.022 10.524 1.00 98.54 C
|
646 |
+
ATOM 589 CG2 VAL A 74 -2.391 -2.320 8.451 1.00 98.54 C
|
647 |
+
ATOM 590 N VAL A 75 1.355 -3.804 9.080 1.00 98.61 N
|
648 |
+
ATOM 591 CA VAL A 75 2.601 -3.582 8.325 1.00 98.61 C
|
649 |
+
ATOM 592 C VAL A 75 2.995 -4.830 7.525 1.00 98.61 C
|
650 |
+
ATOM 593 CB VAL A 75 3.744 -3.131 9.253 1.00 98.61 C
|
651 |
+
ATOM 594 O VAL A 75 3.136 -4.760 6.306 1.00 98.61 O
|
652 |
+
ATOM 595 CG1 VAL A 75 5.029 -2.893 8.459 1.00 98.61 C
|
653 |
+
ATOM 596 CG2 VAL A 75 3.393 -1.831 9.990 1.00 98.61 C
|
654 |
+
ATOM 597 N GLU A 76 3.111 -5.980 8.197 1.00 98.31 N
|
655 |
+
ATOM 598 CA GLU A 76 3.481 -7.269 7.588 1.00 98.31 C
|
656 |
+
ATOM 599 C GLU A 76 2.452 -7.696 6.528 1.00 98.31 C
|
657 |
+
ATOM 600 CB GLU A 76 3.598 -8.323 8.712 1.00 98.31 C
|
658 |
+
ATOM 601 O GLU A 76 2.778 -7.913 5.362 1.00 98.31 O
|
659 |
+
ATOM 602 CG GLU A 76 4.072 -9.709 8.239 1.00 98.31 C
|
660 |
+
ATOM 603 CD GLU A 76 4.005 -10.798 9.336 1.00 98.31 C
|
661 |
+
ATOM 604 OE1 GLU A 76 4.273 -11.974 9.020 1.00 98.31 O
|
662 |
+
ATOM 605 OE2 GLU A 76 3.617 -10.521 10.503 1.00 98.31 O
|
663 |
+
ATOM 606 N ASN A 77 1.176 -7.741 6.905 1.00 98.23 N
|
664 |
+
ATOM 607 CA ASN A 77 0.106 -8.178 6.013 1.00 98.23 C
|
665 |
+
ATOM 608 C ASN A 77 -0.172 -7.166 4.897 1.00 98.23 C
|
666 |
+
ATOM 609 CB ASN A 77 -1.150 -8.392 6.852 1.00 98.23 C
|
667 |
+
ATOM 610 O ASN A 77 -0.473 -7.566 3.774 1.00 98.23 O
|
668 |
+
ATOM 611 CG ASN A 77 -1.119 -9.628 7.726 1.00 98.23 C
|
669 |
+
ATOM 612 ND2 ASN A 77 -2.050 -9.737 8.642 1.00 98.23 N
|
670 |
+
ATOM 613 OD1 ASN A 77 -0.321 -10.532 7.579 1.00 98.23 O
|
671 |
+
ATOM 614 N GLY A 78 -0.038 -5.867 5.173 1.00 98.37 N
|
672 |
+
ATOM 615 CA GLY A 78 -0.117 -4.826 4.156 1.00 98.37 C
|
673 |
+
ATOM 616 C GLY A 78 0.964 -5.032 3.100 1.00 98.37 C
|
674 |
+
ATOM 617 O GLY A 78 0.663 -5.071 1.910 1.00 98.37 O
|
675 |
+
ATOM 618 N ARG A 79 2.214 -5.277 3.513 1.00 98.35 N
|
676 |
+
ATOM 619 CA ARG A 79 3.321 -5.614 2.604 1.00 98.35 C
|
677 |
+
ATOM 620 C ARG A 79 3.009 -6.823 1.721 1.00 98.35 C
|
678 |
+
ATOM 621 CB ARG A 79 4.601 -5.827 3.436 1.00 98.35 C
|
679 |
+
ATOM 622 O ARG A 79 3.305 -6.757 0.525 1.00 98.35 O
|
680 |
+
ATOM 623 CG ARG A 79 5.267 -4.488 3.811 1.00 98.35 C
|
681 |
+
ATOM 624 CD ARG A 79 6.356 -4.033 2.826 1.00 98.35 C
|
682 |
+
ATOM 625 NE ARG A 79 6.055 -4.430 1.436 1.00 98.35 N
|
683 |
+
ATOM 626 NH1 ARG A 79 6.102 -2.374 0.422 1.00 98.35 N
|
684 |
+
ATOM 627 NH2 ARG A 79 5.437 -4.203 -0.709 1.00 98.35 N
|
685 |
+
ATOM 628 CZ ARG A 79 5.881 -3.660 0.390 1.00 98.35 C
|
686 |
+
ATOM 629 N THR A 80 2.368 -7.855 2.270 1.00 98.32 N
|
687 |
+
ATOM 630 CA THR A 80 1.891 -9.018 1.504 1.00 98.32 C
|
688 |
+
ATOM 631 C THR A 80 0.787 -8.645 0.509 1.00 98.32 C
|
689 |
+
ATOM 632 CB THR A 80 1.444 -10.147 2.447 1.00 98.32 C
|
690 |
+
ATOM 633 O THR A 80 0.877 -9.018 -0.658 1.00 98.32 O
|
691 |
+
ATOM 634 CG2 THR A 80 0.922 -11.379 1.710 1.00 98.32 C
|
692 |
+
ATOM 635 OG1 THR A 80 2.561 -10.575 3.183 1.00 98.32 O
|
693 |
+
ATOM 636 N MET A 81 -0.210 -7.845 0.906 1.00 98.59 N
|
694 |
+
ATOM 637 CA MET A 81 -1.279 -7.374 0.005 1.00 98.59 C
|
695 |
+
ATOM 638 C MET A 81 -0.752 -6.514 -1.148 1.00 98.59 C
|
696 |
+
ATOM 639 CB MET A 81 -2.297 -6.540 0.790 1.00 98.59 C
|
697 |
+
ATOM 640 O MET A 81 -1.277 -6.559 -2.261 1.00 98.59 O
|
698 |
+
ATOM 641 CG MET A 81 -3.141 -7.380 1.747 1.00 98.59 C
|
699 |
+
ATOM 642 SD MET A 81 -4.180 -6.388 2.846 1.00 98.59 S
|
700 |
+
ATOM 643 CE MET A 81 -5.247 -5.558 1.641 1.00 98.59 C
|
701 |
+
ATOM 644 N LEU A 82 0.294 -5.725 -0.897 1.00 98.63 N
|
702 |
+
ATOM 645 CA LEU A 82 0.945 -4.914 -1.921 1.00 98.63 C
|
703 |
+
ATOM 646 C LEU A 82 1.639 -5.777 -2.985 1.00 98.63 C
|
704 |
+
ATOM 647 CB LEU A 82 1.958 -3.962 -1.257 1.00 98.63 C
|
705 |
+
ATOM 648 O LEU A 82 1.680 -5.376 -4.147 1.00 98.63 O
|
706 |
+
ATOM 649 CG LEU A 82 1.376 -2.603 -0.840 1.00 98.63 C
|
707 |
+
ATOM 650 CD1 LEU A 82 0.385 -2.612 0.311 1.00 98.63 C
|
708 |
+
ATOM 651 CD2 LEU A 82 2.513 -1.661 -0.438 1.00 98.63 C
|
709 |
+
ATOM 652 N GLY A 83 2.194 -6.927 -2.596 1.00 98.45 N
|
710 |
+
ATOM 653 CA GLY A 83 3.070 -7.744 -3.435 1.00 98.45 C
|
711 |
+
ATOM 654 C GLY A 83 4.537 -7.299 -3.386 1.00 98.45 C
|
712 |
+
ATOM 655 O GLY A 83 4.900 -6.304 -2.739 1.00 98.45 O
|
713 |
+
ATOM 656 N ALA A 84 5.401 -8.046 -4.075 1.00 97.59 N
|
714 |
+
ATOM 657 CA ALA A 84 6.837 -7.781 -4.154 1.00 97.59 C
|
715 |
+
ATOM 658 C ALA A 84 7.137 -6.394 -4.754 1.00 97.59 C
|
716 |
+
ATOM 659 CB ALA A 84 7.499 -8.902 -4.965 1.00 97.59 C
|
717 |
+
ATOM 660 O ALA A 84 6.314 -5.795 -5.441 1.00 97.59 O
|
718 |
+
ATOM 661 N THR A 85 8.313 -5.818 -4.480 1.00 94.66 N
|
719 |
+
ATOM 662 CA THR A 85 8.682 -4.493 -5.031 1.00 94.66 C
|
720 |
+
ATOM 663 C THR A 85 8.776 -4.501 -6.553 1.00 94.66 C
|
721 |
+
ATOM 664 CB THR A 85 9.962 -3.960 -4.381 1.00 94.66 C
|
722 |
+
ATOM 665 O THR A 85 8.341 -3.534 -7.184 1.00 94.66 O
|
723 |
+
ATOM 666 CG2 THR A 85 10.404 -2.602 -4.926 1.00 94.66 C
|
724 |
+
ATOM 667 OG1 THR A 85 9.680 -3.775 -3.013 1.00 94.66 O
|
725 |
+
ATOM 668 N LYS A 86 9.291 -5.596 -7.120 1.00 95.47 N
|
726 |
+
ATOM 669 CA LYS A 86 9.256 -5.879 -8.553 1.00 95.47 C
|
727 |
+
ATOM 670 C LYS A 86 7.914 -6.537 -8.907 1.00 95.47 C
|
728 |
+
ATOM 671 CB LYS A 86 10.413 -6.803 -8.940 1.00 95.47 C
|
729 |
+
ATOM 672 O LYS A 86 7.621 -7.596 -8.344 1.00 95.47 O
|
730 |
+
ATOM 673 CG LYS A 86 11.791 -6.153 -8.748 1.00 95.47 C
|
731 |
+
ATOM 674 CD LYS A 86 12.847 -7.130 -9.264 1.00 95.47 C
|
732 |
+
ATOM 675 CE LYS A 86 14.257 -6.545 -9.284 1.00 95.47 C
|
733 |
+
ATOM 676 NZ LYS A 86 15.132 -7.444 -10.075 1.00 95.47 N
|
734 |
+
ATOM 677 N PRO A 87 7.105 -5.943 -9.798 1.00 94.78 N
|
735 |
+
ATOM 678 CA PRO A 87 5.823 -6.521 -10.191 1.00 94.78 C
|
736 |
+
ATOM 679 C PRO A 87 5.938 -7.935 -10.763 1.00 94.78 C
|
737 |
+
ATOM 680 CB PRO A 87 5.234 -5.554 -11.212 1.00 94.78 C
|
738 |
+
ATOM 681 O PRO A 87 5.119 -8.775 -10.419 1.00 94.78 O
|
739 |
+
ATOM 682 CG PRO A 87 5.836 -4.214 -10.798 1.00 94.78 C
|
740 |
+
ATOM 683 CD PRO A 87 7.245 -4.613 -10.375 1.00 94.78 C
|
741 |
+
ATOM 684 N GLU A 88 7.007 -8.240 -11.507 1.00 95.24 N
|
742 |
+
ATOM 685 CA GLU A 88 7.259 -9.577 -12.072 1.00 95.24 C
|
743 |
+
ATOM 686 C GLU A 88 7.347 -10.707 -11.025 1.00 95.24 C
|
744 |
+
ATOM 687 CB GLU A 88 8.549 -9.517 -12.917 1.00 95.24 C
|
745 |
+
ATOM 688 O GLU A 88 7.113 -11.868 -11.341 1.00 95.24 O
|
746 |
+
ATOM 689 CG GLU A 88 9.842 -9.335 -12.085 1.00 95.24 C
|
747 |
+
ATOM 690 CD GLU A 88 10.767 -8.202 -12.552 1.00 95.24 C
|
748 |
+
ATOM 691 OE1 GLU A 88 12.009 -8.365 -12.438 1.00 95.24 O
|
749 |
+
ATOM 692 OE2 GLU A 88 10.241 -7.096 -12.801 1.00 95.24 O
|
750 |
+
ATOM 693 N ALA A 89 7.672 -10.370 -9.773 1.00 97.30 N
|
751 |
+
ATOM 694 CA ALA A 89 7.756 -11.302 -8.650 1.00 97.30 C
|
752 |
+
ATOM 695 C ALA A 89 6.526 -11.223 -7.726 1.00 97.30 C
|
753 |
+
ATOM 696 CB ALA A 89 9.069 -11.028 -7.907 1.00 97.30 C
|
754 |
+
ATOM 697 O ALA A 89 6.519 -11.808 -6.643 1.00 97.30 O
|
755 |
+
ATOM 698 N SER A 90 5.508 -10.444 -8.099 1.00 97.65 N
|
756 |
+
ATOM 699 CA SER A 90 4.282 -10.288 -7.321 1.00 97.65 C
|
757 |
+
ATOM 700 C SER A 90 3.260 -11.344 -7.727 1.00 97.65 C
|
758 |
+
ATOM 701 CB SER A 90 3.705 -8.885 -7.500 1.00 97.65 C
|
759 |
+
ATOM 702 O SER A 90 2.945 -11.492 -8.902 1.00 97.65 O
|
760 |
+
ATOM 703 OG SER A 90 4.636 -7.929 -7.031 1.00 97.65 O
|
761 |
+
ATOM 704 N CYS A 91 2.709 -12.062 -6.749 1.00 97.61 N
|
762 |
+
ATOM 705 CA CYS A 91 1.668 -13.051 -7.015 1.00 97.61 C
|
763 |
+
ATOM 706 C CYS A 91 0.375 -12.384 -7.527 1.00 97.61 C
|
764 |
+
ATOM 707 CB CYS A 91 1.381 -13.858 -5.742 1.00 97.61 C
|
765 |
+
ATOM 708 O CYS A 91 0.038 -11.287 -7.052 1.00 97.61 O
|
766 |
+
ATOM 709 SG CYS A 91 2.876 -14.732 -5.195 1.00 97.61 S
|
767 |
+
ATOM 710 N PRO A 92 -0.392 -13.055 -8.411 1.00 98.06 N
|
768 |
+
ATOM 711 CA PRO A 92 -1.765 -12.661 -8.724 1.00 98.06 C
|
769 |
+
ATOM 712 C PRO A 92 -2.602 -12.478 -7.449 1.00 98.06 C
|
770 |
+
ATOM 713 CB PRO A 92 -2.322 -13.778 -9.615 1.00 98.06 C
|
771 |
+
ATOM 714 O PRO A 92 -2.443 -13.226 -6.483 1.00 98.06 O
|
772 |
+
ATOM 715 CG PRO A 92 -1.073 -14.373 -10.263 1.00 98.06 C
|
773 |
+
ATOM 716 CD PRO A 92 -0.029 -14.255 -9.156 1.00 98.06 C
|
774 |
+
ATOM 717 N GLY A 93 -3.461 -11.460 -7.427 1.00 97.89 N
|
775 |
+
ATOM 718 CA GLY A 93 -4.252 -11.044 -6.265 1.00 97.89 C
|
776 |
+
ATOM 719 C GLY A 93 -3.575 -9.992 -5.378 1.00 97.89 C
|
777 |
+
ATOM 720 O GLY A 93 -4.246 -9.326 -4.584 1.00 97.89 O
|
778 |
+
ATOM 721 N SER A 94 -2.260 -9.786 -5.508 1.00 98.62 N
|
779 |
+
ATOM 722 CA SER A 94 -1.591 -8.632 -4.896 1.00 98.62 C
|
780 |
+
ATOM 723 C SER A 94 -1.785 -7.376 -5.745 1.00 98.62 C
|
781 |
+
ATOM 724 CB SER A 94 -0.109 -8.905 -4.647 1.00 98.62 C
|
782 |
+
ATOM 725 O SER A 94 -1.936 -7.456 -6.961 1.00 98.62 O
|
783 |
+
ATOM 726 OG SER A 94 0.619 -8.949 -5.856 1.00 98.62 O
|
784 |
+
ATOM 727 N ILE A 95 -1.731 -6.192 -5.129 1.00 98.76 N
|
785 |
+
ATOM 728 CA ILE A 95 -1.938 -4.919 -5.846 1.00 98.76 C
|
786 |
+
ATOM 729 C ILE A 95 -0.956 -4.778 -7.015 1.00 98.76 C
|
787 |
+
ATOM 730 CB ILE A 95 -1.818 -3.729 -4.866 1.00 98.76 C
|
788 |
+
ATOM 731 O ILE A 95 -1.337 -4.365 -8.107 1.00 98.76 O
|
789 |
+
ATOM 732 CG1 ILE A 95 -2.991 -3.761 -3.863 1.00 98.76 C
|
790 |
+
ATOM 733 CG2 ILE A 95 -1.796 -2.371 -5.597 1.00 98.76 C
|
791 |
+
ATOM 734 CD1 ILE A 95 -2.834 -2.814 -2.666 1.00 98.76 C
|
792 |
+
ATOM 735 N ARG A 96 0.320 -5.118 -6.808 1.00 98.71 N
|
793 |
+
ATOM 736 CA ARG A 96 1.321 -5.027 -7.876 1.00 98.71 C
|
794 |
+
ATOM 737 C ARG A 96 1.205 -6.148 -8.895 1.00 98.71 C
|
795 |
+
ATOM 738 CB ARG A 96 2.723 -4.966 -7.288 1.00 98.71 C
|
796 |
+
ATOM 739 O ARG A 96 1.418 -5.868 -10.066 1.00 98.71 O
|
797 |
+
ATOM 740 CG ARG A 96 2.945 -3.621 -6.589 1.00 98.71 C
|
798 |
+
ATOM 741 CD ARG A 96 4.249 -3.724 -5.823 1.00 98.71 C
|
799 |
+
ATOM 742 NE ARG A 96 4.516 -2.529 -5.009 1.00 98.71 N
|
800 |
+
ATOM 743 NH1 ARG A 96 5.865 -3.617 -3.514 1.00 98.71 N
|
801 |
+
ATOM 744 NH2 ARG A 96 5.614 -1.417 -3.351 1.00 98.71 N
|
802 |
+
ATOM 745 CZ ARG A 96 5.322 -2.524 -3.968 1.00 98.71 C
|
803 |
+
ATOM 746 N GLY A 97 0.845 -7.359 -8.471 1.00 98.30 N
|
804 |
+
ATOM 747 CA GLY A 97 0.607 -8.471 -9.392 1.00 98.30 C
|
805 |
+
ATOM 748 C GLY A 97 -0.561 -8.196 -10.340 1.00 98.30 C
|
806 |
+
ATOM 749 O GLY A 97 -0.468 -8.498 -11.523 1.00 98.30 O
|
807 |
+
ATOM 750 N ASP A 98 -1.620 -7.552 -9.847 1.00 98.55 N
|
808 |
+
ATOM 751 CA ASP A 98 -2.813 -7.275 -10.652 1.00 98.55 C
|
809 |
+
ATOM 752 C ASP A 98 -2.676 -6.012 -11.512 1.00 98.55 C
|
810 |
+
ATOM 753 CB ASP A 98 -4.033 -7.155 -9.730 1.00 98.55 C
|
811 |
+
ATOM 754 O ASP A 98 -3.278 -5.919 -12.582 1.00 98.55 O
|
812 |
+
ATOM 755 CG ASP A 98 -4.351 -8.438 -8.959 1.00 98.55 C
|
813 |
+
ATOM 756 OD1 ASP A 98 -3.948 -9.543 -9.383 1.00 98.55 O
|
814 |
+
ATOM 757 OD2 ASP A 98 -5.009 -8.304 -7.899 1.00 98.55 O
|
815 |
+
ATOM 758 N TYR A 99 -1.903 -5.018 -11.054 1.00 98.44 N
|
816 |
+
ATOM 759 CA TYR A 99 -1.921 -3.685 -11.661 1.00 98.44 C
|
817 |
+
ATOM 760 C TYR A 99 -0.576 -3.163 -12.151 1.00 98.44 C
|
818 |
+
ATOM 761 CB TYR A 99 -2.582 -2.680 -10.702 1.00 98.44 C
|
819 |
+
ATOM 762 O TYR A 99 -0.556 -2.069 -12.710 1.00 98.44 O
|
820 |
+
ATOM 763 CG TYR A 99 -3.974 -3.071 -10.248 1.00 98.44 C
|
821 |
+
ATOM 764 CD1 TYR A 99 -4.956 -3.384 -11.205 1.00 98.44 C
|
822 |
+
ATOM 765 CD2 TYR A 99 -4.285 -3.145 -8.877 1.00 98.44 C
|
823 |
+
ATOM 766 CE1 TYR A 99 -6.229 -3.809 -10.795 1.00 98.44 C
|
824 |
+
ATOM 767 CE2 TYR A 99 -5.575 -3.536 -8.462 1.00 98.44 C
|
825 |
+
ATOM 768 OH TYR A 99 -7.787 -4.332 -9.092 1.00 98.44 O
|
826 |
+
ATOM 769 CZ TYR A 99 -6.544 -3.893 -9.428 1.00 98.44 C
|
827 |
+
ATOM 770 N CYS A 100 0.551 -3.842 -11.961 1.00 98.28 N
|
828 |
+
ATOM 771 CA CYS A 100 1.861 -3.287 -12.316 1.00 98.28 C
|
829 |
+
ATOM 772 C CYS A 100 2.623 -4.199 -13.279 1.00 98.28 C
|
830 |
+
ATOM 773 CB CYS A 100 2.656 -2.986 -11.040 1.00 98.28 C
|
831 |
+
ATOM 774 O CYS A 100 2.511 -5.415 -13.215 1.00 98.28 O
|
832 |
+
ATOM 775 SG CYS A 100 1.696 -1.940 -9.913 1.00 98.28 S
|
833 |
+
ATOM 776 N GLN A 101 3.431 -3.591 -14.149 1.00 96.85 N
|
834 |
+
ATOM 777 CA GLN A 101 4.346 -4.302 -15.052 1.00 96.85 C
|
835 |
+
ATOM 778 C GLN A 101 5.799 -3.938 -14.723 1.00 96.85 C
|
836 |
+
ATOM 779 CB GLN A 101 3.987 -4.002 -16.519 1.00 96.85 C
|
837 |
+
ATOM 780 O GLN A 101 6.625 -4.820 -14.519 1.00 96.85 O
|
838 |
+
ATOM 781 CG GLN A 101 2.578 -4.500 -16.891 1.00 96.85 C
|
839 |
+
ATOM 782 CD GLN A 101 2.203 -4.227 -18.346 1.00 96.85 C
|
840 |
+
ATOM 783 NE2 GLN A 101 0.928 -4.164 -18.664 1.00 96.85 N
|
841 |
+
ATOM 784 OE1 GLN A 101 3.023 -4.070 -19.231 1.00 96.85 O
|
842 |
+
ATOM 785 N ASP A 102 6.087 -2.645 -14.538 1.00 96.59 N
|
843 |
+
ATOM 786 CA ASP A 102 7.443 -2.145 -14.291 1.00 96.59 C
|
844 |
+
ATOM 787 C ASP A 102 7.704 -1.752 -12.831 1.00 96.59 C
|
845 |
+
ATOM 788 CB ASP A 102 7.711 -0.930 -15.181 1.00 96.59 C
|
846 |
+
ATOM 789 O ASP A 102 6.889 -1.101 -12.173 1.00 96.59 O
|
847 |
+
ATOM 790 CG ASP A 102 7.659 -1.290 -16.660 1.00 96.59 C
|
848 |
+
ATOM 791 OD1 ASP A 102 8.534 -2.079 -17.069 1.00 96.59 O
|
849 |
+
ATOM 792 OD2 ASP A 102 6.773 -0.727 -17.336 1.00 96.59 O
|
850 |
+
ATOM 793 N VAL A 103 8.917 -2.026 -12.337 1.00 95.22 N
|
851 |
+
ATOM 794 CA VAL A 103 9.349 -1.637 -10.977 1.00 95.22 C
|
852 |
+
ATOM 795 C VAL A 103 9.292 -0.122 -10.731 1.00 95.22 C
|
853 |
+
ATOM 796 CB VAL A 103 10.748 -2.213 -10.677 1.00 95.22 C
|
854 |
+
ATOM 797 O VAL A 103 8.979 0.318 -9.623 1.00 95.22 O
|
855 |
+
ATOM 798 CG1 VAL A 103 11.861 -1.607 -11.544 1.00 95.22 C
|
856 |
+
ATOM 799 CG2 VAL A 103 11.129 -2.048 -9.201 1.00 95.22 C
|
857 |
+
ATOM 800 N GLY A 104 9.542 0.689 -11.764 1.00 95.14 N
|
858 |
+
ATOM 801 CA GLY A 104 9.465 2.152 -11.696 1.00 95.14 C
|
859 |
+
ATOM 802 C GLY A 104 8.037 2.716 -11.714 1.00 95.14 C
|
860 |
+
ATOM 803 O GLY A 104 7.866 3.921 -11.523 1.00 95.14 O
|
861 |
+
ATOM 804 N ARG A 105 7.024 1.874 -11.957 1.00 97.47 N
|
862 |
+
ATOM 805 CA ARG A 105 5.590 2.209 -12.032 1.00 97.47 C
|
863 |
+
ATOM 806 C ARG A 105 4.776 1.164 -11.265 1.00 97.47 C
|
864 |
+
ATOM 807 CB ARG A 105 5.135 2.311 -13.500 1.00 97.47 C
|
865 |
+
ATOM 808 O ARG A 105 3.951 0.440 -11.820 1.00 97.47 O
|
866 |
+
ATOM 809 CG ARG A 105 5.806 3.431 -14.305 1.00 97.47 C
|
867 |
+
ATOM 810 CD ARG A 105 5.448 4.822 -13.767 1.00 97.47 C
|
868 |
+
ATOM 811 NE ARG A 105 6.067 5.881 -14.584 1.00 97.47 N
|
869 |
+
ATOM 812 NH1 ARG A 105 8.095 5.983 -13.498 1.00 97.47 N
|
870 |
+
ATOM 813 NH2 ARG A 105 7.702 7.308 -15.251 1.00 97.47 N
|
871 |
+
ATOM 814 CZ ARG A 105 7.281 6.379 -14.438 1.00 97.47 C
|
872 |
+
ATOM 815 N ASN A 106 5.093 1.040 -9.976 1.00 97.81 N
|
873 |
+
ATOM 816 CA ASN A 106 4.556 -0.005 -9.107 1.00 97.81 C
|
874 |
+
ATOM 817 C ASN A 106 3.438 0.460 -8.152 1.00 97.81 C
|
875 |
+
ATOM 818 CB ASN A 106 5.704 -0.771 -8.432 1.00 97.81 C
|
876 |
+
ATOM 819 O ASN A 106 3.247 -0.188 -7.119 1.00 97.81 O
|
877 |
+
ATOM 820 CG ASN A 106 6.431 -0.012 -7.340 1.00 97.81 C
|
878 |
+
ATOM 821 ND2 ASN A 106 7.305 -0.695 -6.636 1.00 97.81 N
|
879 |
+
ATOM 822 OD1 ASN A 106 6.253 1.175 -7.093 1.00 97.81 O
|
880 |
+
ATOM 823 N VAL A 107 2.734 1.557 -8.485 1.00 98.15 N
|
881 |
+
ATOM 824 CA VAL A 107 1.443 2.054 -7.942 1.00 98.15 C
|
882 |
+
ATOM 825 C VAL A 107 1.389 2.416 -6.453 1.00 98.15 C
|
883 |
+
ATOM 826 CB VAL A 107 0.301 1.097 -8.355 1.00 98.15 C
|
884 |
+
ATOM 827 O VAL A 107 0.765 3.406 -6.081 1.00 98.15 O
|
885 |
+
ATOM 828 CG1 VAL A 107 -1.091 1.468 -7.825 1.00 98.15 C
|
886 |
+
ATOM 829 CG2 VAL A 107 0.151 1.069 -9.883 1.00 98.15 C
|
887 |
+
ATOM 830 N VAL A 108 2.023 1.653 -5.568 1.00 98.56 N
|
888 |
+
ATOM 831 CA VAL A 108 1.879 1.754 -4.113 1.00 98.56 C
|
889 |
+
ATOM 832 C VAL A 108 3.219 1.685 -3.385 1.00 98.56 C
|
890 |
+
ATOM 833 CB VAL A 108 0.908 0.693 -3.547 1.00 98.56 C
|
891 |
+
ATOM 834 O VAL A 108 4.133 0.936 -3.744 1.00 98.56 O
|
892 |
+
ATOM 835 CG1 VAL A 108 -0.553 1.021 -3.857 1.00 98.56 C
|
893 |
+
ATOM 836 CG2 VAL A 108 1.208 -0.726 -4.055 1.00 98.56 C
|
894 |
+
ATOM 837 N HIS A 109 3.320 2.462 -2.310 1.00 98.47 N
|
895 |
+
ATOM 838 CA HIS A 109 4.326 2.351 -1.250 1.00 98.47 C
|
896 |
+
ATOM 839 C HIS A 109 3.662 1.886 0.051 1.00 98.47 C
|
897 |
+
ATOM 840 CB HIS A 109 5.043 3.694 -1.047 1.00 98.47 C
|
898 |
+
ATOM 841 O HIS A 109 2.533 2.289 0.345 1.00 98.47 O
|
899 |
+
ATOM 842 CG HIS A 109 5.957 3.680 0.152 1.00 98.47 C
|
900 |
+
ATOM 843 CD2 HIS A 109 5.651 4.124 1.407 1.00 98.47 C
|
901 |
+
ATOM 844 ND1 HIS A 109 7.204 3.062 0.175 1.00 98.47 N
|
902 |
+
ATOM 845 CE1 HIS A 109 7.643 3.187 1.444 1.00 98.47 C
|
903 |
+
ATOM 846 NE2 HIS A 109 6.720 3.793 2.208 1.00 98.47 N
|
904 |
+
ATOM 847 N GLY A 110 4.384 1.090 0.835 1.00 98.24 N
|
905 |
+
ATOM 848 CA GLY A 110 3.994 0.669 2.179 1.00 98.24 C
|
906 |
+
ATOM 849 C GLY A 110 5.244 0.354 2.991 1.00 98.24 C
|
907 |
+
ATOM 850 O GLY A 110 6.169 -0.259 2.444 1.00 98.24 O
|
908 |
+
ATOM 851 N SER A 111 5.257 0.797 4.247 1.00 98.33 N
|
909 |
+
ATOM 852 CA SER A 111 6.378 0.667 5.179 1.00 98.33 C
|
910 |
+
ATOM 853 C SER A 111 6.792 -0.798 5.350 1.00 98.33 C
|
911 |
+
ATOM 854 CB SER A 111 5.996 1.281 6.531 1.00 98.33 C
|
912 |
+
ATOM 855 O SER A 111 5.955 -1.698 5.284 1.00 98.33 O
|
913 |
+
ATOM 856 OG SER A 111 5.264 2.483 6.344 1.00 98.33 O
|
914 |
+
ATOM 857 N ASP A 112 8.087 -1.048 5.524 1.00 96.76 N
|
915 |
+
ATOM 858 CA ASP A 112 8.671 -2.398 5.583 1.00 96.76 C
|
916 |
+
ATOM 859 C ASP A 112 8.773 -2.978 7.000 1.00 96.76 C
|
917 |
+
ATOM 860 CB ASP A 112 10.039 -2.377 4.887 1.00 96.76 C
|
918 |
+
ATOM 861 O ASP A 112 8.945 -4.182 7.170 1.00 96.76 O
|
919 |
+
ATOM 862 CG ASP A 112 11.047 -1.379 5.476 1.00 96.76 C
|
920 |
+
ATOM 863 OD1 ASP A 112 10.802 -0.828 6.580 1.00 96.76 O
|
921 |
+
ATOM 864 OD2 ASP A 112 12.031 -1.109 4.763 1.00 96.76 O
|
922 |
+
ATOM 865 N SER A 113 8.628 -2.130 8.012 1.00 97.76 N
|
923 |
+
ATOM 866 CA SER A 113 8.737 -2.463 9.426 1.00 97.76 C
|
924 |
+
ATOM 867 C SER A 113 7.872 -1.518 10.257 1.00 97.76 C
|
925 |
+
ATOM 868 CB SER A 113 10.201 -2.366 9.863 1.00 97.76 C
|
926 |
+
ATOM 869 O SER A 113 7.460 -0.452 9.790 1.00 97.76 O
|
927 |
+
ATOM 870 OG SER A 113 10.659 -1.041 9.704 1.00 97.76 O
|
928 |
+
ATOM 871 N THR A 114 7.572 -1.903 11.496 1.00 97.93 N
|
929 |
+
ATOM 872 CA THR A 114 6.872 -1.032 12.452 1.00 97.93 C
|
930 |
+
ATOM 873 C THR A 114 7.669 0.236 12.756 1.00 97.93 C
|
931 |
+
ATOM 874 CB THR A 114 6.595 -1.782 13.763 1.00 97.93 C
|
932 |
+
ATOM 875 O THR A 114 7.081 1.303 12.910 1.00 97.93 O
|
933 |
+
ATOM 876 CG2 THR A 114 5.397 -2.719 13.624 1.00 97.93 C
|
934 |
+
ATOM 877 OG1 THR A 114 7.706 -2.585 14.103 1.00 97.93 O
|
935 |
+
ATOM 878 N GLU A 115 9.000 0.147 12.775 1.00 97.99 N
|
936 |
+
ATOM 879 CA GLU A 115 9.892 1.295 12.954 1.00 97.99 C
|
937 |
+
ATOM 880 C GLU A 115 9.787 2.279 11.779 1.00 97.99 C
|
938 |
+
ATOM 881 CB GLU A 115 11.316 0.767 13.152 1.00 97.99 C
|
939 |
+
ATOM 882 O GLU A 115 9.492 3.461 11.985 1.00 97.99 O
|
940 |
+
ATOM 883 CG GLU A 115 12.292 1.886 13.537 1.00 97.99 C
|
941 |
+
ATOM 884 CD GLU A 115 13.683 1.364 13.927 1.00 97.99 C
|
942 |
+
ATOM 885 OE1 GLU A 115 14.504 2.215 14.331 1.00 97.99 O
|
943 |
+
ATOM 886 OE2 GLU A 115 13.908 0.136 13.832 1.00 97.99 O
|
944 |
+
ATOM 887 N SER A 116 9.905 1.783 10.540 1.00 97.79 N
|
945 |
+
ATOM 888 CA SER A 116 9.667 2.582 9.332 1.00 97.79 C
|
946 |
+
ATOM 889 C SER A 116 8.258 3.168 9.308 1.00 97.79 C
|
947 |
+
ATOM 890 CB SER A 116 9.859 1.747 8.064 1.00 97.79 C
|
948 |
+
ATOM 891 O SER A 116 8.101 4.331 8.949 1.00 97.79 O
|
949 |
+
ATOM 892 OG SER A 116 11.219 1.437 7.867 1.00 97.79 O
|
950 |
+
ATOM 893 N ALA A 117 7.238 2.409 9.720 1.00 98.50 N
|
951 |
+
ATOM 894 CA ALA A 117 5.860 2.890 9.764 1.00 98.50 C
|
952 |
+
ATOM 895 C ALA A 117 5.705 4.087 10.711 1.00 98.50 C
|
953 |
+
ATOM 896 CB ALA A 117 4.925 1.736 10.138 1.00 98.50 C
|
954 |
+
ATOM 897 O ALA A 117 5.166 5.109 10.300 1.00 98.50 O
|
955 |
+
ATOM 898 N ASN A 118 6.235 4.013 11.936 1.00 98.13 N
|
956 |
+
ATOM 899 CA ASN A 118 6.182 5.126 12.890 1.00 98.13 C
|
957 |
+
ATOM 900 C ASN A 118 6.890 6.378 12.351 1.00 98.13 C
|
958 |
+
ATOM 901 CB ASN A 118 6.810 4.673 14.217 1.00 98.13 C
|
959 |
+
ATOM 902 O ASN A 118 6.342 7.480 12.414 1.00 98.13 O
|
960 |
+
ATOM 903 CG ASN A 118 5.933 3.704 14.987 1.00 98.13 C
|
961 |
+
ATOM 904 ND2 ASN A 118 6.523 2.803 15.735 1.00 98.13 N
|
962 |
+
ATOM 905 OD1 ASN A 118 4.717 3.757 14.959 1.00 98.13 O
|
963 |
+
ATOM 906 N ARG A 119 8.090 6.214 11.779 1.00 98.21 N
|
964 |
+
ATOM 907 CA ARG A 119 8.846 7.316 11.163 1.00 98.21 C
|
965 |
+
ATOM 908 C ARG A 119 8.077 7.947 10.003 1.00 98.21 C
|
966 |
+
ATOM 909 CB ARG A 119 10.209 6.778 10.702 1.00 98.21 C
|
967 |
+
ATOM 910 O ARG A 119 7.982 9.167 9.915 1.00 98.21 O
|
968 |
+
ATOM 911 CG ARG A 119 11.118 7.872 10.109 1.00 98.21 C
|
969 |
+
ATOM 912 CD ARG A 119 12.412 7.276 9.537 1.00 98.21 C
|
970 |
+
ATOM 913 NE ARG A 119 12.142 6.383 8.387 1.00 98.21 N
|
971 |
+
ATOM 914 NH1 ARG A 119 12.373 7.906 6.667 1.00 98.21 N
|
972 |
+
ATOM 915 NH2 ARG A 119 11.738 5.829 6.211 1.00 98.21 N
|
973 |
+
ATOM 916 CZ ARG A 119 12.088 6.710 7.105 1.00 98.21 C
|
974 |
+
ATOM 917 N GLU A 120 7.537 7.122 9.111 1.00 98.63 N
|
975 |
+
ATOM 918 CA GLU A 120 6.803 7.578 7.933 1.00 98.63 C
|
976 |
+
ATOM 919 C GLU A 120 5.483 8.255 8.316 1.00 98.63 C
|
977 |
+
ATOM 920 CB GLU A 120 6.570 6.406 6.963 1.00 98.63 C
|
978 |
+
ATOM 921 O GLU A 120 5.193 9.324 7.792 1.00 98.63 O
|
979 |
+
ATOM 922 CG GLU A 120 7.867 5.959 6.264 1.00 98.63 C
|
980 |
+
ATOM 923 CD GLU A 120 7.706 4.669 5.440 1.00 98.63 C
|
981 |
+
ATOM 924 OE1 GLU A 120 8.734 4.043 5.116 1.00 98.63 O
|
982 |
+
ATOM 925 OE2 GLU A 120 6.576 4.331 5.008 1.00 98.63 O
|
983 |
+
ATOM 926 N ILE A 121 4.709 7.711 9.258 1.00 98.69 N
|
984 |
+
ATOM 927 CA ILE A 121 3.463 8.340 9.724 1.00 98.69 C
|
985 |
+
ATOM 928 C ILE A 121 3.744 9.754 10.239 1.00 98.69 C
|
986 |
+
ATOM 929 CB ILE A 121 2.767 7.454 10.782 1.00 98.69 C
|
987 |
+
ATOM 930 O ILE A 121 3.123 10.697 9.757 1.00 98.69 O
|
988 |
+
ATOM 931 CG1 ILE A 121 2.201 6.183 10.108 1.00 98.69 C
|
989 |
+
ATOM 932 CG2 ILE A 121 1.630 8.216 11.490 1.00 98.69 C
|
990 |
+
ATOM 933 CD1 ILE A 121 1.862 5.065 11.102 1.00 98.69 C
|
991 |
+
ATOM 934 N ASN A 122 4.736 9.909 11.121 1.00 98.32 N
|
992 |
+
ATOM 935 CA ASN A 122 5.114 11.207 11.688 1.00 98.32 C
|
993 |
+
ATOM 936 C ASN A 122 5.680 12.189 10.649 1.00 98.32 C
|
994 |
+
ATOM 937 CB ASN A 122 6.148 10.962 12.798 1.00 98.32 C
|
995 |
+
ATOM 938 O ASN A 122 5.614 13.399 10.844 1.00 98.32 O
|
996 |
+
ATOM 939 CG ASN A 122 5.560 10.302 14.032 1.00 98.32 C
|
997 |
+
ATOM 940 ND2 ASN A 122 6.362 9.592 14.788 1.00 98.32 N
|
998 |
+
ATOM 941 OD1 ASN A 122 4.395 10.432 14.360 1.00 98.32 O
|
999 |
+
ATOM 942 N LEU A 123 6.256 11.683 9.555 1.00 98.53 N
|
1000 |
+
ATOM 943 CA LEU A 123 6.753 12.514 8.459 1.00 98.53 C
|
1001 |
+
ATOM 944 C LEU A 123 5.616 13.020 7.557 1.00 98.53 C
|
1002 |
+
ATOM 945 CB LEU A 123 7.792 11.698 7.669 1.00 98.53 C
|
1003 |
+
ATOM 946 O LEU A 123 5.680 14.139 7.052 1.00 98.53 O
|
1004 |
+
ATOM 947 CG LEU A 123 8.426 12.461 6.494 1.00 98.53 C
|
1005 |
+
ATOM 948 CD1 LEU A 123 9.223 13.684 6.953 1.00 98.53 C
|
1006 |
+
ATOM 949 CD2 LEU A 123 9.363 11.538 5.718 1.00 98.53 C
|
1007 |
+
ATOM 950 N TRP A 124 4.599 12.190 7.316 1.00 98.65 N
|
1008 |
+
ATOM 951 CA TRP A 124 3.526 12.485 6.362 1.00 98.65 C
|
1009 |
+
ATOM 952 C TRP A 124 2.294 13.131 6.999 1.00 98.65 C
|
1010 |
+
ATOM 953 CB TRP A 124 3.154 11.204 5.604 1.00 98.65 C
|
1011 |
+
ATOM 954 O TRP A 124 1.557 13.821 6.292 1.00 98.65 O
|
1012 |
+
ATOM 955 CG TRP A 124 4.130 10.796 4.540 1.00 98.65 C
|
1013 |
+
ATOM 956 CD1 TRP A 124 5.178 9.954 4.687 1.00 98.65 C
|
1014 |
+
ATOM 957 CD2 TRP A 124 4.164 11.206 3.140 1.00 98.65 C
|
1015 |
+
ATOM 958 CE2 TRP A 124 5.273 10.569 2.508 1.00 98.65 C
|
1016 |
+
ATOM 959 CE3 TRP A 124 3.369 12.047 2.334 1.00 98.65 C
|
1017 |
+
ATOM 960 NE1 TRP A 124 5.855 9.822 3.496 1.00 98.65 N
|
1018 |
+
ATOM 961 CH2 TRP A 124 4.769 11.583 0.381 1.00 98.65 C
|
1019 |
+
ATOM 962 CZ2 TRP A 124 5.586 10.745 1.157 1.00 98.65 C
|
1020 |
+
ATOM 963 CZ3 TRP A 124 3.664 12.227 0.969 1.00 98.65 C
|
1021 |
+
ATOM 964 N PHE A 125 2.056 12.925 8.293 1.00 98.68 N
|
1022 |
+
ATOM 965 CA PHE A 125 0.862 13.379 8.999 1.00 98.68 C
|
1023 |
+
ATOM 966 C PHE A 125 1.216 14.008 10.344 1.00 98.68 C
|
1024 |
+
ATOM 967 CB PHE A 125 -0.101 12.203 9.200 1.00 98.68 C
|
1025 |
+
ATOM 968 O PHE A 125 2.029 13.492 11.107 1.00 98.68 O
|
1026 |
+
ATOM 969 CG PHE A 125 -0.648 11.630 7.909 1.00 98.68 C
|
1027 |
+
ATOM 970 CD1 PHE A 125 -1.785 12.207 7.316 1.00 98.68 C
|
1028 |
+
ATOM 971 CD2 PHE A 125 -0.024 10.526 7.301 1.00 98.68 C
|
1029 |
+
ATOM 972 CE1 PHE A 125 -2.323 11.660 6.141 1.00 98.68 C
|
1030 |
+
ATOM 973 CE2 PHE A 125 -0.549 9.988 6.113 1.00 98.68 C
|
1031 |
+
ATOM 974 CZ PHE A 125 -1.707 10.550 5.544 1.00 98.68 C
|
1032 |
+
ATOM 975 N SER A 126 0.548 15.113 10.649 1.00 98.31 N
|
1033 |
+
ATOM 976 CA SER A 126 0.478 15.662 11.995 1.00 98.31 C
|
1034 |
+
ATOM 977 C SER A 126 -0.423 14.794 12.886 1.00 98.31 C
|
1035 |
+
ATOM 978 CB SER A 126 -0.032 17.105 11.956 1.00 98.31 C
|
1036 |
+
ATOM 979 O SER A 126 -1.332 14.126 12.382 1.00 98.31 O
|
1037 |
+
ATOM 980 OG SER A 126 -1.350 17.158 11.457 1.00 98.31 O
|
1038 |
+
ATOM 981 N PRO A 127 -0.245 14.827 14.218 1.00 97.88 N
|
1039 |
+
ATOM 982 CA PRO A 127 -1.102 14.077 15.136 1.00 97.88 C
|
1040 |
+
ATOM 983 C PRO A 127 -2.598 14.394 14.992 1.00 97.88 C
|
1041 |
+
ATOM 984 CB PRO A 127 -0.592 14.434 16.535 1.00 97.88 C
|
1042 |
+
ATOM 985 O PRO A 127 -3.429 13.522 15.217 1.00 97.88 O
|
1043 |
+
ATOM 986 CG PRO A 127 0.878 14.779 16.301 1.00 97.88 C
|
1044 |
+
ATOM 987 CD PRO A 127 0.852 15.461 14.937 1.00 97.88 C
|
1045 |
+
ATOM 988 N GLN A 128 -2.953 15.621 14.597 1.00 98.05 N
|
1046 |
+
ATOM 989 CA GLN A 128 -4.343 16.054 14.415 1.00 98.05 C
|
1047 |
+
ATOM 990 C GLN A 128 -4.990 15.483 13.145 1.00 98.05 C
|
1048 |
+
ATOM 991 CB GLN A 128 -4.419 17.591 14.381 1.00 98.05 C
|
1049 |
+
ATOM 992 O GLN A 128 -6.213 15.443 13.051 1.00 98.05 O
|
1050 |
+
ATOM 993 CG GLN A 128 -3.970 18.268 15.687 1.00 98.05 C
|
1051 |
+
ATOM 994 CD GLN A 128 -2.458 18.276 15.911 1.00 98.05 C
|
1052 |
+
ATOM 995 NE2 GLN A 128 -2.007 18.360 17.141 1.00 98.05 N
|
1053 |
+
ATOM 996 OE1 GLN A 128 -1.645 18.190 15.006 1.00 98.05 O
|
1054 |
+
ATOM 997 N GLU A 129 -4.189 15.035 12.176 1.00 98.59 N
|
1055 |
+
ATOM 998 CA GLU A 129 -4.682 14.367 10.966 1.00 98.59 C
|
1056 |
+
ATOM 999 C GLU A 129 -5.002 12.880 11.203 1.00 98.59 C
|
1057 |
+
ATOM 1000 CB GLU A 129 -3.662 14.530 9.826 1.00 98.59 C
|
1058 |
+
ATOM 1001 O GLU A 129 -5.566 12.236 10.319 1.00 98.59 O
|
1059 |
+
ATOM 1002 CG GLU A 129 -3.554 15.965 9.289 1.00 98.59 C
|
1060 |
+
ATOM 1003 CD GLU A 129 -2.342 16.109 8.357 1.00 98.59 C
|
1061 |
+
ATOM 1004 OE1 GLU A 129 -2.489 16.040 7.115 1.00 98.59 O
|
1062 |
+
ATOM 1005 OE2 GLU A 129 -1.209 16.248 8.866 1.00 98.59 O
|
1063 |
+
ATOM 1006 N LEU A 130 -4.656 12.326 12.374 1.00 98.65 N
|
1064 |
+
ATOM 1007 CA LEU A 130 -4.946 10.942 12.747 1.00 98.65 C
|
1065 |
+
ATOM 1008 C LEU A 130 -6.284 10.862 13.497 1.00 98.65 C
|
1066 |
+
ATOM 1009 CB LEU A 130 -3.786 10.358 13.577 1.00 98.65 C
|
1067 |
+
ATOM 1010 O LEU A 130 -6.406 11.280 14.649 1.00 98.65 O
|
1068 |
+
ATOM 1011 CG LEU A 130 -2.385 10.445 12.942 1.00 98.65 C
|
1069 |
+
ATOM 1012 CD1 LEU A 130 -1.366 9.771 13.861 1.00 98.65 C
|
1070 |
+
ATOM 1013 CD2 LEU A 130 -2.303 9.774 11.569 1.00 98.65 C
|
1071 |
+
ATOM 1014 N CYS A 131 -7.304 10.297 12.855 1.00 98.47 N
|
1072 |
+
ATOM 1015 CA CYS A 131 -8.637 10.152 13.427 1.00 98.47 C
|
1073 |
+
ATOM 1016 C CYS A 131 -8.710 8.993 14.431 1.00 98.47 C
|
1074 |
+
ATOM 1017 CB CYS A 131 -9.668 9.957 12.307 1.00 98.47 C
|
1075 |
+
ATOM 1018 O CYS A 131 -8.394 7.847 14.110 1.00 98.47 O
|
1076 |
+
ATOM 1019 SG CYS A 131 -9.729 11.408 11.215 1.00 98.47 S
|
1077 |
+
ATOM 1020 N GLN A 132 -9.233 9.274 15.627 1.00 97.77 N
|
1078 |
+
ATOM 1021 CA GLN A 132 -9.548 8.261 16.634 1.00 97.77 C
|
1079 |
+
ATOM 1022 C GLN A 132 -11.034 7.899 16.578 1.00 97.77 C
|
1080 |
+
ATOM 1023 CB GLN A 132 -9.162 8.757 18.033 1.00 97.77 C
|
1081 |
+
ATOM 1024 O GLN A 132 -11.896 8.743 16.813 1.00 97.77 O
|
1082 |
+
ATOM 1025 CG GLN A 132 -7.649 8.977 18.175 1.00 97.77 C
|
1083 |
+
ATOM 1026 CD GLN A 132 -7.239 9.345 19.597 1.00 97.77 C
|
1084 |
+
ATOM 1027 NE2 GLN A 132 -5.968 9.579 19.833 1.00 97.77 N
|
1085 |
+
ATOM 1028 OE1 GLN A 132 -8.031 9.424 20.521 1.00 97.77 O
|
1086 |
+
ATOM 1029 N TYR A 133 -11.340 6.640 16.271 1.00 98.01 N
|
1087 |
+
ATOM 1030 CA TYR A 133 -12.708 6.124 16.256 1.00 98.01 C
|
1088 |
+
ATOM 1031 C TYR A 133 -12.744 4.610 16.490 1.00 98.01 C
|
1089 |
+
ATOM 1032 CB TYR A 133 -13.414 6.491 14.940 1.00 98.01 C
|
1090 |
+
ATOM 1033 O TYR A 133 -11.742 3.908 16.308 1.00 98.01 O
|
1091 |
+
ATOM 1034 CG TYR A 133 -12.918 5.751 13.713 1.00 98.01 C
|
1092 |
+
ATOM 1035 CD1 TYR A 133 -11.670 6.084 13.155 1.00 98.01 C
|
1093 |
+
ATOM 1036 CD2 TYR A 133 -13.715 4.755 13.112 1.00 98.01 C
|
1094 |
+
ATOM 1037 CE1 TYR A 133 -11.234 5.449 11.983 1.00 98.01 C
|
1095 |
+
ATOM 1038 CE2 TYR A 133 -13.277 4.115 11.936 1.00 98.01 C
|
1096 |
+
ATOM 1039 OH TYR A 133 -11.656 3.940 10.173 1.00 98.01 O
|
1097 |
+
ATOM 1040 CZ TYR A 133 -12.047 4.484 11.354 1.00 98.01 C
|
1098 |
+
ATOM 1041 N LYS A 134 -13.924 4.124 16.891 1.00 96.78 N
|
1099 |
+
ATOM 1042 CA LYS A 134 -14.242 2.698 17.012 1.00 96.78 C
|
1100 |
+
ATOM 1043 C LYS A 134 -14.803 2.191 15.687 1.00 96.78 C
|
1101 |
+
ATOM 1044 CB LYS A 134 -15.230 2.491 18.171 1.00 96.78 C
|
1102 |
+
ATOM 1045 O LYS A 134 -15.750 2.785 15.167 1.00 96.78 O
|
1103 |
+
ATOM 1046 CG LYS A 134 -15.509 1.000 18.411 1.00 96.78 C
|
1104 |
+
ATOM 1047 CD LYS A 134 -16.436 0.788 19.613 1.00 96.78 C
|
1105 |
+
ATOM 1048 CE LYS A 134 -16.638 -0.718 19.810 1.00 96.78 C
|
1106 |
+
ATOM 1049 NZ LYS A 134 -17.508 -1.016 20.975 1.00 96.78 N
|
1107 |
+
ATOM 1050 N GLN A 135 -14.272 1.091 15.163 1.00 95.74 N
|
1108 |
+
ATOM 1051 CA GLN A 135 -14.828 0.483 13.963 1.00 95.74 C
|
1109 |
+
ATOM 1052 C GLN A 135 -15.991 -0.423 14.384 1.00 95.74 C
|
1110 |
+
ATOM 1053 CB GLN A 135 -13.717 -0.237 13.188 1.00 95.74 C
|
1111 |
+
ATOM 1054 O GLN A 135 -15.839 -1.329 15.197 1.00 95.74 O
|
1112 |
+
ATOM 1055 CG GLN A 135 -14.009 -0.362 11.690 1.00 95.74 C
|
1113 |
+
ATOM 1056 CD GLN A 135 -12.837 -0.952 10.902 1.00 95.74 C
|
1114 |
+
ATOM 1057 NE2 GLN A 135 -12.968 -1.123 9.606 1.00 95.74 N
|
1115 |
+
ATOM 1058 OE1 GLN A 135 -11.762 -1.210 11.399 1.00 95.74 O
|
1116 |
+
ATOM 1059 N ALA A 136 -17.193 -0.176 13.857 1.00 96.94 N
|
1117 |
+
ATOM 1060 CA ALA A 136 -18.390 -0.920 14.269 1.00 96.94 C
|
1118 |
+
ATOM 1061 C ALA A 136 -18.271 -2.439 14.033 1.00 96.94 C
|
1119 |
+
ATOM 1062 CB ALA A 136 -19.595 -0.343 13.519 1.00 96.94 C
|
1120 |
+
ATOM 1063 O ALA A 136 -18.921 -3.228 14.716 1.00 96.94 O
|
1121 |
+
ATOM 1064 N VAL A 137 -17.425 -2.833 13.079 1.00 96.00 N
|
1122 |
+
ATOM 1065 CA VAL A 137 -17.173 -4.226 12.709 1.00 96.00 C
|
1123 |
+
ATOM 1066 C VAL A 137 -16.037 -4.890 13.496 1.00 96.00 C
|
1124 |
+
ATOM 1067 CB VAL A 137 -16.982 -4.382 11.187 1.00 96.00 C
|
1125 |
+
ATOM 1068 O VAL A 137 -15.826 -6.081 13.297 1.00 96.00 O
|
1126 |
+
ATOM 1069 CG1 VAL A 137 -18.285 -4.062 10.441 1.00 96.00 C
|
1127 |
+
ATOM 1070 CG2 VAL A 137 -15.885 -3.468 10.633 1.00 96.00 C
|
1128 |
+
ATOM 1071 N ASP A 138 -15.354 -4.183 14.411 1.00 95.73 N
|
1129 |
+
ATOM 1072 CA ASP A 138 -14.252 -4.741 15.221 1.00 95.73 C
|
1130 |
+
ATOM 1073 C ASP A 138 -14.603 -6.100 15.868 1.00 95.73 C
|
1131 |
+
ATOM 1074 CB ASP A 138 -13.774 -3.743 16.308 1.00 95.73 C
|
1132 |
+
ATOM 1075 O ASP A 138 -13.807 -7.030 15.721 1.00 95.73 O
|
1133 |
+
ATOM 1076 CG ASP A 138 -12.899 -2.592 15.799 1.00 95.73 C
|
1134 |
+
ATOM 1077 OD1 ASP A 138 -12.216 -2.793 14.778 1.00 95.73 O
|
1135 |
+
ATOM 1078 OD2 ASP A 138 -12.918 -1.506 16.437 1.00 95.73 O
|
1136 |
+
ATOM 1079 N PRO A 139 -15.795 -6.297 16.483 1.00 96.81 N
|
1137 |
+
ATOM 1080 CA PRO A 139 -16.163 -7.580 17.100 1.00 96.81 C
|
1138 |
+
ATOM 1081 C PRO A 139 -16.336 -8.754 16.122 1.00 96.81 C
|
1139 |
+
ATOM 1082 CB PRO A 139 -17.483 -7.322 17.839 1.00 96.81 C
|
1140 |
+
ATOM 1083 O PRO A 139 -16.470 -9.891 16.555 1.00 96.81 O
|
1141 |
+
ATOM 1084 CG PRO A 139 -17.509 -5.811 18.044 1.00 96.81 C
|
1142 |
+
ATOM 1085 CD PRO A 139 -16.831 -5.312 16.776 1.00 96.81 C
|
1143 |
+
ATOM 1086 N TRP A 140 -16.395 -8.489 14.814 1.00 97.41 N
|
1144 |
+
ATOM 1087 CA TRP A 140 -16.508 -9.510 13.766 1.00 97.41 C
|
1145 |
+
ATOM 1088 C TRP A 140 -15.171 -9.774 13.056 1.00 97.41 C
|
1146 |
+
ATOM 1089 CB TRP A 140 -17.587 -9.089 12.760 1.00 97.41 C
|
1147 |
+
ATOM 1090 O TRP A 140 -15.081 -10.694 12.245 1.00 97.41 O
|
1148 |
+
ATOM 1091 CG TRP A 140 -18.928 -8.758 13.342 1.00 97.41 C
|
1149 |
+
ATOM 1092 CD1 TRP A 140 -19.415 -7.512 13.525 1.00 97.41 C
|
1150 |
+
ATOM 1093 CD2 TRP A 140 -19.960 -9.661 13.843 1.00 97.41 C
|
1151 |
+
ATOM 1094 CE2 TRP A 140 -21.061 -8.877 14.304 1.00 97.41 C
|
1152 |
+
ATOM 1095 CE3 TRP A 140 -20.076 -11.062 13.962 1.00 97.41 C
|
1153 |
+
ATOM 1096 NE1 TRP A 140 -20.673 -7.572 14.089 1.00 97.41 N
|
1154 |
+
ATOM 1097 CH2 TRP A 140 -22.306 -10.849 14.942 1.00 97.41 C
|
1155 |
+
ATOM 1098 CZ2 TRP A 140 -22.222 -9.450 14.843 1.00 97.41 C
|
1156 |
+
ATOM 1099 CZ3 TRP A 140 -21.235 -11.649 14.505 1.00 97.41 C
|
1157 |
+
ATOM 1100 N ILE A 141 -14.145 -8.956 13.326 1.00 96.13 N
|
1158 |
+
ATOM 1101 CA ILE A 141 -12.797 -9.059 12.741 1.00 96.13 C
|
1159 |
+
ATOM 1102 C ILE A 141 -11.799 -9.609 13.770 1.00 96.13 C
|
1160 |
+
ATOM 1103 CB ILE A 141 -12.349 -7.674 12.206 1.00 96.13 C
|
1161 |
+
ATOM 1104 O ILE A 141 -10.845 -10.303 13.406 1.00 96.13 O
|
1162 |
+
ATOM 1105 CG1 ILE A 141 -13.303 -7.164 11.100 1.00 96.13 C
|
1163 |
+
ATOM 1106 CG2 ILE A 141 -10.915 -7.723 11.635 1.00 96.13 C
|
1164 |
+
ATOM 1107 CD1 ILE A 141 -13.096 -5.686 10.752 1.00 96.13 C
|
1165 |
+
ATOM 1108 N HIS A 142 -11.995 -9.286 15.048 1.00 93.03 N
|
1166 |
+
ATOM 1109 CA HIS A 142 -11.108 -9.645 16.148 1.00 93.03 C
|
1167 |
+
ATOM 1110 C HIS A 142 -11.830 -10.543 17.159 1.00 93.03 C
|
1168 |
+
ATOM 1111 CB HIS A 142 -10.554 -8.354 16.772 1.00 93.03 C
|
1169 |
+
ATOM 1112 O HIS A 142 -12.997 -10.306 17.464 1.00 93.03 O
|
1170 |
+
ATOM 1113 CG HIS A 142 -9.795 -7.494 15.784 1.00 93.03 C
|
1171 |
+
ATOM 1114 CD2 HIS A 142 -9.924 -6.144 15.580 1.00 93.03 C
|
1172 |
+
ATOM 1115 ND1 HIS A 142 -8.895 -7.957 14.852 1.00 93.03 N
|
1173 |
+
ATOM 1116 CE1 HIS A 142 -8.480 -6.912 14.118 1.00 93.03 C
|
1174 |
+
ATOM 1117 NE2 HIS A 142 -9.056 -5.783 14.540 1.00 93.03 N
|
1175 |
+
ATOM 1118 N GLU A 143 -11.121 -11.573 17.634 1.00 90.01 N
|
1176 |
+
ATOM 1119 CA GLU A 143 -11.546 -12.472 18.721 1.00 90.01 C
|
1177 |
+
ATOM 1120 C GLU A 143 -11.356 -11.840 20.106 1.00 90.01 C
|
1178 |
+
ATOM 1121 CB GLU A 143 -10.831 -13.838 18.593 1.00 90.01 C
|
1179 |
+
ATOM 1122 O GLU A 143 -10.395 -11.045 20.264 1.00 90.01 O
|
1180 |
+
ATOM 1123 CG GLU A 143 -9.312 -13.779 18.871 1.00 90.01 C
|
1181 |
+
ATOM 1124 CD GLU A 143 -8.523 -15.059 18.519 1.00 90.01 C
|
1182 |
+
ATOM 1125 OE1 GLU A 143 -7.279 -14.926 18.332 1.00 90.01 O
|
1183 |
+
ATOM 1126 OE2 GLU A 143 -9.123 -16.150 18.382 1.00 90.01 O
|
1184 |
+
ATOM 1127 OXT GLU A 143 -12.189 -12.169 20.976 1.00 90.01 O
|
1185 |
+
TER 1128 GLU A 143
|
1186 |
+
ENDMDL
|
1187 |
+
END
|
case_study/A0A516RTC5.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/A0A5B8NBE6.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/A0A5B8NBN0.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/A0A7J6F8C5.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/B1KN79.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/C1DMX5.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/C4R826.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/G4VQX9.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/J9PY59.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/J9VGQ7.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/J9VVW8.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/M9PF61.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/O53504.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q0RWC9.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q1NEJ0.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q39VG1.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q6F4N4.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q72K04.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q93UV7.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9AGK2.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9AI62.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9KJF3.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9XZ48.pdb ADDED: The diff for this file is too large to render. See raw diff.
constants.py
ADDED
@@ -0,0 +1,39 @@
reps1 = [
    {
        "model": 0,
        "chain": "",
        "resname": "",
        "style": "cartoon",  # line, stick, sphere, cartoon, surface
        "color": "whiteCarbon",  # blue, red, green, yellow, whiteCarbon
        "residue_range": "",  # 3-15
        "around": 0,  # surrounding range, default 0
        "byres": False,
        "visible": False
    },
]

style_list = ["Cartoon", "Sphere", "Stick", "Line", "Surface"]
color_list = ["White", "Blue", "Red", "Green", "Yellow", "Magenta", "Cyan", "Orange", "Purple", "Gray"]
default_reps = [
    {
        "model": 0,
        "chain": "",
        "resname": "",
        "style": style_list[0][0].lower() + style_list[0][1:],
        "color": color_list[0][0].lower() + color_list[0][1:] + "Carbon",  # whiteCarbon
        "residue_range": "",  # 3-15
        "around": 0,  # surrounding range, default 0
        "byres": False,
        "visible": False
    },
]
model_list = ['M3Site-ESM3-abs', 'M3Site-ESM3-full', 'M3Site-ESM2-abs', 'M3Site-ESM2-full', 'M3Site-ESM1b-abs', 'M3Site-ESM1b-full']
no_cat_dict = {
    'b': 'background',
    '0': 'CRI',
    '1': 'SCI',
    '2': 'PI',
    '3': 'PTCR',
    '4': 'IA',
    '5': 'SSA'
}
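The constants above configure the 3D viewer representations and map predicted class indices to active-site categories. Below is a minimal sketch of how they could be consumed; the helper name, highlight style, and color are illustrative assumptions rather than this Space's actual app.py wiring.

# Illustrative sketch (assumed helper, not this Space's actual app.py): turn per-residue
# predictions into readable categories and viewer highlights using the constants above.
from constants import default_reps, no_cat_dict

def summarize_predictions(pred_labels):
    """pred_labels: per-residue labels such as ['b', 'b', '0', '4', ...] (1-based residues)."""
    reps = [dict(default_reps[0])]  # base cartoon representation for the whole chain
    named_sites = []
    for i, label in enumerate(pred_labels, start=1):
        if label == 'b':  # background residues stay on the base cartoon
            continue
        named_sites.append((i, no_cat_dict[label]))  # e.g. (3, 'CRI')
        reps.append({
            "model": 0,
            "chain": "",
            "resname": "",
            "style": "stick",        # assumed highlight style
            "color": "redCarbon",    # assumed highlight color
            "residue_range": f"{i}-{i}",
            "around": 0,
            "byres": True,
            "visible": True,
        })
    return named_sites, reps

# summarize_predictions(['b', '0', 'b', '4']) -> ([(2, 'CRI'), (4, 'IA')], [...])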
esm/__init__.py
ADDED
File without changes
esm/layers/attention.py
ADDED
@@ -0,0 +1,70 @@
import functools

import einops
import torch
import torch.nn.functional as F
from torch import nn

from esm.layers.rotary import RotaryEmbedding


class MultiHeadAttention(nn.Module):
    def __init__(
        self,
        d_model: int,
        n_heads: int,
        bias: bool = False,
        qk_layernorm: bool = True,
    ):
        super().__init__()

        self.d_model = d_model
        self.n_heads = n_heads

        self.d_head = self.d_model // self.n_heads
        self.layernorm_qkv = nn.Sequential(
            nn.LayerNorm(d_model), nn.Linear(d_model, d_model * 3, bias=bias)
        )
        self.out_proj = nn.Linear(d_model, d_model, bias=bias)

        if qk_layernorm:
            self.q_ln = nn.LayerNorm(d_model, bias=bias)
            self.k_ln = nn.LayerNorm(d_model, bias=bias)
        else:
            self.q_ln = nn.Identity()
            self.k_ln = nn.Identity()

        self.rotary = RotaryEmbedding(d_model // n_heads)

    def _apply_rotary(self, q: torch.Tensor, k: torch.Tensor):
        q = q.unflatten(-1, (self.n_heads, self.d_head))
        k = k.unflatten(-1, (self.n_heads, self.d_head))
        q, k = self.rotary(q, k)
        q = q.flatten(-2, -1)
        k = k.flatten(-2, -1)
        return q, k

    def forward(self, x, seq_id):
        qkv_BLD3 = self.layernorm_qkv(x)
        query_BLD, key_BLD, value_BLD = torch.chunk(qkv_BLD3, 3, dim=-1)
        query_BLD, key_BLD = self.q_ln(query_BLD), self.k_ln(key_BLD)
        query_BLD, key_BLD = self._apply_rotary(query_BLD, key_BLD)

        n_heads = self.n_heads
        reshaper = functools.partial(
            einops.rearrange, pattern="b s (h d) -> b h s d", h=n_heads
        )

        query_BHLD, key_BHLD, value_BHLD = map(
            reshaper, (query_BLD, key_BLD, value_BLD)
        )

        # Where True, enable participation in attention.
        mask_BLL = seq_id.unsqueeze(-1) == seq_id.unsqueeze(-2)
        mask_BHLL = mask_BLL.unsqueeze(1)

        context_BHLD = F.scaled_dot_product_attention(
            query_BHLD, key_BHLD, value_BHLD, mask_BHLL
        )
        context_BLD = einops.rearrange(context_BHLD, "b h s d -> b s (h d)")
        return self.out_proj(context_BLD)
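A minimal usage sketch for MultiHeadAttention as defined above; the batch size, sequence length, and model width are illustrative assumptions (the rotary embedding is provided by esm/layers/rotary.py).

import torch
from esm.layers.attention import MultiHeadAttention

attn = MultiHeadAttention(d_model=64, n_heads=4)
x = torch.randn(2, 10, 64)                      # [batch, length, d_model]
seq_id = torch.zeros(2, 10, dtype=torch.long)   # all positions belong to the same sequence
out = attn(x, seq_id)                           # [2, 10, 64]; attention is masked to positions with equal seq_id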
esm/layers/blocks.py
ADDED
@@ -0,0 +1,153 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from esm.layers.attention import MultiHeadAttention
from esm.layers.geom_attention import (
    GeometricReasoningOriginalImpl,
)
from esm.utils.structure.affine3d import Affine3D


def swiglu_correction_fn(expansion_ratio: float, d_model: int) -> int:
    # set hidden dimension to nearest multiple of 256 after expansion ratio
    return int(((expansion_ratio * d_model) + 255) // 256 * 256)


class SwiGLU(nn.Module):
    """
    SwiGLU activation function as an nn.Module, allowing it to be used within nn.Sequential.
    This module splits the input tensor along the last dimension and applies the SiLU (Swish)
    activation function to the first half, then multiplies it by the second half.
    """

    def __init__(self):
        super(SwiGLU, self).__init__()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1, x2 = x.chunk(2, dim=-1)
        return F.silu(x1) * x2


def swiglu_ln_ffn(d_model: int, expansion_ratio: float, bias: bool):
    return nn.Sequential(
        nn.LayerNorm(d_model),
        nn.Linear(
            d_model, swiglu_correction_fn(expansion_ratio, d_model) * 2, bias=bias
        ),
        SwiGLU(),
        nn.Linear(swiglu_correction_fn(expansion_ratio, d_model), d_model, bias=bias),
    )


def gelu_ln_ffn(d_model: int, expansion_ratio: float, bias: bool):
    hidden_dim = int(expansion_ratio * d_model)
    return nn.Sequential(
        nn.LayerNorm(d_model),
        nn.Linear(d_model, hidden_dim, bias=bias),
        nn.GELU(),
        nn.Linear(hidden_dim, d_model, bias=bias),
    )


class UnifiedTransformerBlock(nn.Module):
    """
    A unified transformer block that can optionally incorporate geometric attention.

    This class defines a transformer block that can be configured to use geometric attention
    alongside the standard multi-head attention mechanism. It is designed to be a flexible
    component of transformer-based models, allowing for the integration of geometric reasoning.

    Parameters
    ----------
    d_model : int
        The dimensionality of the input and output features of the transformer block.
    n_heads : int
        The number of attention heads in the multi-head attention mechanism.
    use_geom_attn : bool, optional
        Whether to use geometric attention in addition to the standard multi-head attention. Defaults to False.
    v_heads : int, optional
        The number of heads to use for the geometric attention mechanism, if enabled. Must be specified if `use_geom_attn` is True.
    """

    def __init__(
        self,
        d_model: int,
        n_heads: int,
        use_geom_attn: bool = False,
        use_plain_attn: bool = True,
        v_heads: int | None = None,
        bias: bool = False,
        expansion_ratio: float = 4.0,
        residue_scaling_factor: float = 1,
        mask_and_zero_frameless: bool = False,
        qk_layernorm: bool = True,
        ffn_type: str = "swiglu",  # swiglu | gelu
    ):
        super().__init__()
        self.use_plain_attn = use_plain_attn
        if self.use_plain_attn:
            self.attn = MultiHeadAttention(
                d_model, n_heads, bias, qk_layernorm=qk_layernorm
            )
        self.use_geom_attn = use_geom_attn
        if self.use_geom_attn:
            if v_heads is None:
                raise ValueError("v_heads must be specified when use_geom_attn is True")
            self.geom_attn = GeometricReasoningOriginalImpl(
                c_s=d_model,
                v_heads=v_heads,
                bias=bias,
                mask_and_zero_frameless=mask_and_zero_frameless,
            )
        if ffn_type == "swiglu":
            self.ffn = swiglu_ln_ffn(d_model, expansion_ratio, bias)
        elif ffn_type == "gelu":
            self.ffn = gelu_ln_ffn(d_model, expansion_ratio, bias)
        else:
            raise ValueError(f"Unknown ffn_type: {ffn_type}")
        self.scaling_factor = residue_scaling_factor

    def forward(
        self,
        x: torch.Tensor,
        sequence_id: torch.Tensor,
        frames: Affine3D,
        frames_mask: torch.Tensor,
        chain_id: torch.Tensor,
    ) -> torch.Tensor:
        """
        Forward pass for the UnifiedTransformerBlock.

        Parameters
        ----------
        x : torch.Tensor[float]
            Input tensor to the transformer block, typically the output from the previous layer.
        sequence_id : torch.Tensor[int]
            Tensor containing sequence IDs for each element in the batch, used for attention masking.
        frames : Affine3D
            Affine3D containing geometric frame information for geometric attention.
        frames_mask : torch.Tensor[bool]
            Boolean mask tensor indicating valid frames for geometric attention.
        chain_id : torch.Tensor[int]
            Tensor containing chain IDs for each element, used for attention masking in geometric attention.

        Returns
        -------
        torch.Tensor[float]
            The output tensor after applying the transformer block operations.
        """
        if self.use_plain_attn:
            r1 = self.attn(x, sequence_id)
            x = x + r1 / self.scaling_factor

        if self.use_geom_attn:
            r2 = self.geom_attn(x, frames, frames_mask, sequence_id, chain_id)
            x = x + r2 / self.scaling_factor

        r3 = self.ffn(x) / self.scaling_factor
        x = x + r3

        return x
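A minimal, sequence-only sketch of UnifiedTransformerBlock with geometric attention disabled; on this path frames, frames_mask, and chain_id are never read, so None is passed for them. The shapes are illustrative assumptions.

import torch
from esm.layers.blocks import UnifiedTransformerBlock

block = UnifiedTransformerBlock(d_model=64, n_heads=4, use_geom_attn=False)
x = torch.randn(2, 10, 64)                      # [batch, length, d_model]
seq_id = torch.zeros(2, 10, dtype=torch.long)
y = block(x, sequence_id=seq_id, frames=None, frames_mask=None, chain_id=None)  # [2, 10, 64]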
esm/layers/codebook.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import torch.distributed as dist
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
class EMACodebook(nn.Module):
|
9 |
+
def __init__(
|
10 |
+
self,
|
11 |
+
n_codes,
|
12 |
+
embedding_dim,
|
13 |
+
no_random_restart=True,
|
14 |
+
restart_thres=1.0,
|
15 |
+
ema_decay=0.99,
|
16 |
+
):
|
17 |
+
super().__init__()
|
18 |
+
self.register_buffer("embeddings", torch.randn(n_codes, embedding_dim))
|
19 |
+
self.register_buffer("N", torch.zeros(n_codes))
|
20 |
+
self.register_buffer("z_avg", self.embeddings.data.clone())
|
21 |
+
|
22 |
+
self.n_codes = n_codes
|
23 |
+
self.embedding_dim = embedding_dim
|
24 |
+
self._need_init = True
|
25 |
+
self.no_random_restart = no_random_restart
|
26 |
+
self.restart_thres = restart_thres
|
27 |
+
self.freeze_codebook = False
|
28 |
+
self.ema_decay = ema_decay
|
29 |
+
|
30 |
+
def reset_parameters(self):
|
31 |
+
# For meta init
|
32 |
+
pass
|
33 |
+
|
34 |
+
def _tile(self, x):
|
35 |
+
d, ew = x.shape
|
36 |
+
if d < self.n_codes:
|
37 |
+
n_repeats = (self.n_codes + d - 1) // d
|
38 |
+
std = 0.01 / np.sqrt(ew)
|
39 |
+
x = x.repeat(n_repeats, 1)
|
40 |
+
x = x + torch.randn_like(x) * std
|
41 |
+
return x
|
42 |
+
|
43 |
+
def _init_embeddings(self, z):
|
44 |
+
# z: [b, t, c]
|
45 |
+
self._need_init = False
|
46 |
+
flat_inputs = z.view(-1, self.embedding_dim)
|
47 |
+
y = self._tile(flat_inputs)
|
48 |
+
|
49 |
+
y.shape[0]
|
50 |
+
_k_rand = y[torch.randperm(y.shape[0])][: self.n_codes]
|
51 |
+
if dist.is_initialized():
|
52 |
+
dist.broadcast(_k_rand, 0)
|
53 |
+
self.embeddings.data.copy_(_k_rand)
|
54 |
+
self.z_avg.data.copy_(_k_rand)
|
55 |
+
self.N.data.copy_(torch.ones(self.n_codes))
|
56 |
+
|
57 |
+
def forward(self, z):
|
58 |
+
# z: [b, t, c]
|
59 |
+
if self._need_init and self.training and not self.freeze_codebook:
|
60 |
+
self._init_embeddings(z)
|
61 |
+
# z is of shape [batch_size, sequence length, channels]
|
62 |
+
flat_inputs = z.view(-1, self.embedding_dim)
|
63 |
+
distances = (
|
64 |
+
(flat_inputs**2).sum(dim=1, keepdim=True)
|
65 |
+
- 2 * flat_inputs @ self.embeddings.t()
|
66 |
+
+ (self.embeddings.t() ** 2).sum(dim=0, keepdim=True)
|
67 |
+
) # [bt, c]
|
68 |
+
|
69 |
+
encoding_indices = torch.argmin(distances, dim=1)
|
70 |
+
encoding_indices = encoding_indices.view(*z.shape[:2]) # [b, t, ncode]
|
71 |
+
|
72 |
+
embeddings = F.embedding(encoding_indices, self.embeddings) # [b, t, c]
|
73 |
+
|
74 |
+
commitment_loss = 0.25 * F.mse_loss(z, embeddings.detach())
|
75 |
+
|
76 |
+
# EMA codebook update
|
77 |
+
if self.training and not self.freeze_codebook:
|
78 |
+
assert False, "Not implemented"
|
79 |
+
embeddings_st = (embeddings - z).detach() + z
|
80 |
+
|
81 |
+
return embeddings_st, encoding_indices, commitment_loss
|
82 |
+
|
83 |
+
def dictionary_lookup(self, encodings):
|
84 |
+
embeddings = F.embedding(encodings, self.embeddings)
|
85 |
+
return embeddings
|
86 |
+
|
87 |
+
def soft_codebook_lookup(self, weights: torch.Tensor) -> torch.Tensor:
|
88 |
+
return weights @ self.embeddings
|
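A minimal usage sketch for the codebook above (not part of the uploaded files): it quantizes a batch of continuous embeddings by nearest-neighbour lookup and returns straight-through embeddings, integer code indices, and a commitment loss. The module is run in eval mode with `freeze_codebook = True`, since the EMA update branch is left unimplemented (`assert False`) and the data-dependent initialisation only runs during training.

```python
import torch

from esm.layers.codebook import EMACodebook

codebook = EMACodebook(n_codes=4096, embedding_dim=128)
codebook.eval()                  # the EMA update branch is not implemented
codebook.freeze_codebook = True  # also skip the data-dependent re-initialisation

z = torch.randn(2, 10, 128)      # [batch, tokens, channels]
quantized, indices, commitment_loss = codebook(z)

print(quantized.shape)         # torch.Size([2, 10, 128]) - nearest code vectors (straight-through)
print(indices.shape)           # torch.Size([2, 10])      - integer code ids
print(commitment_loss.item())  # scalar: 0.25 * MSE(z, quantized.detach())
```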
esm/layers/ffn.py
ADDED
@@ -0,0 +1,29 @@
1 |
+
import torch.nn as nn
|
2 |
+
import torch.nn.functional as F
|
3 |
+
from torch import Tensor
|
4 |
+
|
5 |
+
# NOT CURRENTLY USED
|
6 |
+
|
7 |
+
|
8 |
+
class SwiGLU(nn.Module):
|
9 |
+
def __init__(self) -> None:
|
10 |
+
super().__init__()
|
11 |
+
|
12 |
+
def forward(self, x: Tensor) -> Tensor:
|
13 |
+
x1, x2 = x.chunk(2, dim=-1)
|
14 |
+
hidden = F.silu(x1) * x2
|
15 |
+
return hidden
|
16 |
+
|
17 |
+
|
18 |
+
class FFN(nn.Module):
|
19 |
+
def __init__(self, in_proj, activation, out_proj) -> None:
|
20 |
+
super().__init__()
|
21 |
+
self.in_proj = in_proj
|
22 |
+
self.activation = activation
|
23 |
+
self.out_proj = out_proj
|
24 |
+
|
25 |
+
def forward(self, x: Tensor) -> Tensor:
|
26 |
+
x = self.in_proj(x)
|
27 |
+
x = self.activation(x)
|
28 |
+
x = self.out_proj(x)
|
29 |
+
return x
|
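Although the file is marked as not currently used, the wiring is simple enough to illustrate. The sketch below (illustrative only, arbitrary sizes) combines the generic `FFN` wrapper with `SwiGLU`; the input projection must produce twice the hidden width because the activation splits its input into two halves.

```python
import torch
import torch.nn as nn

from esm.layers.ffn import FFN, SwiGLU

d_model, d_hidden = 64, 256
ffn = FFN(
    in_proj=nn.Linear(d_model, 2 * d_hidden),  # SwiGLU chunks the last dim into two halves
    activation=SwiGLU(),                       # silu(x1) * x2 -> d_hidden features
    out_proj=nn.Linear(d_hidden, d_model),
)

x = torch.randn(4, 16, d_model)
print(ffn(x).shape)  # torch.Size([4, 16, 64])
```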
esm/layers/geom_attention.py
ADDED
@@ -0,0 +1,151 @@
1 |
+
from math import sqrt
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from einops import rearrange
|
5 |
+
from torch import nn
|
6 |
+
from torch.nn import functional as F
|
7 |
+
|
8 |
+
|
9 |
+
class GeometricReasoningOriginalImpl(nn.Module):
|
10 |
+
def __init__(
|
11 |
+
self,
|
12 |
+
c_s: int,
|
13 |
+
v_heads: int,
|
14 |
+
num_vector_messages: int = 1,
|
15 |
+
mask_and_zero_frameless: bool = True,
|
16 |
+
divide_residual_by_depth: bool = False,
|
17 |
+
bias: bool = False,
|
18 |
+
):
|
19 |
+
"""Approximate implementation:
|
20 |
+
|
21 |
+
ATTN(A, v) := (softmax_j A_ij) v_j
|
22 |
+
make_rot_vectors(x) := R(i->g) Linear(x).reshape(..., 3)
|
23 |
+
make_vectors(x) := T(i->g) Linear(x).reshape(..., 3)
|
24 |
+
|
25 |
+
v <- make_rot_vectors(x)
|
26 |
+
q_dir, k_dir <- make_rot_vectors(x)
|
27 |
+
q_dist, k_dist <- make_vectors(x)
|
28 |
+
|
29 |
+
A_ij <- dot(q_dir_i, k_dir_j) -||q_dist_i - k_dist_j||^2
|
30 |
+
x <- x + Linear(T(g->i) ATTN(A, v))
|
31 |
+
"""
|
32 |
+
super().__init__()
|
33 |
+
self.c_s = c_s
|
34 |
+
self.v_heads = v_heads
|
35 |
+
self.num_vector_messages = num_vector_messages
|
36 |
+
self.mask_and_zero_frameless = mask_and_zero_frameless
|
37 |
+
|
38 |
+
self.s_norm = nn.LayerNorm(c_s, bias=bias)
|
39 |
+
dim_proj = (
|
40 |
+
4 * self.v_heads * 3 + self.v_heads * 3 * self.num_vector_messages
|
41 |
+
) # 2 x (q, k) * number of heads * (x, y, z) + number of heads * number of vector messages * (x, y, z)
|
42 |
+
self.proj = nn.Linear(c_s, dim_proj, bias=bias)
|
43 |
+
channels_out = self.v_heads * 3 * self.num_vector_messages
|
44 |
+
self.out_proj = nn.Linear(channels_out, c_s, bias=bias)
|
45 |
+
|
46 |
+
# The basic idea is for some attention heads to pay more or less attention to rotation versus distance,
|
47 |
+
# as well as to control the sharpness of the softmax (i.e., should this head only attend to those residues
|
48 |
+
# very nearby or should there be shallower dropoff in attention weight?)
|
49 |
+
self.distance_scale_per_head = nn.Parameter(torch.zeros((self.v_heads)))
|
50 |
+
self.rotation_scale_per_head = nn.Parameter(torch.zeros((self.v_heads)))
|
51 |
+
|
52 |
+
def forward(self, s, affine, affine_mask, sequence_id, chain_id):
|
53 |
+
attn_bias = sequence_id.unsqueeze(-1) == sequence_id.unsqueeze(-2)
|
54 |
+
attn_bias = attn_bias.unsqueeze(1).float()
|
55 |
+
attn_bias = attn_bias.masked_fill(
|
56 |
+
~affine_mask[:, None, None, :], torch.finfo(attn_bias.dtype).min
|
57 |
+
)
|
58 |
+
chain_id_mask = chain_id.unsqueeze(1) != chain_id.unsqueeze(2)
|
59 |
+
attn_bias = attn_bias.masked_fill(
|
60 |
+
chain_id_mask.unsqueeze(1), torch.finfo(s.dtype).min
|
61 |
+
)
|
62 |
+
|
63 |
+
ns = self.s_norm(s)
|
64 |
+
vec_rot, vec_dist = self.proj(ns).split(
|
65 |
+
[
|
66 |
+
self.v_heads * 2 * 3 + self.v_heads * 3 * self.num_vector_messages,
|
67 |
+
self.v_heads * 2 * 3,
|
68 |
+
],
|
69 |
+
dim=-1,
|
70 |
+
)
|
71 |
+
|
72 |
+
# Rotate the queries and keys for the rotation term. We also rotate the values.
|
73 |
+
# NOTE(zeming, thayes): Values are only rotated, not translated. We may wish to change
|
74 |
+
# this in the future.
|
75 |
+
query_rot, key_rot, value = (
|
76 |
+
affine.rot[..., None]
|
77 |
+
.apply(rearrange(vec_rot, "... (h c) -> ... h c", c=3))
|
78 |
+
.split(
|
79 |
+
[
|
80 |
+
self.v_heads,
|
81 |
+
self.v_heads,
|
82 |
+
self.v_heads * self.num_vector_messages,
|
83 |
+
],
|
84 |
+
dim=-2,
|
85 |
+
)
|
86 |
+
)
|
87 |
+
|
88 |
+
# Rotate and translate the queries and keys for the distance term
|
89 |
+
# NOTE(thayes): a simple speedup would be to apply all rotations together, then
|
90 |
+
# separately apply the translations.
|
91 |
+
query_dist, key_dist = (
|
92 |
+
affine[..., None]
|
93 |
+
.apply(rearrange(vec_dist, "... (h c) -> ... h c", c=3))
|
94 |
+
.chunk(2, dim=-2)
|
95 |
+
)
|
96 |
+
|
97 |
+
query_dist = rearrange(query_dist, "b s h d -> b h s 1 d")
|
98 |
+
key_dist = rearrange(key_dist, "b s h d -> b h 1 s d")
|
99 |
+
query_rot = rearrange(query_rot, "b s h d -> b h s d")
|
100 |
+
key_rot = rearrange(key_rot, "b s h d -> b h d s")
|
101 |
+
value = rearrange(
|
102 |
+
value, "b s (h m) d -> b h s (m d)", m=self.num_vector_messages
|
103 |
+
)
|
104 |
+
|
105 |
+
distance_term = (query_dist - key_dist).norm(dim=-1) / sqrt(3)
|
106 |
+
rotation_term = query_rot.matmul(key_rot) / sqrt(3)
|
107 |
+
distance_term_weight = rearrange(
|
108 |
+
F.softplus(self.distance_scale_per_head), "h -> h 1 1"
|
109 |
+
)
|
110 |
+
rotation_term_weight = rearrange(
|
111 |
+
F.softplus(self.rotation_scale_per_head), "h -> h 1 1"
|
112 |
+
)
|
113 |
+
|
114 |
+
attn_weight = (
|
115 |
+
rotation_term * rotation_term_weight - distance_term * distance_term_weight
|
116 |
+
)
|
117 |
+
|
118 |
+
if attn_bias is not None:
|
119 |
+
# we can re-use the attention bias from the transformer layers
|
120 |
+
# NOTE(thayes): This attention bias is expected to handle two things:
|
121 |
+
# 1. Masking attention on padding tokens
|
122 |
+
# 2. Masking cross sequence attention in the case of bin packing
|
123 |
+
s_q = attn_weight.size(2)
|
124 |
+
s_k = attn_weight.size(3)
|
125 |
+
_s_q = max(0, attn_bias.size(2) - s_q)
|
126 |
+
_s_k = max(0, attn_bias.size(3) - s_k)
|
127 |
+
attn_bias = attn_bias[:, :, _s_q:, _s_k:]
|
128 |
+
attn_weight = attn_weight + attn_bias
|
129 |
+
|
130 |
+
attn_weight = torch.softmax(attn_weight, dim=-1)
|
131 |
+
|
132 |
+
attn_out = attn_weight.matmul(value)
|
133 |
+
|
134 |
+
attn_out = (
|
135 |
+
affine.rot[..., None]
|
136 |
+
.invert()
|
137 |
+
.apply(
|
138 |
+
rearrange(
|
139 |
+
attn_out, "b h s (m d) -> b s (h m) d", m=self.num_vector_messages
|
140 |
+
)
|
141 |
+
)
|
142 |
+
)
|
143 |
+
|
144 |
+
attn_out = rearrange(
|
145 |
+
attn_out, "b s (h m) d -> b s (h m d)", m=self.num_vector_messages
|
146 |
+
)
|
147 |
+
if self.mask_and_zero_frameless:
|
148 |
+
attn_out = attn_out.masked_fill(~affine_mask[..., None], 0.0)
|
149 |
+
s = self.out_proj(attn_out)
|
150 |
+
|
151 |
+
return s
|
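A rough usage sketch for the geometric attention block (not from the uploaded files). It assumes that `build_affine3d_from_coordinates`, imported the same way as in `esm/models/esm3.py` below, returns a per-residue frame plus a validity mask when given N/CA/C backbone coordinates; the random coordinates here only exercise the shapes.

```python
import torch

from esm.layers.geom_attention import GeometricReasoningOriginalImpl
from esm.utils.structure.affine3d import build_affine3d_from_coordinates

B, L, d_model = 1, 32, 256
coords = torch.randn(B, L, 3, 3)  # (batch, length, N/CA/C, xyz) backbone coordinates
affine, affine_mask = build_affine3d_from_coordinates(coords)

block = GeometricReasoningOriginalImpl(c_s=d_model, v_heads=8)
s = torch.randn(B, L, d_model)
sequence_id = torch.zeros(B, L, dtype=torch.long)  # a single sequence, no bin packing
chain_id = torch.zeros(B, L, dtype=torch.long)     # a single chain

out = block(s, affine, affine_mask, sequence_id, chain_id)
print(out.shape)  # torch.Size([1, 32, 256])
```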
esm/layers/regression_head.py
ADDED
@@ -0,0 +1,24 @@
1 |
+
import torch.nn as nn
|
2 |
+
|
3 |
+
|
4 |
+
def RegressionHead(
|
5 |
+
d_model: int,
|
6 |
+
output_dim: int,
|
7 |
+
hidden_dim: int | None = None,
|
8 |
+
) -> nn.Module:
|
9 |
+
"""Single-hidden layer MLP for supervised output.
|
10 |
+
|
11 |
+
Args:
|
12 |
+
d_model: input dimension
|
13 |
+
output_dim: dimensionality of the output.
|
14 |
+
hidden_dim: optional dimension of hidden layer, defaults to d_model.
|
15 |
+
Returns:
|
16 |
+
output MLP module.
|
17 |
+
"""
|
18 |
+
hidden_dim = hidden_dim if hidden_dim is not None else d_model
|
19 |
+
return nn.Sequential(
|
20 |
+
nn.Linear(d_model, hidden_dim),
|
21 |
+
nn.GELU(),
|
22 |
+
nn.LayerNorm(hidden_dim),
|
23 |
+
nn.Linear(hidden_dim, output_dim),
|
24 |
+
)
|
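`RegressionHead` is a factory rather than a class: it returns a plain `nn.Sequential`, so it can be applied directly to any `(batch, length, d_model)` activation. A quick sketch with arbitrary sizes:

```python
import torch

from esm.layers.regression_head import RegressionHead

head = RegressionHead(d_model=512, output_dim=64)  # hidden_dim defaults to d_model
x = torch.randn(2, 100, 512)
print(head(x).shape)  # torch.Size([2, 100, 64])
```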
esm/layers/rotary.py
ADDED
@@ -0,0 +1,221 @@
1 |
+
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
|
2 |
+
#
|
3 |
+
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
|
4 |
+
# and OPT implementations in this library. It has been modified from its
|
5 |
+
# original forms to accommodate minor architectural differences compared
|
6 |
+
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
|
7 |
+
#
|
8 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
9 |
+
# you may not use this file except in compliance with the License.
|
10 |
+
# You may obtain a copy of the License at
|
11 |
+
#
|
12 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13 |
+
#
|
14 |
+
# Unless required by applicable law or agreed to in writing, software
|
15 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
17 |
+
# See the License for the specific language governing permissions and
|
18 |
+
# limitations under the License.
|
19 |
+
# NOTE: this implementation is from LLaMA 2:
|
20 |
+
# https://huggingface.co/togethercomputer/LLaMA-2-7B-32K/blob/08639a72e17836184096ae6a7e2766f2a34c3e36/modeling_flash_llama.py#L114
|
21 |
+
# Flash attention rotary implementation can be installed like so: `pip install git+https://github.com/HazyResearch/flash-attention.git#subdirectory=csrc/rotary`
|
22 |
+
|
23 |
+
from typing import Tuple
|
24 |
+
|
25 |
+
import torch
|
26 |
+
from einops import rearrange, repeat
|
27 |
+
|
28 |
+
|
29 |
+
def rotate_half(x, interleaved=False):
|
30 |
+
if not interleaved:
|
31 |
+
x1, x2 = x.chunk(2, dim=-1)
|
32 |
+
return torch.cat((-x2, x1), dim=-1)
|
33 |
+
else:
|
34 |
+
x1, x2 = x[..., ::2], x[..., 1::2]
|
35 |
+
return rearrange(
|
36 |
+
torch.stack((-x2, x1), dim=-1), "... d two -> ... (d two)", two=2
|
37 |
+
)
|
38 |
+
|
39 |
+
|
40 |
+
def apply_rotary_emb_torch(x, cos, sin, interleaved=False, _inplace=False):
|
41 |
+
"""
|
42 |
+
x: (batch_size, seqlen, nheads, headdim)
|
43 |
+
cos, sin: (seqlen, rotary_dim / 2)
|
44 |
+
"""
|
45 |
+
ro_dim = cos.shape[-1] * 2
|
46 |
+
assert ro_dim <= x.shape[-1]
|
47 |
+
seqlen = x.size(1)
|
48 |
+
cos = cos[:seqlen]
|
49 |
+
sin = sin[:seqlen]
|
50 |
+
cos = repeat(cos, "s d -> s 1 (2 d)")
|
51 |
+
sin = repeat(sin, "s d -> s 1 (2 d)")
|
52 |
+
return torch.cat(
|
53 |
+
[
|
54 |
+
x[..., :ro_dim] * cos + rotate_half(x[..., :ro_dim], interleaved) * sin,
|
55 |
+
x[..., ro_dim:],
|
56 |
+
],
|
57 |
+
dim=-1,
|
58 |
+
)
|
59 |
+
|
60 |
+
|
61 |
+
class RotaryEmbedding(torch.nn.Module):
|
62 |
+
"""
|
63 |
+
The rotary position embeddings from RoFormer_ (Su et al.).
|
64 |
+
A crucial insight from the method is that the query and keys are
|
65 |
+
transformed by rotation matrices which depend on the relative positions.
|
66 |
+
Other implementations are available in the Rotary Transformer repo_ and in
|
67 |
+
GPT-NeoX_, GPT-NeoX was an inspiration
|
68 |
+
.. _RoFormer: https://arxiv.org/abs/2104.09864
|
69 |
+
.. _repo: https://github.com/ZhuiyiTechnology/roformer
|
70 |
+
.. _GPT-NeoX: https://github.com/EleutherAI/gpt-neox
|
71 |
+
If scale_base is not None, this implements XPos (Sun et al., https://arxiv.org/abs/2212.10554).
|
72 |
+
A recommended value for scale_base is 512: https://github.com/HazyResearch/flash-attention/issues/96
|
73 |
+
Reference: https://github.com/sunyt32/torchscale/blob/main/torchscale/component/xpos_relative_position.py
|
74 |
+
"""
|
75 |
+
|
76 |
+
def __init__(
|
77 |
+
self,
|
78 |
+
dim: int,
|
79 |
+
base=10000.0,
|
80 |
+
interleaved=False,
|
81 |
+
scale_base=None,
|
82 |
+
scaling_factor=1.0,
|
83 |
+
pos_idx_in_fp32=True,
|
84 |
+
device=None,
|
85 |
+
):
|
86 |
+
"""
|
87 |
+
interleaved: if True, rotate pairs of even and odd dimensions (GPT-J style) instead
|
88 |
+
of 1st half and 2nd half (GPT-NeoX style).
|
89 |
+
pos_idx_in_fp32: if True, the position indices [0.0, ..., seqlen - 1] are in fp32,
|
90 |
+
otherwise they might be in lower precision.
|
91 |
+
This option was added because previously (before 2023-07-02), when we construct
|
92 |
+
the position indices, we use the dtype of self.inv_freq. In most cases this would
|
93 |
+
be fp32, but if the model is trained in pure bf16 (not mixed precision), then
|
94 |
+
self.inv_freq would be bf16, and the position indices are also in bf16.
|
95 |
+
Because of the limited precision of bf16 (e.g. 1995.0 is rounded to 2000.0), the
|
96 |
+
embeddings for some positions will coincide.
|
97 |
+
To maintain compatibility with models previously trained in pure bf16,
|
98 |
+
we add this option.
|
99 |
+
scaling_factor: RotaryEmbedding extended with linear scaling.
|
100 |
+
"""
|
101 |
+
super().__init__()
|
102 |
+
self.dim = dim
|
103 |
+
self.base = float(base)
|
104 |
+
self.pos_idx_in_fp32 = pos_idx_in_fp32
|
105 |
+
# Generate and save the inverse frequency buffer (non trainable)
|
106 |
+
self.interleaved = interleaved
|
107 |
+
self.scale_base = scale_base
|
108 |
+
self.scaling_factor = scaling_factor
|
109 |
+
self.device = device
|
110 |
+
|
111 |
+
self._seq_len_cached = 0
|
112 |
+
self._cos_cached = None
|
113 |
+
self._sin_cached = None
|
114 |
+
self._cos_k_cached = None
|
115 |
+
self._sin_k_cached = None
|
116 |
+
self.reset_parameters()
|
117 |
+
|
118 |
+
def reset_parameters(self):
|
119 |
+
inv_freq = self._compute_inv_freq(self.device)
|
120 |
+
self.register_buffer("inv_freq", inv_freq, persistent=False)
|
121 |
+
arange = torch.arange(0, self.dim, 2, device=self.device, dtype=torch.float32)
|
122 |
+
scale = (
|
123 |
+
(arange + 0.4 * self.dim) / (1.4 * self.dim)
|
124 |
+
if self.scale_base is not None
|
125 |
+
else None
|
126 |
+
)
|
127 |
+
self.register_buffer("scale", scale)
|
128 |
+
|
129 |
+
def _compute_inv_freq(self, device=None):
|
130 |
+
return 1 / (
|
131 |
+
self.base
|
132 |
+
** (
|
133 |
+
torch.arange(0, self.dim, 2, device=device, dtype=torch.float32)
|
134 |
+
/ self.dim
|
135 |
+
)
|
136 |
+
)
|
137 |
+
|
138 |
+
def _update_cos_sin_cache(self, seqlen, device=None, dtype=None):
|
139 |
+
# Reset the tables if the sequence length has changed,
|
140 |
+
# if we're on a new device (possibly due to tracing for instance),
|
141 |
+
# or if we're switching from inference mode to training
|
142 |
+
if (
|
143 |
+
seqlen > self._seq_len_cached
|
144 |
+
or self._cos_cached is None
|
145 |
+
or self._cos_cached.device != device
|
146 |
+
or self._cos_cached.dtype != dtype
|
147 |
+
or (self.training and self._cos_cached.is_inference())
|
148 |
+
):
|
149 |
+
self._seq_len_cached = seqlen
|
150 |
+
# We want fp32 here, not self.inv_freq.dtype, since the model could be loaded in bf16
|
151 |
+
# And the output of arange can be quite large, so bf16 would lose a lot of precision.
|
152 |
+
# However, for compatibility reason, we add an option to use the dtype of self.inv_freq.
|
153 |
+
if self.pos_idx_in_fp32:
|
154 |
+
t = torch.arange(seqlen, device=device, dtype=torch.float32)
|
155 |
+
t /= self.scaling_factor
|
156 |
+
# We want fp32 here as well since inv_freq will be multiplied with t, and the output
|
157 |
+
# will be large. Having it in bf16 will lose a lot of precision and cause the
|
158 |
+
# cos & sin output to change significantly.
|
159 |
+
# We want to recompute self.inv_freq if it was not loaded in fp32
|
160 |
+
if self.inv_freq.dtype != torch.float32:
|
161 |
+
inv_freq = self.inv_freq.to(torch.float32)
|
162 |
+
else:
|
163 |
+
inv_freq = self.inv_freq
|
164 |
+
else:
|
165 |
+
t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype)
|
166 |
+
t /= self.scaling_factor
|
167 |
+
inv_freq = self.inv_freq
|
168 |
+
# Don't do einsum, it converts fp32 to fp16 under AMP
|
169 |
+
# freqs = torch.einsum("i,j->ij", t, self.inv_freq)
|
170 |
+
freqs = torch.outer(t, inv_freq)
|
171 |
+
|
172 |
+
if self.scale is None:
|
173 |
+
self._cos_cached = torch.cos(freqs).to(dtype)
|
174 |
+
self._sin_cached = torch.sin(freqs).to(dtype)
|
175 |
+
else:
|
176 |
+
power = (
|
177 |
+
torch.arange(
|
178 |
+
seqlen, dtype=self.scale.dtype, device=self.scale.device
|
179 |
+
)
|
180 |
+
- seqlen // 2
|
181 |
+
) / self.scale_base
|
182 |
+
scale = self.scale.to(device=power.device) ** power.unsqueeze(-1)
|
183 |
+
# We want the multiplication by scale to happen in fp32
|
184 |
+
self._cos_cached = (torch.cos(freqs) * scale).to(dtype)
|
185 |
+
self._sin_cached = (torch.sin(freqs) * scale).to(dtype)
|
186 |
+
self._cos_k_cached = (torch.cos(freqs) / scale).to(dtype)
|
187 |
+
self._sin_k_cached = (torch.sin(freqs) / scale).to(dtype)
|
188 |
+
|
189 |
+
def forward(
|
190 |
+
self, q: torch.Tensor, k: torch.Tensor, seqlen_offset: int = 0
|
191 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
192 |
+
"""
|
193 |
+
q: (batch, seqlen, nheads, headdim)
|
194 |
+
k: (batch, seqlen, nheads, headdim)
|
195 |
+
seqlen_offset: can be used in generation where the qkv being passed in is only the last
|
196 |
+
token in the batch.
|
197 |
+
"""
|
198 |
+
self._update_cos_sin_cache(
|
199 |
+
q.shape[1] + seqlen_offset, device=q.device, dtype=q.dtype
|
200 |
+
)
|
201 |
+
assert self._cos_cached is not None
|
202 |
+
assert self._sin_cached is not None
|
203 |
+
if self.scale is None:
|
204 |
+
return (
|
205 |
+
apply_rotary_emb_torch(
|
206 |
+
q,
|
207 |
+
self._cos_cached[seqlen_offset:],
|
208 |
+
self._sin_cached[seqlen_offset:],
|
209 |
+
self.interleaved,
|
210 |
+
True, # inplace=True
|
211 |
+
),
|
212 |
+
apply_rotary_emb_torch(
|
213 |
+
k,
|
214 |
+
self._cos_cached[seqlen_offset:],
|
215 |
+
self._sin_cached[seqlen_offset:],
|
216 |
+
self.interleaved,
|
217 |
+
True, # inplace=True
|
218 |
+
),
|
219 |
+
) # type: ignore
|
220 |
+
else:
|
221 |
+
assert False
|
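A minimal sketch of `RotaryEmbedding` applied to query/key tensors of shape `(batch, seqlen, nheads, headdim)`, matching the docstring above; with the default `scale_base=None` the non-XPos branch is taken. Values are random and purely illustrative.

```python
import torch

from esm.layers.rotary import RotaryEmbedding

batch, seqlen, nheads, headdim = 2, 128, 8, 64
rope = RotaryEmbedding(dim=headdim)

q = torch.randn(batch, seqlen, nheads, headdim)
k = torch.randn(batch, seqlen, nheads, headdim)
q_rot, k_rot = rope(q, k)  # position-dependent rotation; shapes are unchanged

print(q_rot.shape, k_rot.shape)  # torch.Size([2, 128, 8, 64]) twice
```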
esm/layers/structure_proj.py
ADDED
@@ -0,0 +1,68 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
|
4 |
+
from esm.utils.constants.physics import (
|
5 |
+
BB_COORDINATES,
|
6 |
+
)
|
7 |
+
from esm.utils.structure.affine3d import (
|
8 |
+
Affine3D,
|
9 |
+
RotationMatrix,
|
10 |
+
)
|
11 |
+
|
12 |
+
|
13 |
+
class Dim6RotStructureHead(nn.Module):
|
14 |
+
# Normally, AF2 uses quaternions to specify rotations. There's some evidence that
|
15 |
+
# other representations are more well behaved - the best one according to
|
16 |
+
# https://openaccess.thecvf.com/content_CVPR_2019/papers/Zhou_On_the_Continuity_of_Rotation_Representations_in_Neural_Networks_CVPR_2019_paper.pdf
|
17 |
+
# is using Gram-Schmidt on 2 vectors, which is implemented here.
|
18 |
+
def __init__(
|
19 |
+
self,
|
20 |
+
input_dim: int,
|
21 |
+
trans_scale_factor: float = 10,
|
22 |
+
norm_type: str = "layernorm",
|
23 |
+
activation_fn: str = "esm_gelu",
|
24 |
+
predict_torsion_angles: bool = True,
|
25 |
+
):
|
26 |
+
super().__init__()
|
27 |
+
self.ffn1 = nn.Linear(input_dim, input_dim)
|
28 |
+
self.activation_fn = nn.GELU()
|
29 |
+
self.norm = nn.LayerNorm(input_dim)
|
30 |
+
self.proj = nn.Linear(input_dim, 9 + 7 * 2)
|
31 |
+
self.trans_scale_factor = trans_scale_factor
|
32 |
+
self.predict_torsion_angles = predict_torsion_angles
|
33 |
+
self.bb_local_coords = torch.tensor(BB_COORDINATES).float()
|
34 |
+
|
35 |
+
def forward(self, x, affine, affine_mask, **kwargs):
|
36 |
+
if affine is None:
|
37 |
+
rigids = Affine3D.identity(
|
38 |
+
x.shape[:-1],
|
39 |
+
dtype=x.dtype,
|
40 |
+
device=x.device,
|
41 |
+
requires_grad=self.training,
|
42 |
+
rotation_type=RotationMatrix,
|
43 |
+
)
|
44 |
+
else:
|
45 |
+
rigids = affine
|
46 |
+
|
47 |
+
# [*, N]
|
48 |
+
x = self.ffn1(x)
|
49 |
+
x = self.activation_fn(x)
|
50 |
+
x = self.norm(x)
|
51 |
+
trans, x, y, angles = self.proj(x).split([3, 3, 3, 7 * 2], dim=-1)
|
52 |
+
trans = trans * self.trans_scale_factor
|
53 |
+
x = x / (x.norm(dim=-1, keepdim=True) + 1e-5)
|
54 |
+
y = y / (y.norm(dim=-1, keepdim=True) + 1e-5)
|
55 |
+
update = Affine3D.from_graham_schmidt(x + trans, trans, y + trans)
|
56 |
+
rigids = rigids.compose(update.mask(affine_mask))
|
57 |
+
affine = rigids.tensor
|
58 |
+
|
59 |
+
# We approximate the positions of the backbone atoms in the global frame by applying the rigid
|
60 |
+
# transformation to the mean of the backbone atoms in the local frame.
|
61 |
+
all_bb_coords_local = (
|
62 |
+
self.bb_local_coords[None, None, :, :]
|
63 |
+
.expand(*x.shape[:-1], 3, 3)
|
64 |
+
.to(x.device)
|
65 |
+
)
|
66 |
+
pred_xyz = rigids[..., None].apply(all_bb_coords_local)
|
67 |
+
|
68 |
+
return affine, pred_xyz
|
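A sketch of how the 6D-rotation structure head might be called (assumptions: `affine=None` starts from identity frames, and a boolean `(batch, length)` mask is accepted where the forward pass above applies `update.mask(affine_mask)`). Random embeddings are used, so the predicted backbone coordinates are meaningless beyond their shape.

```python
import torch

from esm.layers.structure_proj import Dim6RotStructureHead

B, L, d = 1, 20, 128
head = Dim6RotStructureHead(input_dim=d)
x = torch.randn(B, L, d)                          # per-residue embeddings
affine_mask = torch.ones(B, L, dtype=torch.bool)  # every residue has a valid frame

affine_tensor, pred_xyz = head(x, affine=None, affine_mask=affine_mask)
print(pred_xyz.shape)  # torch.Size([1, 20, 3, 3]) - approximate N/CA/C positions
```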
esm/layers/transformer_stack.py
ADDED
@@ -0,0 +1,94 @@
1 |
+
import math
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
|
6 |
+
from esm.layers.blocks import UnifiedTransformerBlock
|
7 |
+
from esm.utils.structure.affine3d import Affine3D
|
8 |
+
|
9 |
+
|
10 |
+
class TransformerStack(nn.Module):
|
11 |
+
"""
|
12 |
+
A stack of transformer blocks used in the ESM-3 model. Each block is a UnifiedTransformerBlock,
|
13 |
+
which can either be geometric attention or standard multi-head attention.
|
14 |
+
|
15 |
+
Args:
|
16 |
+
d_model (int): The dimensionality of the input and output feature vectors.
|
17 |
+
n_heads (int): The number of attention heads.
|
18 |
+
v_heads (int): The number of voting heads.
|
19 |
+
n_layers (int): The number of transformer blocks in the stack.
|
20 |
+
n_layers_geom (int, optional): The number of transformer blocks that use geometric attention.
|
21 |
+
scale_residue (bool, optional): Whether to scale the residue connections in each transformer block.
|
22 |
+
mask_and_zero_frameless (bool, optional): Whether to mask and zero frameless positions in the input.
|
23 |
+
Only applies in the geometric attention blocks, which is conditioned on the structure
|
24 |
+
"""
|
25 |
+
|
26 |
+
def __init__(
|
27 |
+
self,
|
28 |
+
d_model: int,
|
29 |
+
n_heads: int,
|
30 |
+
v_heads: int | None,
|
31 |
+
n_layers: int,
|
32 |
+
n_layers_geom: int = 1,
|
33 |
+
scale_residue: bool = True,
|
34 |
+
mask_and_zero_frameless: bool = False,
|
35 |
+
bias: bool = False,
|
36 |
+
qk_layernorm: bool = True,
|
37 |
+
ffn_type: str = "swiglu", # swiglu | gelu
|
38 |
+
expansion_ratio: float = 8 / 3,
|
39 |
+
):
|
40 |
+
super().__init__()
|
41 |
+
self.blocks = nn.ModuleList(
|
42 |
+
[
|
43 |
+
UnifiedTransformerBlock(
|
44 |
+
d_model,
|
45 |
+
n_heads,
|
46 |
+
v_heads=v_heads,
|
47 |
+
use_geom_attn=i < n_layers_geom,
|
48 |
+
residue_scaling_factor=(
|
49 |
+
math.sqrt(n_layers / 36) if scale_residue else 1.0
|
50 |
+
),
|
51 |
+
expansion_ratio=expansion_ratio,
|
52 |
+
mask_and_zero_frameless=mask_and_zero_frameless,
|
53 |
+
bias=bias,
|
54 |
+
qk_layernorm=qk_layernorm,
|
55 |
+
ffn_type=ffn_type,
|
56 |
+
)
|
57 |
+
for i in range(n_layers)
|
58 |
+
]
|
59 |
+
)
|
60 |
+
self.norm = nn.LayerNorm(d_model, bias=False)
|
61 |
+
|
62 |
+
def forward(
|
63 |
+
self,
|
64 |
+
x: torch.Tensor,
|
65 |
+
sequence_id: torch.Tensor | None = None,
|
66 |
+
affine: Affine3D | None = None,
|
67 |
+
affine_mask: torch.Tensor | None = None,
|
68 |
+
chain_id: torch.Tensor | None = None,
|
69 |
+
) -> tuple[torch.Tensor, torch.Tensor]:
|
70 |
+
"""
|
71 |
+
Forward pass of the TransformerStack.
|
72 |
+
|
73 |
+
Args:
|
74 |
+
x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, d_model).
|
75 |
+
sequence_id (torch.Tensor): The sequence ID tensor of shape (batch_size, sequence_length).
|
76 |
+
affine (Affine3D | None): The affine transformation tensor or None.
|
77 |
+
affine_mask (torch.Tensor | None): The affine mask tensor or None.
|
78 |
+
chain_id (torch.Tensor): The protein chain tensor of shape (batch_size, sequence_length).
|
79 |
+
Only used in geometric attention.
|
80 |
+
|
81 |
+
Returns:
|
82 |
+
post_norm: The output tensor of shape (batch_size, sequence_length, d_model).
|
83 |
+
pre_norm: The embedding of shape (batch_size, sequence_length, d_model).
|
84 |
+
"""
|
85 |
+
*batch_dims, _ = x.shape
|
86 |
+
if sequence_id is None:
|
87 |
+
sequence_id = torch.ones(
|
88 |
+
size=batch_dims, dtype=torch.int64, device=x.device
|
89 |
+
)
|
90 |
+
if chain_id is None:
|
91 |
+
chain_id = torch.ones(size=batch_dims, dtype=torch.int64, device=x.device)
|
92 |
+
for block in self.blocks:
|
93 |
+
x = block(x, sequence_id, affine, affine_mask, chain_id)
|
94 |
+
return self.norm(x), x
|
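A small sketch of the stack without geometric attention (`n_layers_geom=0`), so no structural frames are needed; it is assumed that `v_heads=None` is acceptable in that case, which is what the `int | None` annotation suggests. Sizes are arbitrary.

```python
import torch

from esm.layers.transformer_stack import TransformerStack

stack = TransformerStack(
    d_model=256, n_heads=8, v_heads=None, n_layers=4, n_layers_geom=0
)
x = torch.randn(2, 50, 256)

post_norm, pre_norm = stack(x)          # sequence_id / chain_id default to all-ones
print(post_norm.shape, pre_norm.shape)  # both torch.Size([2, 50, 256])
```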
esm/models/esm3.py
ADDED
@@ -0,0 +1,798 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import contextlib
|
4 |
+
from functools import partial
|
5 |
+
|
6 |
+
import attr
|
7 |
+
import einops
|
8 |
+
import torch
|
9 |
+
import torch.nn as nn
|
10 |
+
from attr import dataclass
|
11 |
+
|
12 |
+
from esm.layers.regression_head import RegressionHead
|
13 |
+
from esm.layers.transformer_stack import TransformerStack
|
14 |
+
from esm.models.function_decoder import FunctionTokenDecoder
|
15 |
+
from esm.models.vqvae import (
|
16 |
+
StructureTokenDecoder,
|
17 |
+
StructureTokenEncoder,
|
18 |
+
)
|
19 |
+
from esm.sdk.api import (
|
20 |
+
ESM3InferenceClient,
|
21 |
+
ESMProtein,
|
22 |
+
ESMProteinTensor,
|
23 |
+
ForwardAndSampleOutput,
|
24 |
+
ForwardConfig,
|
25 |
+
ForwardOutput,
|
26 |
+
ForwardTrackData,
|
27 |
+
GenerationConfig,
|
28 |
+
ProteinType,
|
29 |
+
ReturnLogitsConfig,
|
30 |
+
SamplingConfig,
|
31 |
+
SamplingTrackConfig,
|
32 |
+
)
|
33 |
+
from esm.tokenization import get_model_tokenizers
|
34 |
+
from esm.utils import encoding
|
35 |
+
from esm.utils.constants import esm3 as C
|
36 |
+
from esm.utils.constants.models import ESM3_OPEN_SMALL
|
37 |
+
from esm.utils.decoding import decode_protein_tensor
|
38 |
+
from esm.utils.generation import (
|
39 |
+
iterative_sampling_raw,
|
40 |
+
iterative_sampling_tokens,
|
41 |
+
)
|
42 |
+
from esm.utils.misc import rbf
|
43 |
+
from esm.utils.sampling import (
|
44 |
+
get_default_sampling_config,
|
45 |
+
sample_function_logits,
|
46 |
+
sample_logits,
|
47 |
+
sample_residue_annotation_logits,
|
48 |
+
)
|
49 |
+
from esm.utils.structure.affine3d import (
|
50 |
+
build_affine3d_from_coordinates,
|
51 |
+
)
|
52 |
+
|
53 |
+
|
54 |
+
@dataclass
|
55 |
+
class ESMOutput:
|
56 |
+
sequence_logits: torch.Tensor
|
57 |
+
structure_logits: torch.Tensor
|
58 |
+
secondary_structure_logits: torch.Tensor
|
59 |
+
sasa_logits: torch.Tensor
|
60 |
+
function_logits: torch.Tensor
|
61 |
+
residue_logits: torch.Tensor
|
62 |
+
embeddings: torch.Tensor
|
63 |
+
|
64 |
+
|
65 |
+
class EncodeInputs(nn.Module):
|
66 |
+
"""
|
67 |
+
Module for encoding input features in the ESM-3 model.
|
68 |
+
|
69 |
+
Args:
|
70 |
+
d_model (int): The dimensionality of the model's hidden states.
|
71 |
+
"""
|
72 |
+
|
73 |
+
def __init__(self, d_model: int):
|
74 |
+
super().__init__()
|
75 |
+
|
76 |
+
# Sequence
|
77 |
+
self.sequence_embed = nn.Embedding(64, d_model)
|
78 |
+
# Mandatory information
|
79 |
+
self.plddt_projection = nn.Linear(16, d_model)
|
80 |
+
self.structure_per_res_plddt_projection = nn.Linear(16, d_model)
|
81 |
+
|
82 |
+
# Structure
|
83 |
+
self.structure_tokens_embed = nn.Embedding(4096 + 5, d_model)
|
84 |
+
|
85 |
+
# "Structural" features
|
86 |
+
self.ss8_embed = nn.Embedding(8 + 3, d_model)
|
87 |
+
self.sasa_embed = nn.Embedding(16 + 3, d_model)
|
88 |
+
|
89 |
+
# "Functional" features
|
90 |
+
self.function_embed = nn.ModuleList(
|
91 |
+
[nn.Embedding(260, d_model // 8, padding_idx=0) for _ in range(8)]
|
92 |
+
)
|
93 |
+
|
94 |
+
self.residue_embed = nn.EmbeddingBag(1478, d_model, mode="sum", padding_idx=0)
|
95 |
+
|
96 |
+
def forward(
|
97 |
+
self,
|
98 |
+
sequence_tokens: torch.Tensor,
|
99 |
+
structure_tokens: torch.Tensor,
|
100 |
+
average_plddt: torch.Tensor,
|
101 |
+
per_res_plddt: torch.Tensor,
|
102 |
+
ss8_tokens: torch.Tensor,
|
103 |
+
sasa_tokens: torch.Tensor,
|
104 |
+
function_tokens: torch.Tensor,
|
105 |
+
residue_annotation_tokens: torch.Tensor,
|
106 |
+
) -> torch.Tensor:
|
107 |
+
sequence_embed = self.sequence_embed(sequence_tokens)
|
108 |
+
|
109 |
+
rbf_16_fn = partial(rbf, v_min=0.0, v_max=1.0, n_bins=16)
|
110 |
+
# the `masked_fill(padding_mask.unsqueeze(2), 0)` for the two below is unnecessary
|
111 |
+
# as pad tokens never even interact with the "real" tokens (due to sequence_id)
|
112 |
+
plddt_embed = self.plddt_projection(rbf_16_fn(average_plddt))
|
113 |
+
structure_per_res_plddt = self.structure_per_res_plddt_projection(
|
114 |
+
rbf_16_fn(per_res_plddt)
|
115 |
+
)
|
116 |
+
|
117 |
+
# Structure + "structural features" embeds
|
118 |
+
structure_embed = self.structure_tokens_embed(structure_tokens)
|
119 |
+
ss8_embed = self.ss8_embed(ss8_tokens)
|
120 |
+
sasa_embed = self.sasa_embed(sasa_tokens)
|
121 |
+
|
122 |
+
# "Functional" features embeds
|
123 |
+
function_embed = torch.cat(
|
124 |
+
[
|
125 |
+
embed_fn(funcs)
|
126 |
+
for embed_fn, funcs in zip(
|
127 |
+
self.function_embed, function_tokens.unbind(-1)
|
128 |
+
)
|
129 |
+
],
|
130 |
+
-1,
|
131 |
+
)
|
132 |
+
|
133 |
+
# Residue embeds
|
134 |
+
B, L, N = residue_annotation_tokens.shape
|
135 |
+
residue_embed = self.residue_embed(
|
136 |
+
einops.rearrange(
|
137 |
+
residue_annotation_tokens, "B L N -> (B L) N", B=B, L=L, N=N
|
138 |
+
)
|
139 |
+
)
|
140 |
+
residue_embed = einops.rearrange(residue_embed, "(B L) D -> B L D", B=B, L=L)
|
141 |
+
|
142 |
+
return (
|
143 |
+
sequence_embed
|
144 |
+
+ plddt_embed
|
145 |
+
+ structure_per_res_plddt
|
146 |
+
+ structure_embed
|
147 |
+
+ ss8_embed
|
148 |
+
+ sasa_embed
|
149 |
+
+ function_embed
|
150 |
+
+ residue_embed
|
151 |
+
)
|
152 |
+
|
153 |
+
|
154 |
+
class OutputHeads(nn.Module):
|
155 |
+
def __init__(self, d_model: int):
|
156 |
+
super().__init__()
|
157 |
+
self.sequence_head = RegressionHead(d_model, 64)
|
158 |
+
self.structure_head = RegressionHead(d_model, 4096)
|
159 |
+
self.ss8_head = RegressionHead(d_model, 8 + 3)
|
160 |
+
self.sasa_head = RegressionHead(d_model, 16 + 3)
|
161 |
+
self.function_head = RegressionHead(d_model, 260 * 8)
|
162 |
+
self.residue_head = RegressionHead(d_model, 1478)
|
163 |
+
|
164 |
+
def forward(self, x: torch.Tensor, embed: torch.Tensor) -> ESMOutput:
|
165 |
+
sequence_logits = self.sequence_head(x)
|
166 |
+
structure_logits = self.structure_head(x)
|
167 |
+
secondary_structure_logits = self.ss8_head(x)
|
168 |
+
sasa_logits = self.sasa_head(x)
|
169 |
+
function_logits = self.function_head(x)
|
170 |
+
function_logits = einops.rearrange(
|
171 |
+
function_logits,
|
172 |
+
"... (k v) -> ... k v",
|
173 |
+
k=8,
|
174 |
+
)
|
175 |
+
|
176 |
+
residue_logits = self.residue_head(x)
|
177 |
+
|
178 |
+
return ESMOutput(
|
179 |
+
sequence_logits=sequence_logits,
|
180 |
+
structure_logits=structure_logits,
|
181 |
+
secondary_structure_logits=secondary_structure_logits,
|
182 |
+
sasa_logits=sasa_logits,
|
183 |
+
function_logits=function_logits,
|
184 |
+
residue_logits=residue_logits,
|
185 |
+
embeddings=embed,
|
186 |
+
)
|
187 |
+
|
188 |
+
|
189 |
+
class ESM3(nn.Module, ESM3InferenceClient):
|
190 |
+
"""
|
191 |
+
ESM3 model implementation.
|
192 |
+
|
193 |
+
Args:
|
194 |
+
d_model (int): The dimensionality of the input and output feature vectors.
|
195 |
+
n_heads (int): The number of attention heads in the transformer layers.
|
196 |
+
v_heads (int): The number of attention heads in the variational transformer layers.
|
197 |
+
n_layers (int): The number of transformer layers.
|
198 |
+
"""
|
199 |
+
|
200 |
+
def __init__(
|
201 |
+
self,
|
202 |
+
d_model: int,
|
203 |
+
n_heads: int,
|
204 |
+
v_heads: int,
|
205 |
+
n_layers: int,
|
206 |
+
structure_encoder_name: str,
|
207 |
+
structure_decoder_name: str,
|
208 |
+
function_decoder_name: str,
|
209 |
+
):
|
210 |
+
super().__init__()
|
211 |
+
self.encoder = EncodeInputs(d_model)
|
212 |
+
self.transformer = TransformerStack(
|
213 |
+
d_model,
|
214 |
+
n_heads,
|
215 |
+
v_heads,
|
216 |
+
n_layers,
|
217 |
+
mask_and_zero_frameless=True,
|
218 |
+
)
|
219 |
+
self.output_heads = OutputHeads(d_model)
|
220 |
+
|
221 |
+
self.structure_encoder_name = structure_encoder_name
|
222 |
+
self.structure_decoder_name = structure_decoder_name
|
223 |
+
self.function_decoder_name = function_decoder_name
|
224 |
+
|
225 |
+
self.structure_encoder: StructureTokenEncoder | None = None # type: ignore
|
226 |
+
self.structure_decoder: StructureTokenDecoder | None = None # type: ignore
|
227 |
+
self.function_decoder: FunctionTokenDecoder | None = None # type: ignore
|
228 |
+
|
229 |
+
self.tokenizers = get_model_tokenizers(ESM3_OPEN_SMALL)
|
230 |
+
|
231 |
+
@classmethod
|
232 |
+
def from_pretrained(
|
233 |
+
cls,
|
234 |
+
model_name: str = ESM3_OPEN_SMALL,
|
235 |
+
device: torch.device | str = "cpu",
|
236 |
+
) -> ESM3:
|
237 |
+
from esm.pretrained import load_local_model
|
238 |
+
if model_name not in [ESM3_OPEN_SMALL]:
|
239 |
+
raise ValueError(f"Model name {model_name} is not a valid ESM3 model name.")
|
240 |
+
model: ESM3 = load_local_model(model_name, device=device) # type: ignore
|
241 |
+
return model
|
242 |
+
|
243 |
+
def get_structure_token_encoder(self) -> StructureTokenEncoder:
|
244 |
+
if self.structure_encoder is None:
|
245 |
+
self.structure_encoder = self.load_model(self.structure_encoder_name) # type: ignore
|
246 |
+
return self.structure_encoder # type: ignore
|
247 |
+
|
248 |
+
def get_structure_token_decoder(self) -> StructureTokenDecoder:
|
249 |
+
if self.structure_decoder is None:
|
250 |
+
self.structure_decoder = self.load_model(self.structure_decoder_name) # type: ignore
|
251 |
+
return self.structure_decoder # type: ignore
|
252 |
+
|
253 |
+
def get_function_token_decoder(self) -> FunctionTokenDecoder:
|
254 |
+
if self.function_decoder is None:
|
255 |
+
self.function_decoder = self.load_model(self.function_decoder_name) # type: ignore
|
256 |
+
return self.function_decoder # type: ignore
|
257 |
+
|
258 |
+
def load_model(self, model_name: str):
|
259 |
+
# Lazy import from pretrained
|
260 |
+
from esm.pretrained import load_local_model
|
261 |
+
|
262 |
+
return load_local_model(model_name, device=next(self.parameters()).device)
|
263 |
+
|
264 |
+
def forward(
|
265 |
+
self,
|
266 |
+
*,
|
267 |
+
sequence_tokens: torch.Tensor | None = None,
|
268 |
+
structure_tokens: torch.Tensor | None = None,
|
269 |
+
ss8_tokens: torch.Tensor | None = None,
|
270 |
+
sasa_tokens: torch.Tensor | None = None,
|
271 |
+
function_tokens: torch.Tensor | None = None,
|
272 |
+
residue_annotation_tokens: torch.Tensor | None = None,
|
273 |
+
average_plddt: torch.Tensor | None = None,
|
274 |
+
per_res_plddt: torch.Tensor | None = None,
|
275 |
+
structure_coords: torch.Tensor | None = None,
|
276 |
+
chain_id: torch.Tensor | None = None,
|
277 |
+
sequence_id: torch.Tensor | None = None,
|
278 |
+
) -> ESMOutput:
|
279 |
+
"""
|
280 |
+
Performs forward pass through the ESM3 model. Check utils to see how to tokenize inputs from raw data.
|
281 |
+
|
282 |
+
Args:
|
283 |
+
sequence_tokens (torch.Tensor, optional): The amino acid tokens.
|
284 |
+
structure_tokens (torch.Tensor, optional): The structure tokens.
|
285 |
+
ss8_tokens (torch.Tensor, optional): The secondary structure tokens.
|
286 |
+
sasa_tokens (torch.Tensor, optional): The solvent accessible surface area tokens.
|
287 |
+
function_tokens (torch.Tensor, optional): The function tokens.
|
288 |
+
residue_annotation_tokens (torch.Tensor, optional): The residue annotation tokens.
|
289 |
+
average_plddt (torch.Tensor, optional): The average plddt across the entire sequence.
|
290 |
+
per_res_plddt (torch.Tensor, optional): The per residue plddt, if you want to specify exact plddts, use this,
|
291 |
+
otherwise, use average_plddt.
|
292 |
+
structure_coords (torch.Tensor, optional): The structure coordinates, in the form of (B, L, 3, 3).
|
293 |
+
chain_id (torch.Tensor, optional): The chain ID
|
294 |
+
sequence_id (torch.Tensor, optional): The sequence ID.
|
295 |
+
|
296 |
+
Returns:
|
297 |
+
ESMOutput: The output of the ESM3 model.
|
298 |
+
|
299 |
+
Raises:
|
300 |
+
ValueError: If at least one of the inputs is None.
|
301 |
+
|
302 |
+
"""
|
303 |
+
# Reasonable defaults:
|
304 |
+
try:
|
305 |
+
L, device = next(
|
306 |
+
(x.shape[1], x.device)
|
307 |
+
for x in [
|
308 |
+
sequence_tokens,
|
309 |
+
structure_tokens,
|
310 |
+
ss8_tokens,
|
311 |
+
sasa_tokens,
|
312 |
+
structure_coords,
|
313 |
+
function_tokens,
|
314 |
+
residue_annotation_tokens,
|
315 |
+
]
|
316 |
+
if x is not None
|
317 |
+
)
|
318 |
+
except StopIteration:
|
319 |
+
raise ValueError("At least one of the inputs must be non-None")
|
320 |
+
|
321 |
+
t = self.tokenizers
|
322 |
+
defaults = lambda x, tok: (
|
323 |
+
torch.full((1, L), tok, dtype=torch.long, device=device) if x is None else x
|
324 |
+
)
|
325 |
+
sequence_tokens = defaults(sequence_tokens, t.sequence.mask_token_id)
|
326 |
+
ss8_tokens = defaults(ss8_tokens, C.SS8_UNK_TOKEN)
|
327 |
+
sasa_tokens = defaults(sasa_tokens, C.SASA_UNK_TOKEN)
|
328 |
+
average_plddt = defaults(average_plddt, 1).float()
|
329 |
+
per_res_plddt = defaults(per_res_plddt, 0).float()
|
330 |
+
chain_id = defaults(chain_id, 0)
|
331 |
+
sequence_id = defaults(sequence_id, 0)
|
332 |
+
|
333 |
+
if residue_annotation_tokens is None:
|
334 |
+
residue_annotation_tokens = torch.full(
|
335 |
+
(1, L, 16), C.RESIDUE_PAD_TOKEN, dtype=torch.long, device=device
|
336 |
+
)
|
337 |
+
|
338 |
+
if function_tokens is None:
|
339 |
+
function_tokens = torch.full(
|
340 |
+
(1, L, 8), C.INTERPRO_PAD_TOKEN, dtype=torch.long, device=device
|
341 |
+
)
|
342 |
+
|
343 |
+
if structure_coords is None:
|
344 |
+
structure_coords = torch.full(
|
345 |
+
(1, L, 3, 3), float("nan"), dtype=torch.float, device=device
|
346 |
+
)
|
347 |
+
|
348 |
+
structure_coords = structure_coords[
|
349 |
+
..., :3, :
|
350 |
+
] # In case we pass in an atom14 or atom37 repr
|
351 |
+
affine, affine_mask = build_affine3d_from_coordinates(structure_coords)
|
352 |
+
|
353 |
+
if structure_tokens is None:
|
354 |
+
_, structure_tokens = self.get_structure_token_encoder().encode(
|
355 |
+
structure_coords
|
356 |
+
)
|
357 |
+
assert structure_tokens is not None
|
358 |
+
structure_tokens = (
|
359 |
+
structure_tokens.masked_fill(
|
360 |
+
(structure_tokens == -1) | ~affine_mask, C.STRUCTURE_MASK_TOKEN
|
361 |
+
)
|
362 |
+
.masked_fill(sequence_tokens == C.SEQUENCE_BOS_TOKEN, C.STRUCTURE_BOS_TOKEN)
|
363 |
+
.masked_fill(sequence_tokens == C.SEQUENCE_PAD_TOKEN, C.STRUCTURE_PAD_TOKEN)
|
364 |
+
.masked_fill(sequence_tokens == C.SEQUENCE_EOS_TOKEN, C.STRUCTURE_EOS_TOKEN)
|
365 |
+
.masked_fill(
|
366 |
+
sequence_tokens == C.SEQUENCE_CHAINBREAK_TOKEN,
|
367 |
+
C.STRUCTURE_CHAINBREAK_TOKEN,
|
368 |
+
)
|
369 |
+
)
|
370 |
+
|
371 |
+
x = self.encoder(
|
372 |
+
sequence_tokens,
|
373 |
+
structure_tokens,
|
374 |
+
average_plddt,
|
375 |
+
per_res_plddt,
|
376 |
+
ss8_tokens,
|
377 |
+
sasa_tokens,
|
378 |
+
function_tokens,
|
379 |
+
residue_annotation_tokens,
|
380 |
+
)
|
381 |
+
x, embedding = self.transformer(x, sequence_id, affine, affine_mask, chain_id)
|
382 |
+
return self.output_heads(x, embedding)
|
383 |
+
|
384 |
+
# The following methods are for the ESM3InferenceClient interface
|
385 |
+
def generate(self, input: ProteinType, config: GenerationConfig) -> ProteinType:
|
386 |
+
if isinstance(input, ESMProtein):
|
387 |
+
return iterative_sampling_raw(self, input, config)
|
388 |
+
elif isinstance(input, ESMProteinTensor):
|
389 |
+
return iterative_sampling_tokens(self, input, config, self.tokenizers)
|
390 |
+
else:
|
391 |
+
raise ValueError("Input must be an ESMProtein or ESMProteinTensor")
|
392 |
+
|
393 |
+
def encode(self, input: ESMProtein) -> ESMProteinTensor:
|
394 |
+
input = attr.evolve(input) # Make a copy
|
395 |
+
|
396 |
+
sequence_tokens = None
|
397 |
+
structure_tokens = None
|
398 |
+
secondary_structure_tokens = None
|
399 |
+
sasa_tokens = None
|
400 |
+
function_tokens = None
|
401 |
+
residue_annotation_tokens = None
|
402 |
+
|
403 |
+
coordinates = None
|
404 |
+
|
405 |
+
if input.sequence is not None:
|
406 |
+
sequence_tokens = encoding.tokenize_sequence(
|
407 |
+
input.sequence, self.tokenizers.sequence, add_special_tokens=True
|
408 |
+
)
|
409 |
+
if input.secondary_structure is not None:
|
410 |
+
secondary_structure_tokens = encoding.tokenize_secondary_structure(
|
411 |
+
input.secondary_structure,
|
412 |
+
self.tokenizers.secondary_structure,
|
413 |
+
add_special_tokens=True,
|
414 |
+
)
|
415 |
+
if input.sasa is not None:
|
416 |
+
sasa_tokens = encoding.tokenize_sasa(
|
417 |
+
input.sasa, self.tokenizers.sasa, add_special_tokens=True
|
418 |
+
)
|
419 |
+
|
420 |
+
# Infer input length
|
421 |
+
sequence_length = -1
|
422 |
+
if sequence_tokens is not None:
|
423 |
+
sequence_length = len(sequence_tokens)
|
424 |
+
elif secondary_structure_tokens is not None:
|
425 |
+
sequence_length = len(secondary_structure_tokens)
|
426 |
+
elif sasa_tokens is not None:
|
427 |
+
sequence_length = len(sasa_tokens)
|
428 |
+
|
429 |
+
# Try to infer input length from structure data
|
430 |
+
if input.coordinates is not None:
|
431 |
+
coordinates, _, structure_tokens = encoding.tokenize_structure(
|
432 |
+
input.coordinates,
|
433 |
+
self.get_structure_token_encoder(),
|
434 |
+
structure_tokenizer=self.tokenizers.structure,
|
435 |
+
reference_sequence=input.sequence or "",
|
436 |
+
add_special_tokens=True,
|
437 |
+
)
|
438 |
+
if sequence_length == -1:
|
439 |
+
sequence_length = len(structure_tokens)
|
440 |
+
|
441 |
+
if sequence_length == -1:
|
442 |
+
raise ValueError(
|
443 |
+
"Cannot infer input length from input data. Please provide one of: sequence, structure, secondary_structure, sasa.\n"
|
444 |
+
"To condition on sequence length only, use ESM3LocalInferenceClient.get_default_sequence(sequence_length) to generate a default sequence input."
|
445 |
+
)
|
446 |
+
|
447 |
+
# Function and Residue annotations
|
448 |
+
if input.function_annotations is not None:
|
449 |
+
if input.sequence is None:
|
450 |
+
reference_sequence = encoding.get_default_sequence(sequence_length - 2)
|
451 |
+
else:
|
452 |
+
reference_sequence = input.sequence
|
453 |
+
(
|
454 |
+
function_tokens,
|
455 |
+
residue_annotation_tokens,
|
456 |
+
) = encoding.tokenize_function_annotations(
|
457 |
+
input.function_annotations,
|
458 |
+
reference_sequence=reference_sequence,
|
459 |
+
function_tokenizer=self.tokenizers.function,
|
460 |
+
residue_annotation_tokenizer=self.tokenizers.residue_annotations,
|
461 |
+
add_special_tokens=True,
|
462 |
+
)
|
463 |
+
|
464 |
+
return ESMProteinTensor(
|
465 |
+
sequence=sequence_tokens,
|
466 |
+
structure=structure_tokens,
|
467 |
+
secondary_structure=secondary_structure_tokens,
|
468 |
+
sasa=sasa_tokens,
|
469 |
+
function=function_tokens,
|
470 |
+
residue_annotations=residue_annotation_tokens,
|
471 |
+
coordinates=coordinates,
|
472 |
+
).to(next(self.parameters()).device)
|
473 |
+
|
474 |
+
def decode(
|
475 |
+
self,
|
476 |
+
input: ESMProteinTensor,
|
477 |
+
) -> ESMProtein:
|
478 |
+
return decode_protein_tensor(
|
479 |
+
input=input,
|
480 |
+
tokenizers=self.tokenizers,
|
481 |
+
structure_token_decoder=self.get_structure_token_decoder(),
|
482 |
+
function_token_decoder=self.get_function_token_decoder(),
|
483 |
+
)
|
484 |
+
|
485 |
+
def _forward(
|
486 |
+
self, input: ESMProteinTensor, config: ForwardConfig = ForwardConfig()
|
487 |
+
) -> ForwardOutput:
|
488 |
+
# Default plddt conditioning for inference. 1s where coordinates are provided.
|
489 |
+
if input.coordinates is None:
|
490 |
+
per_res_plddt = None
|
491 |
+
else:
|
492 |
+
# 1.0 if all coordinates at specific indices have valid non-nan values.
|
493 |
+
per_res_plddt = input.coordinates.isfinite().all(dim=-1).any(dim=-1).float()
|
494 |
+
|
495 |
+
with torch.no_grad() if self.eval else contextlib.nullcontext():
|
496 |
+
output = self.forward(
|
497 |
+
sequence_tokens=input.sequence,
|
498 |
+
structure_tokens=input.structure,
|
499 |
+
ss8_tokens=input.secondary_structure,
|
500 |
+
sasa_tokens=input.sasa,
|
501 |
+
function_tokens=input.function,
|
502 |
+
residue_annotation_tokens=input.residue_annotations,
|
503 |
+
average_plddt=torch.tensor(1.0, device=input.device),
|
504 |
+
per_res_plddt=per_res_plddt,
|
505 |
+
structure_coords=input.coordinates,
|
506 |
+
chain_id=None,
|
507 |
+
sequence_id=None,
|
508 |
+
)
|
509 |
+
|
510 |
+
if config.return_logits:
|
511 |
+
logits = ForwardTrackData(
|
512 |
+
sequence=output.sequence_logits,
|
513 |
+
structure=output.structure_logits,
|
514 |
+
secondary_structure=output.secondary_structure_logits,
|
515 |
+
sasa=output.sasa_logits,
|
516 |
+
function=output.function_logits,
|
517 |
+
)
|
518 |
+
else:
|
519 |
+
logits = None
|
520 |
+
|
521 |
+
return ForwardOutput(
|
522 |
+
logits=logits,
|
523 |
+
residue_annotation_logits=output.residue_logits,
|
524 |
+
embeddings=output.embeddings if config.return_embeddings else None,
|
525 |
+
)
|
526 |
+
|
527 |
+
def forward_and_sample(
|
528 |
+
self, input: ESMProteinTensor, sampling_configuration: SamplingConfig
|
529 |
+
) -> ForwardAndSampleOutput:
|
530 |
+
protein_tensor = attr.evolve(input) # Make a copy
|
531 |
+
|
532 |
+
def maybe_clone(x: torch.Tensor | None) -> torch.Tensor | None:
|
533 |
+
return x.clone() if x is not None else None
|
534 |
+
|
535 |
+
device = next(self.parameters()).device
|
536 |
+
|
537 |
+
sampling_config = sampling_configuration
|
538 |
+
if sampling_config is None:
|
539 |
+
sampling_config = get_default_sampling_config(self.tokenizers)
|
540 |
+
|
541 |
+
# Initialize default values for missing tracks
|
542 |
+
default_protein_tensor = ESMProteinTensor.empty(
|
543 |
+
len(input) - 2, tokenizers=self.tokenizers, device=input.device
|
544 |
+
)
|
545 |
+
for track in attr.fields(ESMProteinTensor):
|
546 |
+
if getattr(protein_tensor, track.name, None) is None:
|
547 |
+
setattr(
|
548 |
+
protein_tensor,
|
549 |
+
track.name,
|
550 |
+
getattr(default_protein_tensor, track.name, None),
|
551 |
+
)
|
552 |
+
|
553 |
+
# Preprocessing
|
554 |
+
sequence_length: int = -1
|
555 |
+
for track in [
|
556 |
+
"sequence",
|
557 |
+
"structure",
|
558 |
+
"secondary_structure",
|
559 |
+
"sasa",
|
560 |
+
"function",
|
561 |
+
"residue_annotations",
|
562 |
+
]:
|
563 |
+
input_tensor: torch.Tensor | None = getattr(protein_tensor, track, None)
|
564 |
+
if input_tensor is not None:
|
565 |
+
# Add batch dimension if necessary
|
566 |
+
if track in ["sequence", "structure", "secondary_structure", "sasa"]:
|
567 |
+
if len(input_tensor.size()) == 1:
|
568 |
+
input_tensor = input_tensor.unsqueeze(0) # (L,) -> (1, L)
|
569 |
+
elif track in ["function", "residue_annotations"]:
|
570 |
+
if len(input_tensor.size()) == 2:
|
571 |
+
input_tensor = input_tensor.unsqueeze(0) # (L, O) -> (1, L, O)
|
572 |
+
|
573 |
+
# Check length consistency
|
574 |
+
if sequence_length == -1:
|
575 |
+
sequence_length = input_tensor.size(1)
|
576 |
+
else:
|
577 |
+
if input_tensor.size(1) != sequence_length:
|
578 |
+
raise ValueError(
|
579 |
+
f"Length mismatch for track {track}. Expected {sequence_length}, got {input_tensor.size(1)}"
|
580 |
+
)
|
581 |
+
|
582 |
+
# Move input tensor to model device
|
583 |
+
input_tensor = input_tensor.to(device)
|
584 |
+
setattr(protein_tensor, track, input_tensor)
|
585 |
+
|
586 |
+
if protein_tensor.coordinates is not None:
|
587 |
+
coordinates = protein_tensor.coordinates
|
588 |
+
if len(coordinates.size()) == 3:
|
589 |
+
coordinates = coordinates.unsqueeze(0)
|
590 |
+
protein_tensor.coordinates = coordinates.to(device)
|
591 |
+
sequence_length = coordinates.size(1)
|
592 |
+
|
593 |
+
if sequence_length == -1:
|
594 |
+
raise ValueError("No input data provided")
|
595 |
+
|
596 |
+
# Forward pass
|
597 |
+
forward_output = self._forward(
|
598 |
+
protein_tensor,
|
599 |
+
ForwardConfig(
|
600 |
+
ReturnLogitsConfig(
|
601 |
+
                    sequence=True,
                    structure=True,
                    secondary_structure=True,
                    sasa=True,
                    function=True,
                    residue_annotations=True,
                ),
                return_embeddings=True,
            ),
        )

        # Sampling
        tokens_dir = {}
        track_sampling_metadata_dir: dict[str, dict | None] = {}
        for track in ["sequence", "structure", "secondary_structure", "sasa"]:
            config = getattr(sampling_config, track)
            if config is None:
                tokens_dir[track] = maybe_clone(getattr(input, track))
                continue
            sampling_metadata = self._sample_track(
                logits=getattr(forward_output.logits, track)[0, ...],
                tokens=getattr(protein_tensor, track)[0, ...],
                sampling_track_config=config,
                mask_idx=getattr(self.tokenizers, track).mask_token_id,
            )
            tokens_dir[track] = sampling_metadata.pop("sampled_tokens")  # (L,)
            track_sampling_metadata_dir[track] = sampling_metadata

        # Sample function and residue annotations separately
        config = getattr(sampling_config, "function")
        if config is None:
            tokens_dir["function"] = maybe_clone(getattr(input, "function"))
            tokens_dir["residue_annotations"] = maybe_clone(
                getattr(input, "residue_annotations")
            )
        else:
            sampling_metadata = self._sample_function_track(
                tokens=getattr(protein_tensor, "function")[0, ...],
                logits=getattr(forward_output.logits, "function")[0, ...],
                sampling_track_config=config,
            )
            tokens_dir["function"] = sampling_metadata.pop("sampled_tokens")  # (L, D)
            track_sampling_metadata_dir["function"] = sampling_metadata

            sampled_tokens, _ = sample_residue_annotation_logits(
                logits=forward_output.residue_annotation_logits[0, ...]  # type: ignore
            )
            tokens_dir["residue_annotations"] = sampled_tokens  # (L, MAX_R)

        # Format output
        forward_and_sample_output_dir = {}
        forward_and_sample_output_dir["protein_tensor"] = ESMProteinTensor(**tokens_dir)
        for property in [
            "entropy",
            "prob",
            "logprob",
            "top_prob",
            "topk_logprob",
            "topk_tokens",
        ]:
            is_all_none = True
            forward_track_data_dir = {}
            for track in track_sampling_metadata_dir.keys():
                values = track_sampling_metadata_dir[track]
                if values is not None and values.get(property, None) is not None:
                    forward_track_data_dir[track] = values.get(property, None)
                    is_all_none = False
            if not is_all_none:
                forward_and_sample_output_dir[property] = ForwardTrackData(
                    **forward_track_data_dir
                )
            else:
                forward_and_sample_output_dir[property] = None

        perres_embed = (
            forward_output.embeddings[0]  # type: ignore
            if sampling_configuration.return_per_residue_embeddings
            else None
        )
        mean_embedding = (
            forward_output.embeddings[0].mean(1)  # type: ignore
            if sampling_configuration.return_per_residue_embeddings
            else None
        )

        return ForwardAndSampleOutput(
            per_residue_embedding=perres_embed,
            mean_embedding=mean_embedding,
            **forward_and_sample_output_dir,
        )

    def _sample_track(
        self,
        logits: torch.Tensor,
        tokens: torch.Tensor,
        sampling_track_config: SamplingTrackConfig,
        mask_idx: int,
    ) -> dict[str, torch.Tensor]:
        # Sample in all positions
        temperature = sampling_track_config.temperature
        sampled_tokens = sample_logits(
            logits, temperature=temperature, top_p=sampling_track_config.top_p
        )
        log_probs = logits.log_softmax(-1)

        # Do not sample at BOS and EOS tokens
        sampling_mask = torch.ones_like(tokens, dtype=torch.bool)  # (L, )
        sampling_mask[0] = False
        sampling_mask[-1] = False

        # Do not sample at special token positions but allow sampling at mask token
        special_minus_mask = list(set(sampling_track_config.invalid_ids) - {mask_idx})
        if len(special_minus_mask) > 0:
            special_tokens = torch.tensor(special_minus_mask, device=tokens.device)
            assert special_tokens.numel() > 0
            sampling_mask = sampling_mask & (
                tokens[..., None] != special_tokens[None, :]
            ).all(-1)

        # Keep only samples from masked positions (if specified)
        if sampling_track_config.only_sample_masked_tokens:
            masked_tokens = tokens == mask_idx
            sampling_mask = sampling_mask & masked_tokens
        sampled_tokens = torch.where(sampling_mask, sampled_tokens, tokens)

        return self._compute_track_metadata(
            sampled_tokens,
            log_probs,
            sampling_mask,
            top_k=sampling_track_config.topk_logprobs,
        )

    def _sample_function_track(
        self,
        tokens: torch.Tensor,
        logits: torch.Tensor,
        sampling_track_config: SamplingTrackConfig,
    ) -> dict[str, torch.Tensor]:
        # Do not sample at BOS and EOS tokens
        sampling_mask = torch.ones_like(tokens, dtype=torch.bool)
        sampling_mask[0] = False
        sampling_mask[-1] = False

        sampled_tokens, probs = sample_function_logits(
            logits,
            self.tokenizers.function,
            top_p=sampling_track_config.top_p,
            temperature=sampling_track_config.temperature,
        )

        if sampling_track_config.only_sample_masked_tokens:
            raise ValueError(
                "Sampling only masked tokens is undefined for function tokens."
            )

        sampled_tokens = torch.where(sampling_mask, sampled_tokens, tokens)  # (L, D)

        return self._compute_track_metadata(
            sampled_tokens,
            probs,
            sampling_mask,
            top_k=sampling_track_config.topk_logprobs,
        )

    @staticmethod
    def _compute_track_metadata(
        sampled_tokens: torch.Tensor,
        log_probs: torch.Tensor,
        sampling_mask: torch.Tensor,
        top_k: int,
    ) -> dict:
        probs = torch.exp(log_probs)  # (B, L)
        entropy = torch.distributions.Categorical(probs=probs).entropy()  # (B, L)

        # Only compute probabilities for sampled tokens
        sampled_logprob = torch.zeros_like(
            sampled_tokens, dtype=torch.float32
        )  # (B, L)
        sampled_tokens_valid = sampled_tokens[sampling_mask]
        sampled_log_probs_valid = log_probs[sampling_mask, sampled_tokens_valid]
        sampled_logprob[sampling_mask] = sampled_log_probs_valid

        # Calculate extra metadata
        sampled_prob = torch.exp(sampled_logprob)
        top_prob = torch.max(probs, dim=-1).values
        topk_logprobs, topk_tokens = torch.topk(log_probs, top_k, dim=-1)
        topk_logprobs = None if top_k == 0 else topk_logprobs
        topk_tokens = None if top_k == 0 else topk_tokens

        return {
            "entropy": entropy,
            "sampled_tokens": sampled_tokens,
            "prob": sampled_prob,
            "logprob": sampled_logprob,
            "top_prob": top_prob,
            "topk_logprob": topk_logprobs,
            "topk_tokens": topk_tokens,
        }
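The per-track sampling above follows a single pattern: sample a replacement token at every position, build a boolean mask that protects BOS/EOS, other special tokens, and (optionally) every position that was not masked, apply the mask with `torch.where`, and then read per-position statistics (entropy, log-probability of the sampled token, top-k alternatives) off the softmax distribution. A minimal, self-contained sketch of that pattern in plain PyTorch follows; the function name, tensor shapes, and vocabulary size are illustrative only and are not part of this repository's API.

import torch

def masked_sample_with_metadata(logits, tokens, mask_idx, temperature=1.0, top_k=3):
    # logits: (L, V) per-position logits; tokens: (L,) current token ids.
    probs = (logits / temperature).softmax(-1)
    sampled = torch.multinomial(probs, num_samples=1).squeeze(-1)  # (L,)

    # Only overwrite masked positions, and never the BOS/EOS boundary positions.
    sampling_mask = tokens == mask_idx
    sampling_mask[0] = False
    sampling_mask[-1] = False
    sampled = torch.where(sampling_mask, sampled, tokens)

    log_probs = logits.log_softmax(-1)
    entropy = torch.distributions.Categorical(probs=probs).entropy()  # (L,)
    logprob = log_probs.gather(-1, sampled[:, None]).squeeze(-1)  # (L,)
    topk_logprob, topk_tokens = log_probs.topk(top_k, dim=-1)  # (L, top_k)
    return sampled, {
        "entropy": entropy,
        "logprob": logprob,
        "topk_logprob": topk_logprob,
        "topk_tokens": topk_tokens,
    }

# Example: 8 positions, a 32-token vocabulary, and mask token id 31.
logits = torch.randn(8, 32)
tokens = torch.full((8,), 31)
sampled, metadata = masked_sample_with_metadata(logits, tokens, mask_idx=31)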
esm/models/function_decoder.py
ADDED
@@ -0,0 +1,338 @@
"""Function Token Decoder."""
from collections import defaultdict
from dataclasses import dataclass, field

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from esm.layers.regression_head import RegressionHead
from esm.layers.transformer_stack import TransformerStack
from esm.tokenization.function_tokenizer import (
    InterProQuantizedTokenizer,
)
from esm.utils.constants import esm3 as C
from esm.utils.misc import merge_ranges
from esm.utils.types import FunctionAnnotation


@dataclass(frozen=True)
class FunctionTokenDecoderConfig:
    """Configures function token decoder."""

    # Embedding dimension of decoder.
    d_model: int = 1024
    # Number of attention heads of decoder.
    n_heads: int = 8
    # Number of layers of decoder.
    n_layers: int = 3
    # Number of integer values that function tokens may assume.
    function_token_vocab_size: int = 260
    # Number of function tokens at each position.
    function_token_depth: int = 8
    # Number of InterPro labels that can be decoded.
    num_interpro_classes: int = 29026
    # Number of function keywords that can be decoded.
    keyword_vocabulary_size: int = 58641
    # List of supported InterPro ids.
    interpro_entry_list: str = field(
        default_factory=lambda: str(C.data_root() / C.INTERPRO_ENTRY)
    )
    # Path to keywords vocabulary.
    keyword_vocabulary_path: str = field(
        default_factory=lambda: str(C.data_root() / C.KEYWORDS_VOCABULARY)
    )
    # Whether to unpack LSH bits into single-bit tokens.
    unpack_lsh_bits: bool = True
    # The number of special tokens in the function tokenizer vocabulary which come
    # before the LSH tokens.
    num_special_tokens: int = 4
    # The number of bits per LSH token in the function tokenizer.
    bits_per_token: int = 8


class FunctionTokenDecoder(nn.Module):
    def __init__(self, config: FunctionTokenDecoderConfig | None = None):
        """Constructs function token decoder."""
        super().__init__()
        if config is None:
            config = FunctionTokenDecoderConfig()
        self.config = config

        # Get the supported set of InterPro ids.
        df = pd.read_csv(config.interpro_entry_list, sep="\t")
        self.interpro_ids = sorted(df.ENTRY_AC)
        self.interpro2index = {
            interpro_id: i for i, interpro_id in enumerate(self.interpro_ids)
        }
        assert len(self.interpro_ids) == config.num_interpro_classes

        with open(config.keyword_vocabulary_path, "r") as f:
            self.keywords_vocabulary: list[str] = list(f.read().strip().split("\n"))
        assert len(self.keywords_vocabulary) == config.keyword_vocabulary_size

        if config.unpack_lsh_bits:
            vocab_size = 2 * config.function_token_depth * config.bits_per_token
        else:
            # Function-token id's re-use the same token ids at each position along the depth
            # dimension, despite distinct meanings. The decoder should take this into
            # account so create distinct embeddings for tokens at each position.
            vocab_size = (
                self.config.function_token_depth * self.config.function_token_vocab_size
            )

        self.embedding = nn.Embedding(
            # Function-token id's re-use the same token ids at each position along the
            # depth dimension, despite distinct meanings. The decoder should take this
            # into account so create distinct embeddings for tokens at each position.
            num_embeddings=(vocab_size),
            embedding_dim=config.d_model,
        )
        self.decoder = TransformerStack(
            d_model=config.d_model,
            n_heads=config.n_heads,
            v_heads=None,
            n_layers=config.n_layers,
            n_layers_geom=0,
            scale_residue=False,
            bias=True,
            qk_layernorm=False,
            ffn_type="gelu",
            expansion_ratio=4,
        )
        self.heads = nn.ModuleDict(
            {
                # Binary classification head predicting which keywords are present.
                "keyword_logits": RegressionHead(
                    d_model=config.d_model,
                    output_dim=config.keyword_vocabulary_size,
                    hidden_dim=4 * config.d_model,
                ),
                # Regresses the TF-IDF value of each present keyword.
                "keyword_tfidf": RegressionHead(
                    d_model=config.d_model,
                    output_dim=config.keyword_vocabulary_size,
                    hidden_dim=4 * config.d_model,
                ),
                # Predicts which InterPro annotations are present.
                "interpro_logits": RegressionHead(
                    d_model=config.d_model,
                    output_dim=config.num_interpro_classes,
                    hidden_dim=4 * config.d_model,
                ),
            }
        )

    def forward(self, token_ids: torch.Tensor) -> dict[str, torch.Tensor]:
        """Forward pass through function token decoder.

        Args:
            token_ids: <int>[batch_size, function_token_depth] batch of function token
                ids to decode.
        Returns:
            interpro_logits: binary classification logits tensor of shape
                <float>[batch_size, num_interpro_classes]
        """
        assert token_ids.ndim == 2
        assert token_ids.shape[1] == self.config.function_token_depth
        batch_size, depth = token_ids.shape

        if self.config.unpack_lsh_bits:
            # Shift values into [0, 2^bits/token)
            lsh_bits = token_ids - self.config.num_special_tokens
            # extract each bit. (hob stands for highest-order bit)
            bits = torch.concat(
                [
                    torch.bitwise_and(lsh_bits, 1 << hob).gt(0).to(torch.int32)
                    for hob in range(self.config.bits_per_token)
                ],
                dim=1,
            )
            assert bits.shape == (batch_size, depth * self.config.bits_per_token)

            # Shift each bit into individual vocabulary ranges, so they get distinct
            # embeddings.
            vocab_offsets = 2 * torch.arange(
                depth * self.config.bits_per_token, device=token_ids.device
            )
            inputs = vocab_offsets[None, :] + bits

            # zero-out special tokens, i.e. non LSH tokens.
            where_special = token_ids < self.config.num_special_tokens
            inputs = torch.where(where_special.any(dim=1, keepdim=True), 0, inputs)
        else:
            # Apply depth-position offset to use distinct vocabs. See __init__ for
            # explanation.
            vocab_offsets = self.config.function_token_vocab_size * torch.arange(
                self.config.function_token_depth,
                device=token_ids.device,
            )
            inputs = token_ids + vocab_offsets[None, :]

        embed = self.embedding(inputs)
        encoding, _ = self.decoder(embed)
        pooled = torch.mean(encoding, dim=1)

        return {name: head(pooled) for name, head in self.heads.items()}

    @property
    def device(self) -> torch.device:
        return next(self.parameters()).device

    def decode(
        self,
        function_token_ids: torch.Tensor,
        tokenizer: InterProQuantizedTokenizer,
        decode_annotations: bool = True,
        annotation_threshold: float = 0.1,
        decode_keywords=True,
        keywords_threshold: float = 0.5,
        annotation_min_length: int | None = 5,
        annotation_gap_merge_max: int | None = 3,
    ):
        """Decodes function tokens into predicted annotations and keywords.

        Args:
            function_token_ids: <int>[length, depth] function token ids. NOTE:
                without <bos>/<eos> prefix
            tokenizer: function tokenizer.
            decode_annotations: whether to decode InterPro annotations.
            annotation_threshold: threshold for emitting a function annotation.
            decode_keywords: whether to decode function keywords.
            keywords_threshold: threshold for emitting a keyword.
            annotation_min_length: optional minimum length of predicted annotations for
                size filtering.
            annotation_gap_merge_max: optionally merge adjacent annotations of the same type
        Returns:
            Decoder outputs:
            - "interpro_logits": <float>[length, num_interpro] predicted InterPro logits.
            - "interpro_preds": <bool>[length, num_interpro] predicted InterPro labels.
            - "interpro_annotations": list[FunctionAnnotation] predicted InterPro
                annotations
            - "keyword_logits": <float>[length, keyword_vocabulary] binary prediction
                logits for keywords.
            - "function_keywords": list[FunctionAnnotation] predicted function keyword
                ranges.
        """
        assert function_token_ids.ndim == 2
        assert function_token_ids.shape[1] == tokenizer.depth
        assert self.config.function_token_depth == tokenizer.depth

        outputs = {}

        outputs = self(function_token_ids.to(self.device))

        # Only decode in positions that have function tokens.
        where_decode = torch.all(
            (function_token_ids != tokenizer.vocab_to_index["<pad>"])
            & (function_token_ids != tokenizer.vocab_to_index["<none>"])
            & (function_token_ids != tokenizer.vocab_to_index["<unk>"]),
            dim=1,
        )

        # Decode InterPro annotations ranges.
        interpro_preds = F.sigmoid(outputs["interpro_logits"])
        interpro_preds = interpro_preds >= annotation_threshold
        interpro_preds[~where_decode, :] = False
        outputs["interpro_preds"] = interpro_preds
        if decode_annotations:
            annotations: list[FunctionAnnotation] = []
            preds: np.ndarray = interpro_preds.detach().cpu().numpy()
            for position_index, class_index in zip(*preds.nonzero()):
                interpro_id = self.interpro_ids[class_index]
                annotation = FunctionAnnotation(
                    label=interpro_id,
                    start=position_index + 1,  # zero-index -> one-index inclusive
                    end=position_index + 1,  # zero-index -> one-index inclusive
                )
                annotations.append(annotation)

            annotations = _merge_annotations(
                annotations,
                merge_gap_max=annotation_gap_merge_max,
            )

            # Drop very small annotations.
            if annotation_min_length is not None:
                annotations = [
                    annotation
                    for annotation in annotations
                    if annotation.end - annotation.start + 1 >= annotation_min_length
                ]

            outputs["interpro_annotations"] = annotations

        # Decode function keyword ranges.
        keyword_logits = outputs["keyword_logits"]
        keyword_logits[~where_decode, :] = -torch.inf
        if decode_keywords:
            keyword_preds = F.sigmoid(keyword_logits) >= keywords_threshold
            outputs["function_keywords"] = self._preds_to_keywords(
                keyword_preds.detach().cpu().numpy()
            )

        return outputs

    def _preds_to_keywords(self, keyword_preds: np.ndarray) -> list[FunctionAnnotation]:
        """Converts output log-TFIDF to predicted keywords over the sequence.

        Args:
            keyword_preds: <bool>[length, keyword_vocab] positional predictions of
                function keywords from the keyword prediction head.
        Returns:
            Non-overlapping keyword annotated ranges along the sequence. Note that indices
            will index into the *sequence*, not the function token array which has a
            <pad> prefix.
        """
        assert keyword_preds.ndim == 2
        assert keyword_preds.shape[1] == self.config.keyword_vocabulary_size

        keyword_positions: dict[str, list[range]] = defaultdict(list)
        for position, keyword_id in zip(*np.nonzero(keyword_preds)):
            keyword = self.keywords_vocabulary[keyword_id]
            keyword_positions[keyword].append(range(position, position + 1))

        annotations: list[FunctionAnnotation] = []
        for keyword, ranges in keyword_positions.items():
            for range_ in merge_ranges(ranges):
                annotation = FunctionAnnotation(
                    label=keyword,
                    start=range_.start + 1,  # zero-index -> one-index
                    end=range_.stop + 1 - 1,  # zero-index excl -> one-index incl
                )
                annotations.append(annotation)

        return annotations


def _merge_annotations(
    annotations: list[FunctionAnnotation],
    merge_gap_max: int | None = None,
) -> list[FunctionAnnotation]:
    """Merges annotations into non-overlapping segments.

    Args:
        annotations: annotations to merge.
        merge_gap_max: optionally merge neighboring ranges that are separated by a gap
            no larger than this size.
    Returns:
        non-overlapping annotations with gaps merged.
    """
    grouped: dict[str, list[range]] = defaultdict(list)
    for a in annotations:
        # Convert one-indexed inclusive-inclusive, to range()
        grouped[a.label].append(range(a.start, a.end + 1))

    merged = []
    for label, ranges in grouped.items():
        merged_ranges = merge_ranges(ranges, merge_gap_max=merge_gap_max)
        for range_ in merged_ranges:
            annotation = FunctionAnnotation(
                label=label,
                start=range_.start + 1,  # zero-index -> one-index
                end=range_.stop - 1,  # zero-index excl -> one-index incl
            )
            merged.append(annotation)
    return merged
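When `unpack_lsh_bits` is enabled, `FunctionTokenDecoder.forward` above splits every quantized function token into `bits_per_token` binary tokens and shifts each bit into its own two-entry slice of the embedding vocabulary, so each (depth, bit) position gets a distinct embedding. The standalone sketch below reproduces that unpacking; the random token ids are illustrative, and `num_special_tokens=4`, `bits_per_token=8`, `depth=8` simply mirror the defaults in `FunctionTokenDecoderConfig`.

import torch

num_special_tokens = 4  # ids below this are special (<pad>, <none>, <unk>, ...)
bits_per_token = 8      # each LSH token encodes 8 bits
depth = 8               # function tokens per sequence position

token_ids = torch.randint(num_special_tokens, num_special_tokens + 256, (2, depth))

# Shift into [0, 2**bits_per_token) and pull out the individual bits.
lsh = token_ids - num_special_tokens
bits = torch.cat(
    [torch.bitwise_and(lsh, 1 << b).gt(0).long() for b in range(bits_per_token)],
    dim=1,
)  # (batch, depth * bits_per_token), entries in {0, 1}

# Give every bit position its own {0, 1} slice of the embedding vocabulary.
offsets = 2 * torch.arange(depth * bits_per_token)
inputs = offsets[None, :] + bits
# `inputs` now indexes an nn.Embedding(2 * depth * bits_per_token, d_model),
# the same vocab_size used in the unpack_lsh_bits branch of __init__ above.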
esm/models/vqvae.py
ADDED
@@ -0,0 +1,450 @@
import torch
import torch.nn as nn

from esm.layers.blocks import UnifiedTransformerBlock
from esm.layers.codebook import EMACodebook
from esm.layers.structure_proj import Dim6RotStructureHead
from esm.layers.transformer_stack import TransformerStack
from esm.utils.constants import esm3 as C
from esm.utils.misc import knn_graph
from esm.utils.structure.affine3d import (
    Affine3D,
    build_affine3d_from_coordinates,
)
from esm.utils.structure.predicted_aligned_error import (
    compute_predicted_aligned_error,
    compute_tm,
)


class RelativePositionEmbedding(nn.Module):
    """
    Embedding layer for relative position embeddings. `bins` is the number of positions relative
    to the query position that are considered before clipping. For instance, if `bins=10`, then
    the relative position embedding will have 21 positions, [-10, 10].
    """

    def __init__(self, bins, embedding_dim, init_std=0.02):
        super().__init__()
        self.bins = bins

        self.embedding = torch.nn.Embedding(2 * bins + 2, embedding_dim)
        self.embedding.weight.data.normal_(0, init_std)

    def forward(self, query_residue_index, key_residue_index):
        """
        Input:
            query_residue_index: (B, ) tensor of source indices (dtype=torch.long)
            key_residue_index: (B, L) tensor of target indices (dtype=torch.long)
        Output:
            embeddings: B x L x embedding_dim tensor of embeddings
        """

        assert query_residue_index.dtype == torch.long
        assert key_residue_index.dtype == torch.long
        assert query_residue_index.ndim == 1
        assert key_residue_index.ndim == 2

        diff = key_residue_index - query_residue_index.unsqueeze(1)
        diff = diff.clamp(-self.bins, self.bins)
        diff = diff + self.bins + 1  # add 1 to adjust for padding index
        output = self.embedding(diff)
        return output


class PairwisePredictionHead(nn.Module):
    def __init__(
        self,
        input_dim: int,
        downproject_dim: int,
        hidden_dim: int,
        n_bins: int,
        bias: bool = True,
        pairwise_state_dim: int = 0,
    ):
        super().__init__()
        self.downproject = nn.Linear(input_dim, downproject_dim, bias=bias)
        self.linear1 = nn.Linear(
            downproject_dim + pairwise_state_dim, hidden_dim, bias=bias
        )
        self.activation_fn = nn.GELU()
        self.norm = nn.LayerNorm(hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, n_bins, bias=bias)

    def forward(self, x, pairwise: torch.Tensor | None = None):
        """
        Args:
            x: [B x L x D]

        Output:
            [B x L x L x K]
        """
        x = self.downproject(x)
        # Let x_i be a vector of size (B, D).
        # Input is {x_1, ..., x_L} of size (B, L, D)
        # Output is 2D where x_ij = cat([x_i * x_j, x_i - x_j])
        q, k = x.chunk(2, dim=-1)

        prod = q[:, None, :, :] * k[:, :, None, :]
        diff = q[:, None, :, :] - k[:, :, None, :]
        x_2d = [
            prod,
            diff,
        ]
        if pairwise is not None:
            x_2d.append(pairwise)
        x = torch.cat(x_2d, dim=-1)
        x = self.linear1(x)
        x = self.activation_fn(x)
        x = self.norm(x)
        x = self.linear2(x)
        return x


class RegressionHead(nn.Module):
    def __init__(self, embed_dim: int, output_dim: int):
        super().__init__()
        self.dense = nn.Linear(embed_dim, embed_dim)
        self.activation_fn = nn.GELU()
        self.norm = nn.LayerNorm(embed_dim)
        self.output = nn.Linear(embed_dim, output_dim)

    def forward(self, features):
        x = self.dense(features)
        x = self.activation_fn(x)
        x = self.norm(x)
        x = self.output(x)
        return x


class CategoricalMixture:
    def __init__(self, param, bins=50, start=0, end=1):
        # All tensors are of shape ..., bins.
        self.logits = param
        bins = torch.linspace(
            start, end, bins + 1, device=self.logits.device, dtype=torch.float32
        )
        self.v_bins = (bins[:-1] + bins[1:]) / 2

    def log_prob(self, true):
        # Shapes are:
        #     self.probs: ... x bins
        #     true      : ... (floating point # for target)
        true_index = (
            (true.unsqueeze(-1) - self.v_bins[[None] * true.ndim]).abs().argmin(-1)
        )
        nll = self.logits.log_softmax(-1)
        return torch.take_along_dim(nll, true_index.unsqueeze(-1), dim=-1).squeeze(-1)

    def mean(self):
        return (
            self.logits.to(self.v_bins.dtype).softmax(-1) @ self.v_bins.unsqueeze(1)
        ).squeeze(-1)

    def median(self):
        return self.v_bins[self.logits.max(-1).indices]


class GeometricEncoderStack(TransformerStack):
    def __init__(self, d_model, n_heads, v_heads, n_layers):
        super().__init__(d_model, n_heads, v_heads, 0)
        self.blocks = nn.ModuleList(
            [
                UnifiedTransformerBlock(
                    d_model,
                    n_heads,
                    v_heads=v_heads,
                    use_geom_attn=True,
                    use_plain_attn=False,
                    expansion_ratio=4,
                    bias=True,
                )
                for i in range(n_layers)
            ]
        )
        self.norm = nn.Identity()


def batched_gather(data, inds, dim=0, no_batch_dims=0):
    ranges = []
    for i, s in enumerate(data.shape[:no_batch_dims]):
        r = torch.arange(s)
        r = r.view(*(*((1,) * i), -1, *((1,) * (len(inds.shape) - i - 1))))
        ranges.append(r)

    remaining_dims = [slice(None) for _ in range(len(data.shape) - no_batch_dims)]
    remaining_dims[dim - no_batch_dims if dim >= 0 else dim] = inds
    ranges.extend(remaining_dims)
    return data[ranges]


def node_gather(s: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
    return batched_gather(s.unsqueeze(-3), edges, -2, no_batch_dims=len(s.shape) - 1)


class StructureTokenEncoder(nn.Module):
    def __init__(self, d_model, n_heads, v_heads, n_layers, d_out, n_codes):
        super().__init__()
        # We only support fully-geometric structure token encoders for now...
        # setting n_layers_geom to something that's not n_layers won't work because
        # sequence ID isn't supported fully in this repo for plain-old transformers
        self.transformer = GeometricEncoderStack(d_model, n_heads, v_heads, n_layers)
        self.pre_vq_proj = nn.Linear(d_model, d_out)
        self.codebook = EMACodebook(n_codes, d_out)
        self.relative_positional_embedding = RelativePositionEmbedding(
            32, d_model, init_std=0.02
        )
        self.knn = 16

    def encode_local_structure(
        self,
        coords: torch.Tensor,
        affine: Affine3D,
        attention_mask: torch.Tensor,
        sequence_id: torch.Tensor | None,
        affine_mask: torch.Tensor,
        residue_index: torch.Tensor | None = None,
    ):
        """This function allows a multi-layered encoder to encode tokens with local receptive fields. The implementation is as follows:

        1. Starting with (B, L) frames, we find the KNN in structure space. This now gives us (B, L, K) where the last dimension is the local
        neighborhood of all (B, L) residues.
        2. We reshape these frames to (B*L, K) so now we have a large batch of a bunch of local neighborhoods.
        3. Pass the (B*L, K) local neighborhoods through a stack of geometric reasoning blocks, effectively getting all to all communication between
        all frames in the local neighborhood.
        4. This gives (B*L, K, d_model) embeddings, from which we need to get a single embedding per local neighborhood. We do this by simply
        taking the embedding corresponding to the query node. This gives us (B*L, d_model) embeddings.
        5. Reshape back to (B, L, d_model) embeddings
        """
        assert coords.size(-1) == 3 and coords.size(-2) == 3, "need N, CA, C"
        with torch.no_grad():
            knn_edges, _ = self.find_knn_edges(
                coords,
                ~attention_mask,
                coord_mask=affine_mask,
                sequence_id=sequence_id,
                knn=self.knn,
            )
        B, L, E = knn_edges.shape

        affine_tensor = affine.tensor  # for easier manipulation
        T_D = affine_tensor.size(-1)
        knn_affine_tensor = node_gather(affine_tensor, knn_edges)
        knn_affine_tensor = knn_affine_tensor.view(-1, E, T_D).contiguous()
        affine = Affine3D.from_tensor(knn_affine_tensor)
        knn_sequence_id = (
            node_gather(sequence_id.unsqueeze(-1), knn_edges).view(-1, E)
            if sequence_id is not None
            else torch.zeros(L, E, dtype=torch.int64, device=coords.device)
        )
        knn_affine_mask = node_gather(affine_mask.unsqueeze(-1), knn_edges).view(
            -1, E
        )
        knn_chain_id = torch.zeros(L, E, dtype=torch.int64, device=coords.device)

        if residue_index is None:
            res_idxs = knn_edges.view(-1, E)
        else:
            res_idxs = node_gather(residue_index.unsqueeze(-1), knn_edges).view(
                -1, E
            )

        z = self.relative_positional_embedding(res_idxs[:, 0], res_idxs)

        z, _ = self.transformer.forward(
            x=z,
            sequence_id=knn_sequence_id,
            affine=affine,
            affine_mask=knn_affine_mask,
            chain_id=knn_chain_id,
        )

        # Unflatten the output and take the query node embedding, which will always be the first one because
        # a node has distance 0 with itself and the KNN are sorted.
        z = z.view(B, L, E, -1)
        z = z[:, :, 0, :]

        return z

    @staticmethod
    def find_knn_edges(
        coords,
        padding_mask,
        coord_mask,
        sequence_id: torch.Tensor | None = None,
        knn: int | None = None,
    ) -> tuple:
        assert knn is not None, "Must specify a non-null knn to find_knn_edges"
        # Coords are N, CA, C
        coords = coords.clone()
        coords[~coord_mask] = 0

        if sequence_id is None:
            sequence_id = torch.zeros(
                (coords.shape[0], coords.shape[1]), device=coords.device
            ).long()

        with torch.no_grad(), torch.cuda.amp.autocast(enabled=False):  # type: ignore
            ca = coords[..., 1, :]
            edges, edge_mask = knn_graph(
                ca,
                coord_mask,
                padding_mask,
                sequence_id,
                no_knn=knn,
            )

        return edges, edge_mask

    def encode(
        self,
        coords: torch.Tensor,
        attention_mask: torch.Tensor | None = None,
        sequence_id: torch.Tensor | None = None,
        residue_index: torch.Tensor | None = None,
    ):
        coords = coords[..., :3, :]
        affine, affine_mask = build_affine3d_from_coordinates(coords=coords)

        if attention_mask is None:
            attention_mask = torch.ones_like(affine_mask, dtype=torch.bool)
        attention_mask = attention_mask.bool()

        if sequence_id is None:
            sequence_id = torch.zeros_like(affine_mask, dtype=torch.int64)

        z = self.encode_local_structure(
            coords=coords,
            affine=affine,
            attention_mask=attention_mask,
            sequence_id=sequence_id,
            affine_mask=affine_mask,
            residue_index=residue_index,
        )

        z = z.masked_fill(~affine_mask.unsqueeze(2), 0)
        z = self.pre_vq_proj(z)

        z_q, min_encoding_indices, _ = self.codebook(z)

        return z_q, min_encoding_indices


class StructureTokenDecoder(nn.Module):
    def __init__(
        self,
        d_model,
        n_heads,
        n_layers,
    ):
        super().__init__()
        self.decoder_channels = d_model

        self.vqvae_codebook_size = C.VQVAE_CODEBOOK_SIZE
        self.special_tokens = C.VQVAE_SPECIAL_TOKENS
        self.max_pae_bin = C.VQVAE_MAX_PAE_BIN

        self.embed = nn.Embedding(
            self.vqvae_codebook_size + len(self.special_tokens), d_model
        )
        self.decoder_stack = TransformerStack(
            d_model, n_heads, 1, n_layers, scale_residue=False, n_layers_geom=0
        )

        self.affine_output_projection = Dim6RotStructureHead(
            self.decoder_channels, 10, predict_torsion_angles=False
        )

        direction_loss_bins = C.VQVAE_DIRECTION_LOSS_BINS
        pae_bins = C.VQVAE_PAE_BINS
        self.pairwise_bins = [
            64,  # distogram
            direction_loss_bins * 6,  # direction bins
            pae_bins,  # predicted aligned error
        ]
        self.pairwise_classification_head = PairwisePredictionHead(
            self.decoder_channels,
            downproject_dim=128,
            hidden_dim=128,
            n_bins=sum(self.pairwise_bins),
            bias=False,
        )

        plddt_bins = C.VQVAE_PLDDT_BINS
        self.plddt_head = RegressionHead(
            embed_dim=self.decoder_channels, output_dim=plddt_bins
        )

    def decode(
        self,
        structure_tokens: torch.Tensor,
        attention_mask: torch.Tensor | None = None,
        sequence_id: torch.Tensor | None = None,
    ):
        if attention_mask is None:
            attention_mask = torch.ones_like(structure_tokens, dtype=torch.bool)

        attention_mask = attention_mask.bool()
        if sequence_id is None:
            sequence_id = torch.zeros_like(structure_tokens, dtype=torch.int64)
        # not supported for now
        chain_id = torch.zeros_like(structure_tokens, dtype=torch.int64)

        # check that BOS and EOS are set correctly
        assert (
            structure_tokens[:, 0].eq(self.special_tokens["BOS"]).all()
        ), "First token in structure_tokens must be BOS token"
        assert (
            structure_tokens[
                torch.arange(structure_tokens.shape[0]), attention_mask.sum(1) - 1
            ]
            .eq(self.special_tokens["EOS"])
            .all()
        ), "Last token in structure_tokens must be EOS token"
        assert (
            (structure_tokens < 0).sum() == 0
        ), "All structure tokens set to -1 should be replaced with BOS, EOS, PAD, or MASK tokens by now, but that isn't the case!"

        x = self.embed(structure_tokens)
        # !!! NOTE: Attention mask is actually unused here so watch out
        x, _ = self.decoder_stack.forward(
            x, affine=None, affine_mask=None, sequence_id=sequence_id, chain_id=chain_id
        )

        tensor7_affine, bb_pred = self.affine_output_projection(
            x, affine=None, affine_mask=torch.zeros_like(attention_mask)
        )

        pae, ptm = None, None
        pairwise_logits = self.pairwise_classification_head(x)
        _, _, pae_logits = [
            (o if o.numel() > 0 else None)
            for o in pairwise_logits.split(self.pairwise_bins, dim=-1)
        ]

        special_tokens_mask = structure_tokens >= min(self.special_tokens.values())
        pae = compute_predicted_aligned_error(
            pae_logits,  # type: ignore
            aa_mask=~special_tokens_mask,
            sequence_id=sequence_id,
            max_bin=self.max_pae_bin,
        )
        # This might be broken for chainbreak tokens? We might align to the chainbreak
        ptm = compute_tm(
            pae_logits,  # type: ignore
            aa_mask=~special_tokens_mask,
            max_bin=self.max_pae_bin,
        )

        plddt_logits = self.plddt_head(x)
        plddt_value = CategoricalMixture(
            plddt_logits, bins=plddt_logits.shape[-1]
        ).mean()

        return dict(
            tensor7_affine=tensor7_affine,
            bb_pred=bb_pred,
            plddt=plddt_value,
            ptm=ptm,
            predicted_aligned_error=pae,
        )
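`CategoricalMixture` above is how `StructureTokenDecoder.decode` turns binned pLDDT logits into a scalar confidence per residue: a softmax over evenly spaced bins followed by an expectation over the bin centers. The sketch below reproduces that readout with plain tensors; the sequence length and the 50-bin grid over [0, 1] are assumptions for illustration (they match the class defaults), not values taken from a checkpoint.

import torch

num_bins, length = 50, 7
plddt_logits = torch.randn(length, num_bins)   # per-residue logits over confidence bins

edges = torch.linspace(0.0, 1.0, num_bins + 1)
centers = (edges[:-1] + edges[1:]) / 2         # bin centers, shape (num_bins,)

probs = plddt_logits.softmax(-1)
expected_plddt = probs @ centers               # (length,) expected confidence, as in .mean()
top_bin_plddt = centers[plddt_logits.argmax(-1)]  # (length,) highest-probability bin, as returned by .median()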