Upload 103 files
This view is limited to 50 files because it contains too many changes; see the raw diff for the full upload.
- Dockerfile +27 -0
- README.md +12 -5
- app.py +219 -0
- case_study/A0A067FT93.pdb +0 -0
- case_study/A0A126P745.pdb +0 -0
- case_study/A0A1H1XG33.pdb +0 -0
- case_study/A0A242M8J4.pdb +0 -0
- case_study/A0A2P2GK84.pdb +0 -0
- case_study/A0A2U7QU15.pdb +0 -0
- case_study/A0A2U7R6V5.pdb +0 -0
- case_study/A0A384E138.pdb +0 -0
- case_study/A0A384E143.pdb +0 -0
- case_study/A0A3Q0KJ78.pdb +1187 -0
- case_study/A0A516RTC5.pdb +0 -0
- case_study/A0A5B8NBE6.pdb +0 -0
- case_study/A0A5B8NBN0.pdb +0 -0
- case_study/A0A7J6F8C5.pdb +0 -0
- case_study/B1KN79.pdb +0 -0
- case_study/C1DMX5.pdb +0 -0
- case_study/C4R826.pdb +0 -0
- case_study/G4VQX9.pdb +0 -0
- case_study/J9PY59.pdb +0 -0
- case_study/J9VGQ7.pdb +0 -0
- case_study/J9VVW8.pdb +0 -0
- case_study/M9PF61.pdb +0 -0
- case_study/O53504.pdb +0 -0
- case_study/Q0RWC9.pdb +0 -0
- case_study/Q1NEJ0.pdb +0 -0
- case_study/Q39VG1.pdb +0 -0
- case_study/Q6F4N4.pdb +0 -0
- case_study/Q72K04.pdb +0 -0
- case_study/Q93UV7.pdb +0 -0
- case_study/Q9AGK2.pdb +0 -0
- case_study/Q9AI62.pdb +0 -0
- case_study/Q9KJF3.pdb +0 -0
- case_study/Q9XZ48.pdb +0 -0
- constants.py +39 -0
- esm/__init__.py +0 -0
- esm/layers/attention.py +70 -0
- esm/layers/blocks.py +153 -0
- esm/layers/codebook.py +88 -0
- esm/layers/ffn.py +29 -0
- esm/layers/geom_attention.py +151 -0
- esm/layers/regression_head.py +24 -0
- esm/layers/rotary.py +221 -0
- esm/layers/structure_proj.py +68 -0
- esm/layers/transformer_stack.py +94 -0
- esm/models/esm3.py +798 -0
- esm/models/function_decoder.py +338 -0
- esm/models/vqvae.py +450 -0
Dockerfile
ADDED
@@ -0,0 +1,27 @@
FROM continuumio/miniconda3

WORKDIR /code
COPY ./requirements.txt /code/requirements.txt

RUN conda create -n m3site python=3.11 dssp -c ostrokach -y
RUN conda run -n m3site pip install --no-cache-dir --upgrade -r /code/requirements.txt

RUN useradd -m -u 1000 user

USER user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

CMD ["conda", "run", "--no-capture-output", "-n", "m3site", "python", "app.py"]
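The image creates an `m3site` conda environment that bundles `dssp` from the `ostrokach` channel, presumably so the app can derive structural features from the uploaded PDB file. Below is a minimal sketch, not part of the repository, for checking that a DSSP executable is reachable inside that environment; the executable names tried (`mkdssp`, `dssp`) are assumptions about the conda package.

```python
# Hypothetical sanity check, not part of the upload: confirm the DSSP binary installed by
# the Dockerfile's conda environment is on PATH before launching the app.
import shutil

def find_dssp() -> str | None:
    """Return the path of the first DSSP executable found on PATH, if any."""
    for name in ("mkdssp", "dssp"):  # possible executable names; an assumption
        path = shutil.which(name)
        if path is not None:
            return path
    return None

if __name__ == "__main__":
    exe = find_dssp()
    if exe is None:
        raise SystemExit("DSSP not found; run this inside the 'm3site' conda environment.")
    print(f"DSSP found at {exe}")
```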
README.md
CHANGED
@@ -1,10 +1,17 @@
 ---
 title: M3Site
-emoji:
-colorFrom:
-colorTo:
+emoji: 📉
+colorFrom: indigo
+colorTo: indigo
 sdk: docker
-pinned:
+pinned: true
+license: cc-by-4.0
+short_description: 'An interactive demo for M3Site.'
 ---

-
+# M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification
+
+This is an interactive demo for inference; the source code of the paper can be found on [GitHub](). We provide some example `.pdb` files in the `case_study` folder, which you can use to try the demo. Taking `A0A384E143.pdb` as an example, the protein active sites predicted by M<sup>3</sup>Site are shown below:
+![image1](img/image1.png)
+In addition, you can visualize and analyze the predicted results interactively, as shown below:
+![image2](img/image2.png)
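For readers who want the same prediction outside the web UI, here is a minimal sketch built on the `model_predict` call that `app.py` makes (`model_predict(model, struct, text)` returning per-class site indices, per-residue confidences, and the sequence). Treating the structure argument as a plain file path, and the availability of `model_list` and `no_cat_dict` in `constants.py`, are assumptions inferred from `app.py`, not a documented API.

```python
# Hypothetical scripted use of the demo's predictor; argument and return conventions are
# inferred from how app.py calls model_predict and are not an official API.
from predict import model_predict
from constants import model_list, no_cat_dict

pdb_path = "case_study/A0A384E143.pdb"          # one of the bundled examples
prompt = "I don't know the function of this protein."

predicted_sites, confs, sequence = model_predict(model_list[0], pdb_path, prompt)

for class_key, residues in predicted_sites.items():
    for idx in residues:                         # 1-based residue numbers, as in app.py
        print(f"{no_cat_dict[class_key]}: {sequence[idx - 1]}{idx} (confidence {confs[idx - 1]:.3f})")
```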
app.py
ADDED
@@ -0,0 +1,219 @@
import copy
import gradio as gr
from gradio_molecule3d import Molecule3D
import Bio
import Bio.SeqUtils

from utils.util_functions import merge_ranges
from predict import model_predict
from constants import *


def update_reps_based_on_radio(*args):
    struct, text = args[0], args[1]
    background, model, active_sites = args[2:4], args[4], args[5:]

    predicted_sites, confs, sequence = model_predict(model, struct, text)
    merged_sites = merge_ranges(predicted_sites, max_value=len(sequence))

    confidence_details = []
    new_reps = []

    # 1. compute the summary text
    summary_text = []
    for k, v in predicted_sites.items():
        if len(v) > 0:
            summary_text.append(f"{len(v)} {no_cat_dict[k]} site(s)")
    if len(summary_text) == 0:
        summary_text = ["No active sites identified."]
    summary_text = '; '.join(summary_text)

    # 2. compute the detailed result table
    detail_predicted_sites = {'b': [], '0': [], '1': [], '2': [], '3': [], '4': [], '5': []}
    ass = []
    for k, v in predicted_sites.items():
        for vv in v:
            detail_predicted_sites[k].append(
                {'residue_type': sequence[vv-1], 'number': vv, 'confidence': confs[vv-1]}
            )
            ass.append(vv)
    for i in range(len(sequence)):
        if i+1 not in ass:
            detail_predicted_sites['b'].append(
                {'residue_type': sequence[i], 'number': i+1, 'confidence': confs[i]}
            )
    # 2.1 background residues
    backgrounds = detail_predicted_sites.get('b', [])
    for r in backgrounds:
        confidence_details.append([
            'Background',
            Bio.SeqUtils.seq3(r['residue_type']).upper(),
            r['number'],
            r.get('confidence', 'N/A')
        ])
    # 2.2 active-site residues
    for i in range(0, len(active_sites), 2):
        x, y = active_sites[i], active_sites[i+1]
        site_key = str(i//2)
        sites = detail_predicted_sites.get(site_key, [])
        for s in sites:
            confidence_details.append([
                no_cat_dict[site_key],
                Bio.SeqUtils.seq3(s['residue_type']).upper(),
                s['number'],
                s.get('confidence', 'N/A')
            ])

    # 3. compute the viewer representations
    # 3.1 background
    ranges = merged_sites['b']
    for r in ranges:
        old_reps = copy.deepcopy(default_reps)[0]
        old_reps['style'] = background[0][0].lower() + background[0][1:]
        old_reps['color'] = background[1][0].lower() + background[1][1:] + "Carbon"
        old_reps['residue_range'] = r
        new_reps.append(old_reps)
    # 3.2 active sites
    for i in range(0, len(active_sites), 2):
        x, y = active_sites[i], active_sites[i+1]
        ranges = merged_sites[str(i//2)]
        for r in ranges:
            old_reps = copy.deepcopy(default_reps)[0]
            old_reps['style'] = x[0].lower() + x[1:]
            old_reps['color'] = y[0].lower() + y[1:] + "Carbon"
            old_reps['residue_range'] = r
            new_reps.append(old_reps)

    return summary_text, confidence_details, Molecule3D(label="Identified Functional Sites", reps=new_reps)


def disable_fn(*x):
    return [gr.update(interactive=False)] * len(x)


def able_tip():
    return gr.update(visible=True)


def check_input(input):
    if input is not None:
        return gr.update(interactive=True)
    return gr.update(interactive=False)


with gr.Blocks(title="M3Site-app", theme=gr.themes.Default()) as demo:
    gr.Markdown("# M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification")
    gr.Markdown("""
    ## Overview
    **M<sup>3</sup>Site** is an advanced tool designed to accurately identify and classify protein active sites using a multi-modal learning approach. By integrating protein sequences, structural data, and functional annotations, M<sup>3</sup>Site provides comprehensive insights into protein functionality, aiding in drug design, synthetic biology, and understanding protein mechanisms.
    """)
    gr.Markdown("""
    ## How to Use
    1. **Select the Model**: Choose the pre-trained model for site prediction from the dropdown list.
    2. **Adjust Visual Settings**: Customize the visual style and color for background and active sites.
    3. **Upload Protein Structure**: Provide the 3D structure of the protein. You can upload from local or download from PDB Assym. Unit, PDB BioAssembly, AlphaFold DB, or ESMFold DB.
    4. **Enter Function Prompt**: Optionally provide a text description of the protein's function. If unsure, leave it blank.
    5. **Click "Predict"**: Hit the 'Predict' button to initiate the prediction. The predicted active sites will be highlighted in the structure visualization.
    6. **View Results**: The detailed results will be displayed below, including the identified active sites, their types, and confidence scores.
    """)

    with gr.Accordion("General Settings (Set before prediction)"):
        with gr.Row():
            model_drop = gr.Dropdown(model_list, label="Model Selection", value=model_list[0])
            gr.Markdown("")
            gr.Markdown("")
        with gr.Row():
            with gr.Row():
                style_dropb = gr.Dropdown(style_list, label="Style (Background)", value=style_list[0], min_width=1)
                color_dropb = gr.Dropdown(color_list, label="Color (Background)", value=color_list[0], min_width=1)
            with gr.Row():
                style_drop1 = gr.Dropdown(style_list, label="Style (CRI)", value=style_list[1], min_width=1)
                color_drop1 = gr.Dropdown(color_list, label="Color (CRI)", value=color_list[1], min_width=1)
            with gr.Row():
                style_drop2 = gr.Dropdown(style_list, label="Style (SCI)", value=style_list[1], min_width=1)
                color_drop2 = gr.Dropdown(color_list, label="Color (SCI)", value=color_list[2], min_width=1)
            with gr.Row():
                style_drop3 = gr.Dropdown(style_list, label="Style (PI)", value=style_list[1], min_width=1)
                color_drop3 = gr.Dropdown(color_list, label="Color (PI)", value=color_list[3], min_width=1)
        with gr.Row():
            with gr.Row():
                style_drop4 = gr.Dropdown(style_list, label="Style (PTCR)", value=style_list[1], min_width=1)
                color_drop4 = gr.Dropdown(color_list, label="Color (PTCR)", value=color_list[4], min_width=1)
            with gr.Row():
                style_drop5 = gr.Dropdown(style_list, label="Style (IA)", value=style_list[1], min_width=1)
                color_drop5 = gr.Dropdown(color_list, label="Color (IA)", value=color_list[5], min_width=1)
            with gr.Row():
                style_drop6 = gr.Dropdown(style_list, label="Style (SSA)", value=style_list[1], min_width=1)
                color_drop6 = gr.Dropdown(color_list, label="Color (SSA)", value=color_list[6], min_width=1)
            with gr.Row():
                gr.Markdown("")

    gr.Markdown('''
    *NOTE:* CRI indicates Covalent Reaction Intermediates, SCI indicates Sulfur-containing Covalent Intermediates, PI indicates Phosphorylated Intermediates,
    PTCR indicates Proton Transfer & Charge Relay Systems, IA indicates Isomerization Activity, SSA indicates Substrate-specific Activities.
    ''')

    with gr.Row():
        gr.Markdown("<center><font size=5><b>Input Structure</b></font></center>")
        gr.Markdown("<center><font size=5><b>Output Predictions</b></font></center>")

    with gr.Row(equal_height=True):
        input_struct = Molecule3D(label="Input Protein Structure (Default Style)", reps=reps1)
        output_struct = Molecule3D(label="Output Protein Structure", reps=[])

    with gr.Row(equal_height=True):
        input_text = gr.Textbox(lines=1, label="Function Prompt", scale=16, min_width=1, placeholder="I don't know the function of this protein.")
        btn = gr.Button("Predict", variant="primary", scale=1, min_width=1, interactive=False)
        summary_output = gr.Label(label="", scale=18, min_width=1, show_label=False, elem_classes="info")

    gr.Markdown("### Result Details")
    confidence_output = gr.DataFrame(headers=["Active Site Type", "Residue Type", "Residue Number", "Confidence"])

    option_list = [
        style_dropb, color_dropb, model_drop,
        style_drop1, color_drop1,
        style_drop2, color_drop2,
        style_drop3, color_drop3,
        style_drop4, color_drop4,
        style_drop5, color_drop5,
        style_drop6, color_drop6
    ]

    tips = gr.Markdown("### *Tips: Please refresh the page to make a new prediction.*", visible=False)
    # gr.Markdown("## Citation")
    # gr.Markdown("If you find this tool helpful, please consider citing the following papers:")
    # with gr.Accordion("Citations", open=False):
    #     gr.Markdown('''```
    #     @inproceedings{ouyangmmsite,
    #       title={MMSite: A Multi-modal Framework for the Identification of Active Sites in Proteins},
    #       author={Ouyang, Song and Cai, Huiyu and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
    #       booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}
    #     }
    #     @article{ouyangm3site,
    #       title={M3Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification},
    #       author={Ouyang, Song and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
    #       journal={xxxx},
    #       year={xxxx},
    #     }
    #     ```''')

    # bind events
    input_struct.change(check_input, inputs=input_struct, outputs=btn)
    btn.click(
        fn=able_tip,
        inputs=[],
        outputs=tips
    ).then(
        fn=disable_fn,
        inputs=option_list,
        outputs=option_list
    ).then(
        fn=update_reps_based_on_radio,
        inputs=[input_struct, input_text] + option_list,
        outputs=[summary_output, confidence_output, output_struct]
    ).then(
        fn=lambda x: x,
        inputs=[input_struct],
        outputs=[output_struct]
    )


if __name__ == "__main__":
    demo.launch(share=True, debug=True)
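`merge_ranges` comes from `utils/util_functions.py`, which is not among the 50 files rendered in this view. Judging only from how `app.py` consumes its output (a dict keyed by class, plus `'b'` for background, whose values are fed one at a time into a rep's `residue_range`), a plausible stand-in might look like the sketch below; the exact range format expected by `gradio_molecule3d` is an assumption.

```python
# Hypothetical stand-in for utils.util_functions.merge_ranges, inferred from its use in
# app.py: collapse per-class residue indices into contiguous ranges and add a background
# entry 'b' covering every residue not assigned to any class.
def merge_ranges(predicted_sites: dict[str, list[int]], max_value: int) -> dict[str, list[list[int]]]:
    def collapse(indices: list[int]) -> list[list[int]]:
        ranges, run = [], []
        for i in sorted(set(indices)):
            if run and i == run[-1] + 1:
                run.append(i)
            else:
                if run:
                    ranges.append([run[0], run[-1]])
                run = [i]
        if run:
            ranges.append([run[0], run[-1]])
        return ranges

    assigned = {i for residues in predicted_sites.values() for i in residues}
    merged = {key: collapse(residues) for key, residues in predicted_sites.items()}
    merged["b"] = collapse([i for i in range(1, max_value + 1) if i not in assigned])
    return merged


# Example: residues 3, 4, 5 and 10 predicted as class '0' in a 12-residue protein.
print(merge_ranges({"0": [3, 4, 5, 10]}, max_value=12))
# {'0': [[3, 5], [10, 10]], 'b': [[1, 2], [6, 9], [11, 12]]}
```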
case_study/A0A067FT93.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A126P745.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A1H1XG33.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A242M8J4.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A2P2GK84.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A2U7QU15.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A2U7R6V5.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A384E138.pdb
ADDED
The diff for this file is too large to render.

case_study/A0A384E143.pdb
ADDED
The diff for this file is too large to render.
case_study/A0A3Q0KJ78.pdb
ADDED
@@ -0,0 +1,1187 @@
HEADER 01-JUN-22
TITLE ALPHAFOLD MONOMER V2.0 PREDICTION FOR NUCLEOSIDE DIPHOSPHATE KINASE
TITLE 2 (A0A3Q0KJ78)
COMPND MOL_ID: 1;
COMPND 2 MOLECULE: NUCLEOSIDE DIPHOSPHATE KINASE;
COMPND 3 CHAIN: A
SOURCE MOL_ID: 1;
SOURCE 2 ORGANISM_SCIENTIFIC: SCHISTOSOMA MANSONI;
SOURCE 3 ORGANISM_TAXID: 6183
REMARK 1
REMARK 1 REFERENCE 1
REMARK 1 AUTH JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN,
REMARK 1 AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL,
REMARK 1 AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND,
REMARK 1 AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD,
REMARK 1 AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV,
REMARK 1 AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN,
REMARK 1 AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI,
REMARK 1 AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER,
REMARK 1 AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR,
REMARK 1 AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS
REMARK 1 TITL HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD
REMARK 1 REF NATURE V. 596 583 2021
REMARK 1 REFN ISSN 0028-0836
REMARK 1 PMID 34265844
REMARK 1 DOI 10.1038/s41586-021-03819-2
REMARK 1
REMARK 1 DISCLAIMERS
REMARK 1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE
REMARK 1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD
REMARK 1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY
REMARK 1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT
REMARK 1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD
REMARK 1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR
REMARK 1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT
REMARK 1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR
REMARK 1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE.
DBREF XXXX A 1 143 UNP A0A3Q0KJ78 A0A3Q0KJ78_SCHMA 1 143
SEQRES 1 A 143 MET VAL LYS PRO ASP GLY VAL GLN ARG GLY LEU VAL GLY
SEQRES 2 A 143 GLU VAL ILE GLN ARG PHE GLU ARG ARG GLY TYR LYS LEU
SEQRES 3 A 143 VAL ALA ILE LYS MET MET HIS ALA SER GLU GLN LEU LEU
SEQRES 4 A 143 GLN THR HIS TYR GLU ALA LEU LYS SER LEU SER PHE PHE
SEQRES 5 A 143 PRO LYS LEU VAL ALA TYR MET SER SER GLY PRO VAL VAL
SEQRES 6 A 143 PRO MET VAL PHE GLU GLY ARG LYS VAL VAL GLU ASN GLY
SEQRES 7 A 143 ARG THR MET LEU GLY ALA THR LYS PRO GLU ALA SER CYS
SEQRES 8 A 143 PRO GLY SER ILE ARG GLY ASP TYR CYS GLN ASP VAL GLY
SEQRES 9 A 143 ARG ASN VAL VAL HIS GLY SER ASP SER THR GLU SER ALA
SEQRES 10 A 143 ASN ARG GLU ILE ASN LEU TRP PHE SER PRO GLN GLU LEU
SEQRES 11 A 143 CYS GLN TYR LYS GLN ALA VAL ASP PRO TRP ILE HIS GLU
CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
ORIGX1 1.000000 0.000000 0.000000 0.00000
ORIGX2 0.000000 1.000000 0.000000 0.00000
ORIGX3 0.000000 0.000000 1.000000 0.00000
SCALE1 1.000000 0.000000 0.000000 0.00000
SCALE2 0.000000 1.000000 0.000000 0.00000
SCALE3 0.000000 0.000000 1.000000 0.00000
MODEL 1
ATOM 1 N MET A 1 0.643 4.346 0.767 1.00 98.62 N
ATOM 2 CA MET A 1 0.545 5.526 -0.124 1.00 98.62 C
ATOM 3 C MET A 1 0.325 5.076 -1.559 1.00 98.62 C
ATOM 4 CB MET A 1 1.804 6.406 -0.018 1.00 98.62 C
ATOM 5 O MET A 1 1.081 4.230 -2.029 1.00 98.62 O
ATOM 6 CG MET A 1 1.853 7.539 -1.054 1.00 98.62 C
ATOM 7 SD MET A 1 3.214 8.711 -0.842 1.00 98.62 S
ATOM 8 CE MET A 1 4.672 7.639 -0.954 1.00 98.62 C
[ATOM coordinate records for residues 2-143 continue; this rendered view is truncated at file line 622 (ATOM 564, CA of ARG A 72) of 1,187 lines.]
622 |
+
ATOM 565 C ARG A 72 -2.372 -6.869 12.581 1.00 96.75 C
|
623 |
+
ATOM 566 CB ARG A 72 -4.080 -7.772 14.252 1.00 96.75 C
|
624 |
+
ATOM 567 O ARG A 72 -2.832 -7.195 11.484 1.00 96.75 O
|
625 |
+
ATOM 568 CG ARG A 72 -3.176 -8.881 14.825 1.00 96.75 C
|
626 |
+
ATOM 569 CD ARG A 72 -4.024 -10.042 15.372 1.00 96.75 C
|
627 |
+
ATOM 570 NE ARG A 72 -3.206 -11.121 15.971 1.00 96.75 N
|
628 |
+
ATOM 571 NH1 ARG A 72 -4.932 -12.602 16.416 1.00 96.75 N
|
629 |
+
ATOM 572 NH2 ARG A 72 -2.860 -13.136 16.998 1.00 96.75 N
|
630 |
+
ATOM 573 CZ ARG A 72 -3.669 -12.273 16.450 1.00 96.75 C
|
631 |
+
ATOM 574 N LYS A 73 -1.056 -6.799 12.828 1.00 97.45 N
|
632 |
+
ATOM 575 CA LYS A 73 0.005 -7.067 11.838 1.00 97.45 C
|
633 |
+
ATOM 576 C LYS A 73 -0.148 -6.273 10.526 1.00 97.45 C
|
634 |
+
ATOM 577 CB LYS A 73 0.118 -8.582 11.575 1.00 97.45 C
|
635 |
+
ATOM 578 O LYS A 73 0.269 -6.749 9.474 1.00 97.45 O
|
636 |
+
ATOM 579 CG LYS A 73 0.577 -9.417 12.777 1.00 97.45 C
|
637 |
+
ATOM 580 CD LYS A 73 0.674 -10.886 12.335 1.00 97.45 C
|
638 |
+
ATOM 581 CE LYS A 73 1.676 -11.683 13.176 1.00 97.45 C
|
639 |
+
ATOM 582 NZ LYS A 73 2.590 -12.436 12.280 1.00 97.45 N
|
640 |
+
ATOM 583 N VAL A 74 -0.742 -5.075 10.560 1.00 98.54 N
|
641 |
+
ATOM 584 CA VAL A 74 -1.056 -4.299 9.343 1.00 98.54 C
|
642 |
+
ATOM 585 C VAL A 74 0.172 -3.983 8.482 1.00 98.54 C
|
643 |
+
ATOM 586 CB VAL A 74 -1.846 -3.024 9.698 1.00 98.54 C
|
644 |
+
ATOM 587 O VAL A 74 0.063 -3.975 7.261 1.00 98.54 O
|
645 |
+
ATOM 588 CG1 VAL A 74 -1.028 -2.022 10.524 1.00 98.54 C
|
646 |
+
ATOM 589 CG2 VAL A 74 -2.391 -2.320 8.451 1.00 98.54 C
|
647 |
+
ATOM 590 N VAL A 75 1.355 -3.804 9.080 1.00 98.61 N
|
648 |
+
ATOM 591 CA VAL A 75 2.601 -3.582 8.325 1.00 98.61 C
|
649 |
+
ATOM 592 C VAL A 75 2.995 -4.830 7.525 1.00 98.61 C
|
650 |
+
ATOM 593 CB VAL A 75 3.744 -3.131 9.253 1.00 98.61 C
|
651 |
+
ATOM 594 O VAL A 75 3.136 -4.760 6.306 1.00 98.61 O
|
652 |
+
ATOM 595 CG1 VAL A 75 5.029 -2.893 8.459 1.00 98.61 C
|
653 |
+
ATOM 596 CG2 VAL A 75 3.393 -1.831 9.990 1.00 98.61 C
|
654 |
+
ATOM 597 N GLU A 76 3.111 -5.980 8.197 1.00 98.31 N
|
655 |
+
ATOM 598 CA GLU A 76 3.481 -7.269 7.588 1.00 98.31 C
|
656 |
+
ATOM 599 C GLU A 76 2.452 -7.696 6.528 1.00 98.31 C
|
657 |
+
ATOM 600 CB GLU A 76 3.598 -8.323 8.712 1.00 98.31 C
|
658 |
+
ATOM 601 O GLU A 76 2.778 -7.913 5.362 1.00 98.31 O
|
659 |
+
ATOM 602 CG GLU A 76 4.072 -9.709 8.239 1.00 98.31 C
|
660 |
+
ATOM 603 CD GLU A 76 4.005 -10.798 9.336 1.00 98.31 C
|
661 |
+
ATOM 604 OE1 GLU A 76 4.273 -11.974 9.020 1.00 98.31 O
|
662 |
+
ATOM 605 OE2 GLU A 76 3.617 -10.521 10.503 1.00 98.31 O
|
663 |
+
ATOM 606 N ASN A 77 1.176 -7.741 6.905 1.00 98.23 N
|
664 |
+
ATOM 607 CA ASN A 77 0.106 -8.178 6.013 1.00 98.23 C
|
665 |
+
ATOM 608 C ASN A 77 -0.172 -7.166 4.897 1.00 98.23 C
|
666 |
+
ATOM 609 CB ASN A 77 -1.150 -8.392 6.852 1.00 98.23 C
|
667 |
+
ATOM 610 O ASN A 77 -0.473 -7.566 3.774 1.00 98.23 O
|
668 |
+
ATOM 611 CG ASN A 77 -1.119 -9.628 7.726 1.00 98.23 C
|
669 |
+
ATOM 612 ND2 ASN A 77 -2.050 -9.737 8.642 1.00 98.23 N
|
670 |
+
ATOM 613 OD1 ASN A 77 -0.321 -10.532 7.579 1.00 98.23 O
|
671 |
+
ATOM 614 N GLY A 78 -0.038 -5.867 5.173 1.00 98.37 N
|
672 |
+
ATOM 615 CA GLY A 78 -0.117 -4.826 4.156 1.00 98.37 C
|
673 |
+
ATOM 616 C GLY A 78 0.964 -5.032 3.100 1.00 98.37 C
|
674 |
+
ATOM 617 O GLY A 78 0.663 -5.071 1.910 1.00 98.37 O
|
675 |
+
ATOM 618 N ARG A 79 2.214 -5.277 3.513 1.00 98.35 N
|
676 |
+
ATOM 619 CA ARG A 79 3.321 -5.614 2.604 1.00 98.35 C
|
677 |
+
ATOM 620 C ARG A 79 3.009 -6.823 1.721 1.00 98.35 C
|
678 |
+
ATOM 621 CB ARG A 79 4.601 -5.827 3.436 1.00 98.35 C
|
679 |
+
ATOM 622 O ARG A 79 3.305 -6.757 0.525 1.00 98.35 O
|
680 |
+
ATOM 623 CG ARG A 79 5.267 -4.488 3.811 1.00 98.35 C
|
681 |
+
ATOM 624 CD ARG A 79 6.356 -4.033 2.826 1.00 98.35 C
|
682 |
+
ATOM 625 NE ARG A 79 6.055 -4.430 1.436 1.00 98.35 N
|
683 |
+
ATOM 626 NH1 ARG A 79 6.102 -2.374 0.422 1.00 98.35 N
|
684 |
+
ATOM 627 NH2 ARG A 79 5.437 -4.203 -0.709 1.00 98.35 N
|
685 |
+
ATOM 628 CZ ARG A 79 5.881 -3.660 0.390 1.00 98.35 C
|
686 |
+
ATOM 629 N THR A 80 2.368 -7.855 2.270 1.00 98.32 N
|
687 |
+
ATOM 630 CA THR A 80 1.891 -9.018 1.504 1.00 98.32 C
|
688 |
+
ATOM 631 C THR A 80 0.787 -8.645 0.509 1.00 98.32 C
|
689 |
+
ATOM 632 CB THR A 80 1.444 -10.147 2.447 1.00 98.32 C
|
690 |
+
ATOM 633 O THR A 80 0.877 -9.018 -0.658 1.00 98.32 O
|
691 |
+
ATOM 634 CG2 THR A 80 0.922 -11.379 1.710 1.00 98.32 C
|
692 |
+
ATOM 635 OG1 THR A 80 2.561 -10.575 3.183 1.00 98.32 O
|
693 |
+
ATOM 636 N MET A 81 -0.210 -7.845 0.906 1.00 98.59 N
|
694 |
+
ATOM 637 CA MET A 81 -1.279 -7.374 0.005 1.00 98.59 C
|
695 |
+
ATOM 638 C MET A 81 -0.752 -6.514 -1.148 1.00 98.59 C
|
696 |
+
ATOM 639 CB MET A 81 -2.297 -6.540 0.790 1.00 98.59 C
|
697 |
+
ATOM 640 O MET A 81 -1.277 -6.559 -2.261 1.00 98.59 O
|
698 |
+
ATOM 641 CG MET A 81 -3.141 -7.380 1.747 1.00 98.59 C
|
699 |
+
ATOM 642 SD MET A 81 -4.180 -6.388 2.846 1.00 98.59 S
|
700 |
+
ATOM 643 CE MET A 81 -5.247 -5.558 1.641 1.00 98.59 C
|
701 |
+
ATOM 644 N LEU A 82 0.294 -5.725 -0.897 1.00 98.63 N
|
702 |
+
ATOM 645 CA LEU A 82 0.945 -4.914 -1.921 1.00 98.63 C
|
703 |
+
ATOM 646 C LEU A 82 1.639 -5.777 -2.985 1.00 98.63 C
|
704 |
+
ATOM 647 CB LEU A 82 1.958 -3.962 -1.257 1.00 98.63 C
|
705 |
+
ATOM 648 O LEU A 82 1.680 -5.376 -4.147 1.00 98.63 O
|
706 |
+
ATOM 649 CG LEU A 82 1.376 -2.603 -0.840 1.00 98.63 C
|
707 |
+
ATOM 650 CD1 LEU A 82 0.385 -2.612 0.311 1.00 98.63 C
|
708 |
+
ATOM 651 CD2 LEU A 82 2.513 -1.661 -0.438 1.00 98.63 C
|
709 |
+
ATOM 652 N GLY A 83 2.194 -6.927 -2.596 1.00 98.45 N
|
710 |
+
ATOM 653 CA GLY A 83 3.070 -7.744 -3.435 1.00 98.45 C
|
711 |
+
ATOM 654 C GLY A 83 4.537 -7.299 -3.386 1.00 98.45 C
|
712 |
+
ATOM 655 O GLY A 83 4.900 -6.304 -2.739 1.00 98.45 O
|
713 |
+
ATOM 656 N ALA A 84 5.401 -8.046 -4.075 1.00 97.59 N
|
714 |
+
ATOM 657 CA ALA A 84 6.837 -7.781 -4.154 1.00 97.59 C
|
715 |
+
ATOM 658 C ALA A 84 7.137 -6.394 -4.754 1.00 97.59 C
|
716 |
+
ATOM 659 CB ALA A 84 7.499 -8.902 -4.965 1.00 97.59 C
|
717 |
+
ATOM 660 O ALA A 84 6.314 -5.795 -5.441 1.00 97.59 O
|
718 |
+
ATOM 661 N THR A 85 8.313 -5.818 -4.480 1.00 94.66 N
|
719 |
+
ATOM 662 CA THR A 85 8.682 -4.493 -5.031 1.00 94.66 C
|
720 |
+
ATOM 663 C THR A 85 8.776 -4.501 -6.553 1.00 94.66 C
|
721 |
+
ATOM 664 CB THR A 85 9.962 -3.960 -4.381 1.00 94.66 C
|
722 |
+
ATOM 665 O THR A 85 8.341 -3.534 -7.184 1.00 94.66 O
|
723 |
+
ATOM 666 CG2 THR A 85 10.404 -2.602 -4.926 1.00 94.66 C
|
724 |
+
ATOM 667 OG1 THR A 85 9.680 -3.775 -3.013 1.00 94.66 O
|
725 |
+
ATOM 668 N LYS A 86 9.291 -5.596 -7.120 1.00 95.47 N
|
726 |
+
ATOM 669 CA LYS A 86 9.256 -5.879 -8.553 1.00 95.47 C
|
727 |
+
ATOM 670 C LYS A 86 7.914 -6.537 -8.907 1.00 95.47 C
|
728 |
+
ATOM 671 CB LYS A 86 10.413 -6.803 -8.940 1.00 95.47 C
|
729 |
+
ATOM 672 O LYS A 86 7.621 -7.596 -8.344 1.00 95.47 O
|
730 |
+
ATOM 673 CG LYS A 86 11.791 -6.153 -8.748 1.00 95.47 C
|
731 |
+
ATOM 674 CD LYS A 86 12.847 -7.130 -9.264 1.00 95.47 C
|
732 |
+
ATOM 675 CE LYS A 86 14.257 -6.545 -9.284 1.00 95.47 C
|
733 |
+
ATOM 676 NZ LYS A 86 15.132 -7.444 -10.075 1.00 95.47 N
|
734 |
+
ATOM 677 N PRO A 87 7.105 -5.943 -9.798 1.00 94.78 N
|
735 |
+
ATOM 678 CA PRO A 87 5.823 -6.521 -10.191 1.00 94.78 C
|
736 |
+
ATOM 679 C PRO A 87 5.938 -7.935 -10.763 1.00 94.78 C
|
737 |
+
ATOM 680 CB PRO A 87 5.234 -5.554 -11.212 1.00 94.78 C
|
738 |
+
ATOM 681 O PRO A 87 5.119 -8.775 -10.419 1.00 94.78 O
|
739 |
+
ATOM 682 CG PRO A 87 5.836 -4.214 -10.798 1.00 94.78 C
|
740 |
+
ATOM 683 CD PRO A 87 7.245 -4.613 -10.375 1.00 94.78 C
|
741 |
+
ATOM 684 N GLU A 88 7.007 -8.240 -11.507 1.00 95.24 N
|
742 |
+
ATOM 685 CA GLU A 88 7.259 -9.577 -12.072 1.00 95.24 C
|
743 |
+
ATOM 686 C GLU A 88 7.347 -10.707 -11.025 1.00 95.24 C
|
744 |
+
ATOM 687 CB GLU A 88 8.549 -9.517 -12.917 1.00 95.24 C
|
745 |
+
ATOM 688 O GLU A 88 7.113 -11.868 -11.341 1.00 95.24 O
|
746 |
+
ATOM 689 CG GLU A 88 9.842 -9.335 -12.085 1.00 95.24 C
|
747 |
+
ATOM 690 CD GLU A 88 10.767 -8.202 -12.552 1.00 95.24 C
|
748 |
+
ATOM 691 OE1 GLU A 88 12.009 -8.365 -12.438 1.00 95.24 O
|
749 |
+
ATOM 692 OE2 GLU A 88 10.241 -7.096 -12.801 1.00 95.24 O
|
750 |
+
ATOM 693 N ALA A 89 7.672 -10.370 -9.773 1.00 97.30 N
|
751 |
+
ATOM 694 CA ALA A 89 7.756 -11.302 -8.650 1.00 97.30 C
|
752 |
+
ATOM 695 C ALA A 89 6.526 -11.223 -7.726 1.00 97.30 C
|
753 |
+
ATOM 696 CB ALA A 89 9.069 -11.028 -7.907 1.00 97.30 C
|
754 |
+
ATOM 697 O ALA A 89 6.519 -11.808 -6.643 1.00 97.30 O
|
755 |
+
ATOM 698 N SER A 90 5.508 -10.444 -8.099 1.00 97.65 N
|
756 |
+
ATOM 699 CA SER A 90 4.282 -10.288 -7.321 1.00 97.65 C
|
757 |
+
ATOM 700 C SER A 90 3.260 -11.344 -7.727 1.00 97.65 C
|
758 |
+
ATOM 701 CB SER A 90 3.705 -8.885 -7.500 1.00 97.65 C
|
759 |
+
ATOM 702 O SER A 90 2.945 -11.492 -8.902 1.00 97.65 O
|
760 |
+
ATOM 703 OG SER A 90 4.636 -7.929 -7.031 1.00 97.65 O
|
761 |
+
ATOM 704 N CYS A 91 2.709 -12.062 -6.749 1.00 97.61 N
|
762 |
+
ATOM 705 CA CYS A 91 1.668 -13.051 -7.015 1.00 97.61 C
|
763 |
+
ATOM 706 C CYS A 91 0.375 -12.384 -7.527 1.00 97.61 C
|
764 |
+
ATOM 707 CB CYS A 91 1.381 -13.858 -5.742 1.00 97.61 C
|
765 |
+
ATOM 708 O CYS A 91 0.038 -11.287 -7.052 1.00 97.61 O
|
766 |
+
ATOM 709 SG CYS A 91 2.876 -14.732 -5.195 1.00 97.61 S
|
767 |
+
ATOM 710 N PRO A 92 -0.392 -13.055 -8.411 1.00 98.06 N
|
768 |
+
ATOM 711 CA PRO A 92 -1.765 -12.661 -8.724 1.00 98.06 C
|
769 |
+
ATOM 712 C PRO A 92 -2.602 -12.478 -7.449 1.00 98.06 C
|
770 |
+
ATOM 713 CB PRO A 92 -2.322 -13.778 -9.615 1.00 98.06 C
|
771 |
+
ATOM 714 O PRO A 92 -2.443 -13.226 -6.483 1.00 98.06 O
|
772 |
+
ATOM 715 CG PRO A 92 -1.073 -14.373 -10.263 1.00 98.06 C
|
773 |
+
ATOM 716 CD PRO A 92 -0.029 -14.255 -9.156 1.00 98.06 C
|
774 |
+
ATOM 717 N GLY A 93 -3.461 -11.460 -7.427 1.00 97.89 N
|
775 |
+
ATOM 718 CA GLY A 93 -4.252 -11.044 -6.265 1.00 97.89 C
|
776 |
+
ATOM 719 C GLY A 93 -3.575 -9.992 -5.378 1.00 97.89 C
|
777 |
+
ATOM 720 O GLY A 93 -4.246 -9.326 -4.584 1.00 97.89 O
|
778 |
+
ATOM 721 N SER A 94 -2.260 -9.786 -5.508 1.00 98.62 N
|
779 |
+
ATOM 722 CA SER A 94 -1.591 -8.632 -4.896 1.00 98.62 C
|
780 |
+
ATOM 723 C SER A 94 -1.785 -7.376 -5.745 1.00 98.62 C
|
781 |
+
ATOM 724 CB SER A 94 -0.109 -8.905 -4.647 1.00 98.62 C
|
782 |
+
ATOM 725 O SER A 94 -1.936 -7.456 -6.961 1.00 98.62 O
|
783 |
+
ATOM 726 OG SER A 94 0.619 -8.949 -5.856 1.00 98.62 O
|
784 |
+
ATOM 727 N ILE A 95 -1.731 -6.192 -5.129 1.00 98.76 N
|
785 |
+
ATOM 728 CA ILE A 95 -1.938 -4.919 -5.846 1.00 98.76 C
|
786 |
+
ATOM 729 C ILE A 95 -0.956 -4.778 -7.015 1.00 98.76 C
|
787 |
+
ATOM 730 CB ILE A 95 -1.818 -3.729 -4.866 1.00 98.76 C
|
788 |
+
ATOM 731 O ILE A 95 -1.337 -4.365 -8.107 1.00 98.76 O
|
789 |
+
ATOM 732 CG1 ILE A 95 -2.991 -3.761 -3.863 1.00 98.76 C
|
790 |
+
ATOM 733 CG2 ILE A 95 -1.796 -2.371 -5.597 1.00 98.76 C
|
791 |
+
ATOM 734 CD1 ILE A 95 -2.834 -2.814 -2.666 1.00 98.76 C
|
792 |
+
ATOM 735 N ARG A 96 0.320 -5.118 -6.808 1.00 98.71 N
|
793 |
+
ATOM 736 CA ARG A 96 1.321 -5.027 -7.876 1.00 98.71 C
|
794 |
+
ATOM 737 C ARG A 96 1.205 -6.148 -8.895 1.00 98.71 C
|
795 |
+
ATOM 738 CB ARG A 96 2.723 -4.966 -7.288 1.00 98.71 C
|
796 |
+
ATOM 739 O ARG A 96 1.418 -5.868 -10.066 1.00 98.71 O
|
797 |
+
ATOM 740 CG ARG A 96 2.945 -3.621 -6.589 1.00 98.71 C
|
798 |
+
ATOM 741 CD ARG A 96 4.249 -3.724 -5.823 1.00 98.71 C
|
799 |
+
ATOM 742 NE ARG A 96 4.516 -2.529 -5.009 1.00 98.71 N
|
800 |
+
ATOM 743 NH1 ARG A 96 5.865 -3.617 -3.514 1.00 98.71 N
|
801 |
+
ATOM 744 NH2 ARG A 96 5.614 -1.417 -3.351 1.00 98.71 N
|
802 |
+
ATOM 745 CZ ARG A 96 5.322 -2.524 -3.968 1.00 98.71 C
|
803 |
+
ATOM 746 N GLY A 97 0.845 -7.359 -8.471 1.00 98.30 N
|
804 |
+
ATOM 747 CA GLY A 97 0.607 -8.471 -9.392 1.00 98.30 C
|
805 |
+
ATOM 748 C GLY A 97 -0.561 -8.196 -10.340 1.00 98.30 C
|
806 |
+
ATOM 749 O GLY A 97 -0.468 -8.498 -11.523 1.00 98.30 O
|
807 |
+
ATOM 750 N ASP A 98 -1.620 -7.552 -9.847 1.00 98.55 N
|
808 |
+
ATOM 751 CA ASP A 98 -2.813 -7.275 -10.652 1.00 98.55 C
|
809 |
+
ATOM 752 C ASP A 98 -2.676 -6.012 -11.512 1.00 98.55 C
|
810 |
+
ATOM 753 CB ASP A 98 -4.033 -7.155 -9.730 1.00 98.55 C
|
811 |
+
ATOM 754 O ASP A 98 -3.278 -5.919 -12.582 1.00 98.55 O
|
812 |
+
ATOM 755 CG ASP A 98 -4.351 -8.438 -8.959 1.00 98.55 C
|
813 |
+
ATOM 756 OD1 ASP A 98 -3.948 -9.543 -9.383 1.00 98.55 O
|
814 |
+
ATOM 757 OD2 ASP A 98 -5.009 -8.304 -7.899 1.00 98.55 O
|
815 |
+
ATOM 758 N TYR A 99 -1.903 -5.018 -11.054 1.00 98.44 N
|
816 |
+
ATOM 759 CA TYR A 99 -1.921 -3.685 -11.661 1.00 98.44 C
|
817 |
+
ATOM 760 C TYR A 99 -0.576 -3.163 -12.151 1.00 98.44 C
|
818 |
+
ATOM 761 CB TYR A 99 -2.582 -2.680 -10.702 1.00 98.44 C
|
819 |
+
ATOM 762 O TYR A 99 -0.556 -2.069 -12.710 1.00 98.44 O
|
820 |
+
ATOM 763 CG TYR A 99 -3.974 -3.071 -10.248 1.00 98.44 C
|
821 |
+
ATOM 764 CD1 TYR A 99 -4.956 -3.384 -11.205 1.00 98.44 C
|
822 |
+
ATOM 765 CD2 TYR A 99 -4.285 -3.145 -8.877 1.00 98.44 C
|
823 |
+
ATOM 766 CE1 TYR A 99 -6.229 -3.809 -10.795 1.00 98.44 C
|
824 |
+
ATOM 767 CE2 TYR A 99 -5.575 -3.536 -8.462 1.00 98.44 C
|
825 |
+
ATOM 768 OH TYR A 99 -7.787 -4.332 -9.092 1.00 98.44 O
|
826 |
+
ATOM 769 CZ TYR A 99 -6.544 -3.893 -9.428 1.00 98.44 C
|
827 |
+
ATOM 770 N CYS A 100 0.551 -3.842 -11.961 1.00 98.28 N
|
828 |
+
ATOM 771 CA CYS A 100 1.861 -3.287 -12.316 1.00 98.28 C
|
829 |
+
ATOM 772 C CYS A 100 2.623 -4.199 -13.279 1.00 98.28 C
|
830 |
+
ATOM 773 CB CYS A 100 2.656 -2.986 -11.040 1.00 98.28 C
|
831 |
+
ATOM 774 O CYS A 100 2.511 -5.415 -13.215 1.00 98.28 O
|
832 |
+
ATOM 775 SG CYS A 100 1.696 -1.940 -9.913 1.00 98.28 S
|
833 |
+
ATOM 776 N GLN A 101 3.431 -3.591 -14.149 1.00 96.85 N
|
834 |
+
ATOM 777 CA GLN A 101 4.346 -4.302 -15.052 1.00 96.85 C
|
835 |
+
ATOM 778 C GLN A 101 5.799 -3.938 -14.723 1.00 96.85 C
|
836 |
+
ATOM 779 CB GLN A 101 3.987 -4.002 -16.519 1.00 96.85 C
|
837 |
+
ATOM 780 O GLN A 101 6.625 -4.820 -14.519 1.00 96.85 O
|
838 |
+
ATOM 781 CG GLN A 101 2.578 -4.500 -16.891 1.00 96.85 C
|
839 |
+
ATOM 782 CD GLN A 101 2.203 -4.227 -18.346 1.00 96.85 C
|
840 |
+
ATOM 783 NE2 GLN A 101 0.928 -4.164 -18.664 1.00 96.85 N
|
841 |
+
ATOM 784 OE1 GLN A 101 3.023 -4.070 -19.231 1.00 96.85 O
|
842 |
+
ATOM 785 N ASP A 102 6.087 -2.645 -14.538 1.00 96.59 N
|
843 |
+
ATOM 786 CA ASP A 102 7.443 -2.145 -14.291 1.00 96.59 C
|
844 |
+
ATOM 787 C ASP A 102 7.704 -1.752 -12.831 1.00 96.59 C
|
845 |
+
ATOM 788 CB ASP A 102 7.711 -0.930 -15.181 1.00 96.59 C
|
846 |
+
ATOM 789 O ASP A 102 6.889 -1.101 -12.173 1.00 96.59 O
|
847 |
+
ATOM 790 CG ASP A 102 7.659 -1.290 -16.660 1.00 96.59 C
|
848 |
+
ATOM 791 OD1 ASP A 102 8.534 -2.079 -17.069 1.00 96.59 O
|
849 |
+
ATOM 792 OD2 ASP A 102 6.773 -0.727 -17.336 1.00 96.59 O
|
850 |
+
ATOM 793 N VAL A 103 8.917 -2.026 -12.337 1.00 95.22 N
|
851 |
+
ATOM 794 CA VAL A 103 9.349 -1.637 -10.977 1.00 95.22 C
|
852 |
+
ATOM 795 C VAL A 103 9.292 -0.122 -10.731 1.00 95.22 C
|
853 |
+
ATOM 796 CB VAL A 103 10.748 -2.213 -10.677 1.00 95.22 C
|
854 |
+
ATOM 797 O VAL A 103 8.979 0.318 -9.623 1.00 95.22 O
|
855 |
+
ATOM 798 CG1 VAL A 103 11.861 -1.607 -11.544 1.00 95.22 C
|
856 |
+
ATOM 799 CG2 VAL A 103 11.129 -2.048 -9.201 1.00 95.22 C
|
857 |
+
ATOM 800 N GLY A 104 9.542 0.689 -11.764 1.00 95.14 N
|
858 |
+
ATOM 801 CA GLY A 104 9.465 2.152 -11.696 1.00 95.14 C
|
859 |
+
ATOM 802 C GLY A 104 8.037 2.716 -11.714 1.00 95.14 C
|
860 |
+
ATOM 803 O GLY A 104 7.866 3.921 -11.523 1.00 95.14 O
|
861 |
+
ATOM 804 N ARG A 105 7.024 1.874 -11.957 1.00 97.47 N
|
862 |
+
ATOM 805 CA ARG A 105 5.590 2.209 -12.032 1.00 97.47 C
|
863 |
+
ATOM 806 C ARG A 105 4.776 1.164 -11.265 1.00 97.47 C
|
864 |
+
ATOM 807 CB ARG A 105 5.135 2.311 -13.500 1.00 97.47 C
|
865 |
+
ATOM 808 O ARG A 105 3.951 0.440 -11.820 1.00 97.47 O
|
866 |
+
ATOM 809 CG ARG A 105 5.806 3.431 -14.305 1.00 97.47 C
|
867 |
+
ATOM 810 CD ARG A 105 5.448 4.822 -13.767 1.00 97.47 C
|
868 |
+
ATOM 811 NE ARG A 105 6.067 5.881 -14.584 1.00 97.47 N
|
869 |
+
ATOM 812 NH1 ARG A 105 8.095 5.983 -13.498 1.00 97.47 N
|
870 |
+
ATOM 813 NH2 ARG A 105 7.702 7.308 -15.251 1.00 97.47 N
|
871 |
+
ATOM 814 CZ ARG A 105 7.281 6.379 -14.438 1.00 97.47 C
|
872 |
+
ATOM 815 N ASN A 106 5.093 1.040 -9.976 1.00 97.81 N
|
873 |
+
ATOM 816 CA ASN A 106 4.556 -0.005 -9.107 1.00 97.81 C
|
874 |
+
ATOM 817 C ASN A 106 3.438 0.460 -8.152 1.00 97.81 C
|
875 |
+
ATOM 818 CB ASN A 106 5.704 -0.771 -8.432 1.00 97.81 C
|
876 |
+
ATOM 819 O ASN A 106 3.247 -0.188 -7.119 1.00 97.81 O
|
877 |
+
ATOM 820 CG ASN A 106 6.431 -0.012 -7.340 1.00 97.81 C
|
878 |
+
ATOM 821 ND2 ASN A 106 7.305 -0.695 -6.636 1.00 97.81 N
|
879 |
+
ATOM 822 OD1 ASN A 106 6.253 1.175 -7.093 1.00 97.81 O
|
880 |
+
ATOM 823 N VAL A 107 2.734 1.557 -8.485 1.00 98.15 N
|
881 |
+
ATOM 824 CA VAL A 107 1.443 2.054 -7.942 1.00 98.15 C
|
882 |
+
ATOM 825 C VAL A 107 1.389 2.416 -6.453 1.00 98.15 C
|
883 |
+
ATOM 826 CB VAL A 107 0.301 1.097 -8.355 1.00 98.15 C
|
884 |
+
ATOM 827 O VAL A 107 0.765 3.406 -6.081 1.00 98.15 O
|
885 |
+
ATOM 828 CG1 VAL A 107 -1.091 1.468 -7.825 1.00 98.15 C
|
886 |
+
ATOM 829 CG2 VAL A 107 0.151 1.069 -9.883 1.00 98.15 C
|
887 |
+
ATOM 830 N VAL A 108 2.023 1.653 -5.568 1.00 98.56 N
|
888 |
+
ATOM 831 CA VAL A 108 1.879 1.754 -4.113 1.00 98.56 C
|
889 |
+
ATOM 832 C VAL A 108 3.219 1.685 -3.385 1.00 98.56 C
|
890 |
+
ATOM 833 CB VAL A 108 0.908 0.693 -3.547 1.00 98.56 C
|
891 |
+
ATOM 834 O VAL A 108 4.133 0.936 -3.744 1.00 98.56 O
|
892 |
+
ATOM 835 CG1 VAL A 108 -0.553 1.021 -3.857 1.00 98.56 C
|
893 |
+
ATOM 836 CG2 VAL A 108 1.208 -0.726 -4.055 1.00 98.56 C
|
894 |
+
ATOM 837 N HIS A 109 3.320 2.462 -2.310 1.00 98.47 N
|
895 |
+
ATOM 838 CA HIS A 109 4.326 2.351 -1.250 1.00 98.47 C
|
896 |
+
ATOM 839 C HIS A 109 3.662 1.886 0.051 1.00 98.47 C
|
897 |
+
ATOM 840 CB HIS A 109 5.043 3.694 -1.047 1.00 98.47 C
|
898 |
+
ATOM 841 O HIS A 109 2.533 2.289 0.345 1.00 98.47 O
|
899 |
+
ATOM 842 CG HIS A 109 5.957 3.680 0.152 1.00 98.47 C
|
900 |
+
ATOM 843 CD2 HIS A 109 5.651 4.124 1.407 1.00 98.47 C
|
901 |
+
ATOM 844 ND1 HIS A 109 7.204 3.062 0.175 1.00 98.47 N
|
902 |
+
ATOM 845 CE1 HIS A 109 7.643 3.187 1.444 1.00 98.47 C
|
903 |
+
ATOM 846 NE2 HIS A 109 6.720 3.793 2.208 1.00 98.47 N
|
904 |
+
ATOM 847 N GLY A 110 4.384 1.090 0.835 1.00 98.24 N
|
905 |
+
ATOM 848 CA GLY A 110 3.994 0.669 2.179 1.00 98.24 C
|
906 |
+
ATOM 849 C GLY A 110 5.244 0.354 2.991 1.00 98.24 C
|
907 |
+
ATOM 850 O GLY A 110 6.169 -0.259 2.444 1.00 98.24 O
|
908 |
+
ATOM 851 N SER A 111 5.257 0.797 4.247 1.00 98.33 N
|
909 |
+
ATOM 852 CA SER A 111 6.378 0.667 5.179 1.00 98.33 C
|
910 |
+
ATOM 853 C SER A 111 6.792 -0.798 5.350 1.00 98.33 C
|
911 |
+
ATOM 854 CB SER A 111 5.996 1.281 6.531 1.00 98.33 C
|
912 |
+
ATOM 855 O SER A 111 5.955 -1.698 5.284 1.00 98.33 O
|
913 |
+
ATOM 856 OG SER A 111 5.264 2.483 6.344 1.00 98.33 O
|
914 |
+
ATOM 857 N ASP A 112 8.087 -1.048 5.524 1.00 96.76 N
|
915 |
+
ATOM 858 CA ASP A 112 8.671 -2.398 5.583 1.00 96.76 C
|
916 |
+
ATOM 859 C ASP A 112 8.773 -2.978 7.000 1.00 96.76 C
|
917 |
+
ATOM 860 CB ASP A 112 10.039 -2.377 4.887 1.00 96.76 C
|
918 |
+
ATOM 861 O ASP A 112 8.945 -4.182 7.170 1.00 96.76 O
|
919 |
+
ATOM 862 CG ASP A 112 11.047 -1.379 5.476 1.00 96.76 C
|
920 |
+
ATOM 863 OD1 ASP A 112 10.802 -0.828 6.580 1.00 96.76 O
|
921 |
+
ATOM 864 OD2 ASP A 112 12.031 -1.109 4.763 1.00 96.76 O
|
922 |
+
ATOM 865 N SER A 113 8.628 -2.130 8.012 1.00 97.76 N
|
923 |
+
ATOM 866 CA SER A 113 8.737 -2.463 9.426 1.00 97.76 C
|
924 |
+
ATOM 867 C SER A 113 7.872 -1.518 10.257 1.00 97.76 C
|
925 |
+
ATOM 868 CB SER A 113 10.201 -2.366 9.863 1.00 97.76 C
|
926 |
+
ATOM 869 O SER A 113 7.460 -0.452 9.790 1.00 97.76 O
|
927 |
+
ATOM 870 OG SER A 113 10.659 -1.041 9.704 1.00 97.76 O
|
928 |
+
ATOM 871 N THR A 114 7.572 -1.903 11.496 1.00 97.93 N
|
929 |
+
ATOM 872 CA THR A 114 6.872 -1.032 12.452 1.00 97.93 C
|
930 |
+
ATOM 873 C THR A 114 7.669 0.236 12.756 1.00 97.93 C
|
931 |
+
ATOM 874 CB THR A 114 6.595 -1.782 13.763 1.00 97.93 C
|
932 |
+
ATOM 875 O THR A 114 7.081 1.303 12.910 1.00 97.93 O
|
933 |
+
ATOM 876 CG2 THR A 114 5.397 -2.719 13.624 1.00 97.93 C
|
934 |
+
ATOM 877 OG1 THR A 114 7.706 -2.585 14.103 1.00 97.93 O
|
935 |
+
ATOM 878 N GLU A 115 9.000 0.147 12.775 1.00 97.99 N
|
936 |
+
ATOM 879 CA GLU A 115 9.892 1.295 12.954 1.00 97.99 C
|
937 |
+
ATOM 880 C GLU A 115 9.787 2.279 11.779 1.00 97.99 C
|
938 |
+
ATOM 881 CB GLU A 115 11.316 0.767 13.152 1.00 97.99 C
|
939 |
+
ATOM 882 O GLU A 115 9.492 3.461 11.985 1.00 97.99 O
|
940 |
+
ATOM 883 CG GLU A 115 12.292 1.886 13.537 1.00 97.99 C
|
941 |
+
ATOM 884 CD GLU A 115 13.683 1.364 13.927 1.00 97.99 C
|
942 |
+
ATOM 885 OE1 GLU A 115 14.504 2.215 14.331 1.00 97.99 O
|
943 |
+
ATOM 886 OE2 GLU A 115 13.908 0.136 13.832 1.00 97.99 O
|
944 |
+
ATOM 887 N SER A 116 9.905 1.783 10.540 1.00 97.79 N
|
945 |
+
ATOM 888 CA SER A 116 9.667 2.582 9.332 1.00 97.79 C
|
946 |
+
ATOM 889 C SER A 116 8.258 3.168 9.308 1.00 97.79 C
|
947 |
+
ATOM 890 CB SER A 116 9.859 1.747 8.064 1.00 97.79 C
|
948 |
+
ATOM 891 O SER A 116 8.101 4.331 8.949 1.00 97.79 O
|
949 |
+
ATOM 892 OG SER A 116 11.219 1.437 7.867 1.00 97.79 O
|
950 |
+
ATOM 893 N ALA A 117 7.238 2.409 9.720 1.00 98.50 N
|
951 |
+
ATOM 894 CA ALA A 117 5.860 2.890 9.764 1.00 98.50 C
|
952 |
+
ATOM 895 C ALA A 117 5.705 4.087 10.711 1.00 98.50 C
|
953 |
+
ATOM 896 CB ALA A 117 4.925 1.736 10.138 1.00 98.50 C
|
954 |
+
ATOM 897 O ALA A 117 5.166 5.109 10.300 1.00 98.50 O
|
955 |
+
ATOM 898 N ASN A 118 6.235 4.013 11.936 1.00 98.13 N
|
956 |
+
ATOM 899 CA ASN A 118 6.182 5.126 12.890 1.00 98.13 C
|
957 |
+
ATOM 900 C ASN A 118 6.890 6.378 12.351 1.00 98.13 C
|
958 |
+
ATOM 901 CB ASN A 118 6.810 4.673 14.217 1.00 98.13 C
|
959 |
+
ATOM 902 O ASN A 118 6.342 7.480 12.414 1.00 98.13 O
|
960 |
+
ATOM 903 CG ASN A 118 5.933 3.704 14.987 1.00 98.13 C
|
961 |
+
ATOM 904 ND2 ASN A 118 6.523 2.803 15.735 1.00 98.13 N
|
962 |
+
ATOM 905 OD1 ASN A 118 4.717 3.757 14.959 1.00 98.13 O
|
963 |
+
ATOM 906 N ARG A 119 8.090 6.214 11.779 1.00 98.21 N
|
964 |
+
ATOM 907 CA ARG A 119 8.846 7.316 11.163 1.00 98.21 C
|
965 |
+
ATOM 908 C ARG A 119 8.077 7.947 10.003 1.00 98.21 C
|
966 |
+
ATOM 909 CB ARG A 119 10.209 6.778 10.702 1.00 98.21 C
|
967 |
+
ATOM 910 O ARG A 119 7.982 9.167 9.915 1.00 98.21 O
|
968 |
+
ATOM 911 CG ARG A 119 11.118 7.872 10.109 1.00 98.21 C
|
969 |
+
ATOM 912 CD ARG A 119 12.412 7.276 9.537 1.00 98.21 C
|
970 |
+
ATOM 913 NE ARG A 119 12.142 6.383 8.387 1.00 98.21 N
|
971 |
+
ATOM 914 NH1 ARG A 119 12.373 7.906 6.667 1.00 98.21 N
|
972 |
+
ATOM 915 NH2 ARG A 119 11.738 5.829 6.211 1.00 98.21 N
|
973 |
+
ATOM 916 CZ ARG A 119 12.088 6.710 7.105 1.00 98.21 C
|
974 |
+
ATOM 917 N GLU A 120 7.537 7.122 9.111 1.00 98.63 N
|
975 |
+
ATOM 918 CA GLU A 120 6.803 7.578 7.933 1.00 98.63 C
|
976 |
+
ATOM 919 C GLU A 120 5.483 8.255 8.316 1.00 98.63 C
|
977 |
+
ATOM 920 CB GLU A 120 6.570 6.406 6.963 1.00 98.63 C
|
978 |
+
ATOM 921 O GLU A 120 5.193 9.324 7.792 1.00 98.63 O
|
979 |
+
ATOM 922 CG GLU A 120 7.867 5.959 6.264 1.00 98.63 C
|
980 |
+
ATOM 923 CD GLU A 120 7.706 4.669 5.440 1.00 98.63 C
|
981 |
+
ATOM 924 OE1 GLU A 120 8.734 4.043 5.116 1.00 98.63 O
|
982 |
+
ATOM 925 OE2 GLU A 120 6.576 4.331 5.008 1.00 98.63 O
|
983 |
+
ATOM 926 N ILE A 121 4.709 7.711 9.258 1.00 98.69 N
|
984 |
+
ATOM 927 CA ILE A 121 3.463 8.340 9.724 1.00 98.69 C
|
985 |
+
ATOM 928 C ILE A 121 3.744 9.754 10.239 1.00 98.69 C
|
986 |
+
ATOM 929 CB ILE A 121 2.767 7.454 10.782 1.00 98.69 C
|
987 |
+
ATOM 930 O ILE A 121 3.123 10.697 9.757 1.00 98.69 O
|
988 |
+
ATOM 931 CG1 ILE A 121 2.201 6.183 10.108 1.00 98.69 C
|
989 |
+
ATOM 932 CG2 ILE A 121 1.630 8.216 11.490 1.00 98.69 C
|
990 |
+
ATOM 933 CD1 ILE A 121 1.862 5.065 11.102 1.00 98.69 C
|
991 |
+
ATOM 934 N ASN A 122 4.736 9.909 11.121 1.00 98.32 N
|
992 |
+
ATOM 935 CA ASN A 122 5.114 11.207 11.688 1.00 98.32 C
|
993 |
+
ATOM 936 C ASN A 122 5.680 12.189 10.649 1.00 98.32 C
|
994 |
+
ATOM 937 CB ASN A 122 6.148 10.962 12.798 1.00 98.32 C
|
995 |
+
ATOM 938 O ASN A 122 5.614 13.399 10.844 1.00 98.32 O
|
996 |
+
ATOM 939 CG ASN A 122 5.560 10.302 14.032 1.00 98.32 C
|
997 |
+
ATOM 940 ND2 ASN A 122 6.362 9.592 14.788 1.00 98.32 N
|
998 |
+
ATOM 941 OD1 ASN A 122 4.395 10.432 14.360 1.00 98.32 O
|
999 |
+
ATOM 942 N LEU A 123 6.256 11.683 9.555 1.00 98.53 N
|
1000 |
+
ATOM 943 CA LEU A 123 6.753 12.514 8.459 1.00 98.53 C
|
1001 |
+
ATOM 944 C LEU A 123 5.616 13.020 7.557 1.00 98.53 C
|
1002 |
+
ATOM 945 CB LEU A 123 7.792 11.698 7.669 1.00 98.53 C
|
1003 |
+
ATOM 946 O LEU A 123 5.680 14.139 7.052 1.00 98.53 O
|
1004 |
+
ATOM 947 CG LEU A 123 8.426 12.461 6.494 1.00 98.53 C
|
1005 |
+
ATOM 948 CD1 LEU A 123 9.223 13.684 6.953 1.00 98.53 C
|
1006 |
+
ATOM 949 CD2 LEU A 123 9.363 11.538 5.718 1.00 98.53 C
|
1007 |
+
ATOM 950 N TRP A 124 4.599 12.190 7.316 1.00 98.65 N
|
1008 |
+
ATOM 951 CA TRP A 124 3.526 12.485 6.362 1.00 98.65 C
|
1009 |
+
ATOM 952 C TRP A 124 2.294 13.131 6.999 1.00 98.65 C
|
1010 |
+
ATOM 953 CB TRP A 124 3.154 11.204 5.604 1.00 98.65 C
|
1011 |
+
ATOM 954 O TRP A 124 1.557 13.821 6.292 1.00 98.65 O
|
1012 |
+
ATOM 955 CG TRP A 124 4.130 10.796 4.540 1.00 98.65 C
|
1013 |
+
ATOM 956 CD1 TRP A 124 5.178 9.954 4.687 1.00 98.65 C
|
1014 |
+
ATOM 957 CD2 TRP A 124 4.164 11.206 3.140 1.00 98.65 C
|
1015 |
+
ATOM 958 CE2 TRP A 124 5.273 10.569 2.508 1.00 98.65 C
|
1016 |
+
ATOM 959 CE3 TRP A 124 3.369 12.047 2.334 1.00 98.65 C
|
1017 |
+
ATOM 960 NE1 TRP A 124 5.855 9.822 3.496 1.00 98.65 N
|
1018 |
+
ATOM 961 CH2 TRP A 124 4.769 11.583 0.381 1.00 98.65 C
|
1019 |
+
ATOM 962 CZ2 TRP A 124 5.586 10.745 1.157 1.00 98.65 C
|
1020 |
+
ATOM 963 CZ3 TRP A 124 3.664 12.227 0.969 1.00 98.65 C
|
1021 |
+
ATOM 964 N PHE A 125 2.056 12.925 8.293 1.00 98.68 N
|
1022 |
+
ATOM 965 CA PHE A 125 0.862 13.379 8.999 1.00 98.68 C
|
1023 |
+
ATOM 966 C PHE A 125 1.216 14.008 10.344 1.00 98.68 C
|
1024 |
+
ATOM 967 CB PHE A 125 -0.101 12.203 9.200 1.00 98.68 C
|
1025 |
+
ATOM 968 O PHE A 125 2.029 13.492 11.107 1.00 98.68 O
|
1026 |
+
ATOM 969 CG PHE A 125 -0.648 11.630 7.909 1.00 98.68 C
|
1027 |
+
ATOM 970 CD1 PHE A 125 -1.785 12.207 7.316 1.00 98.68 C
|
1028 |
+
ATOM 971 CD2 PHE A 125 -0.024 10.526 7.301 1.00 98.68 C
|
1029 |
+
ATOM 972 CE1 PHE A 125 -2.323 11.660 6.141 1.00 98.68 C
|
1030 |
+
ATOM 973 CE2 PHE A 125 -0.549 9.988 6.113 1.00 98.68 C
|
1031 |
+
ATOM 974 CZ PHE A 125 -1.707 10.550 5.544 1.00 98.68 C
|
1032 |
+
ATOM 975 N SER A 126 0.548 15.113 10.649 1.00 98.31 N
|
1033 |
+
ATOM 976 CA SER A 126 0.478 15.662 11.995 1.00 98.31 C
|
1034 |
+
ATOM 977 C SER A 126 -0.423 14.794 12.886 1.00 98.31 C
|
1035 |
+
ATOM 978 CB SER A 126 -0.032 17.105 11.956 1.00 98.31 C
|
1036 |
+
ATOM 979 O SER A 126 -1.332 14.126 12.382 1.00 98.31 O
|
1037 |
+
ATOM 980 OG SER A 126 -1.350 17.158 11.457 1.00 98.31 O
|
1038 |
+
ATOM 981 N PRO A 127 -0.245 14.827 14.218 1.00 97.88 N
|
1039 |
+
ATOM 982 CA PRO A 127 -1.102 14.077 15.136 1.00 97.88 C
|
1040 |
+
ATOM 983 C PRO A 127 -2.598 14.394 14.992 1.00 97.88 C
|
1041 |
+
ATOM 984 CB PRO A 127 -0.592 14.434 16.535 1.00 97.88 C
|
1042 |
+
ATOM 985 O PRO A 127 -3.429 13.522 15.217 1.00 97.88 O
|
1043 |
+
ATOM 986 CG PRO A 127 0.878 14.779 16.301 1.00 97.88 C
|
1044 |
+
ATOM 987 CD PRO A 127 0.852 15.461 14.937 1.00 97.88 C
|
1045 |
+
ATOM 988 N GLN A 128 -2.953 15.621 14.597 1.00 98.05 N
|
1046 |
+
ATOM 989 CA GLN A 128 -4.343 16.054 14.415 1.00 98.05 C
|
1047 |
+
ATOM 990 C GLN A 128 -4.990 15.483 13.145 1.00 98.05 C
|
1048 |
+
ATOM 991 CB GLN A 128 -4.419 17.591 14.381 1.00 98.05 C
|
1049 |
+
ATOM 992 O GLN A 128 -6.213 15.443 13.051 1.00 98.05 O
|
1050 |
+
ATOM 993 CG GLN A 128 -3.970 18.268 15.687 1.00 98.05 C
|
1051 |
+
ATOM 994 CD GLN A 128 -2.458 18.276 15.911 1.00 98.05 C
|
1052 |
+
ATOM 995 NE2 GLN A 128 -2.007 18.360 17.141 1.00 98.05 N
|
1053 |
+
ATOM 996 OE1 GLN A 128 -1.645 18.190 15.006 1.00 98.05 O
|
1054 |
+
ATOM 997 N GLU A 129 -4.189 15.035 12.176 1.00 98.59 N
|
1055 |
+
ATOM 998 CA GLU A 129 -4.682 14.367 10.966 1.00 98.59 C
|
1056 |
+
ATOM 999 C GLU A 129 -5.002 12.880 11.203 1.00 98.59 C
|
1057 |
+
ATOM 1000 CB GLU A 129 -3.662 14.530 9.826 1.00 98.59 C
|
1058 |
+
ATOM 1001 O GLU A 129 -5.566 12.236 10.319 1.00 98.59 O
|
1059 |
+
ATOM 1002 CG GLU A 129 -3.554 15.965 9.289 1.00 98.59 C
|
1060 |
+
ATOM 1003 CD GLU A 129 -2.342 16.109 8.357 1.00 98.59 C
|
1061 |
+
ATOM 1004 OE1 GLU A 129 -2.489 16.040 7.115 1.00 98.59 O
|
1062 |
+
ATOM 1005 OE2 GLU A 129 -1.209 16.248 8.866 1.00 98.59 O
|
1063 |
+
ATOM 1006 N LEU A 130 -4.656 12.326 12.374 1.00 98.65 N
|
1064 |
+
ATOM 1007 CA LEU A 130 -4.946 10.942 12.747 1.00 98.65 C
|
1065 |
+
ATOM 1008 C LEU A 130 -6.284 10.862 13.497 1.00 98.65 C
|
1066 |
+
ATOM 1009 CB LEU A 130 -3.786 10.358 13.577 1.00 98.65 C
|
1067 |
+
ATOM 1010 O LEU A 130 -6.406 11.280 14.649 1.00 98.65 O
|
1068 |
+
ATOM 1011 CG LEU A 130 -2.385 10.445 12.942 1.00 98.65 C
|
1069 |
+
ATOM 1012 CD1 LEU A 130 -1.366 9.771 13.861 1.00 98.65 C
|
1070 |
+
ATOM 1013 CD2 LEU A 130 -2.303 9.774 11.569 1.00 98.65 C
|
1071 |
+
ATOM 1014 N CYS A 131 -7.304 10.297 12.855 1.00 98.47 N
|
1072 |
+
ATOM 1015 CA CYS A 131 -8.637 10.152 13.427 1.00 98.47 C
|
1073 |
+
ATOM 1016 C CYS A 131 -8.710 8.993 14.431 1.00 98.47 C
|
1074 |
+
ATOM 1017 CB CYS A 131 -9.668 9.957 12.307 1.00 98.47 C
|
1075 |
+
ATOM 1018 O CYS A 131 -8.394 7.847 14.110 1.00 98.47 O
|
1076 |
+
ATOM 1019 SG CYS A 131 -9.729 11.408 11.215 1.00 98.47 S
|
1077 |
+
ATOM 1020 N GLN A 132 -9.233 9.274 15.627 1.00 97.77 N
|
1078 |
+
ATOM 1021 CA GLN A 132 -9.548 8.261 16.634 1.00 97.77 C
|
1079 |
+
ATOM 1022 C GLN A 132 -11.034 7.899 16.578 1.00 97.77 C
|
1080 |
+
ATOM 1023 CB GLN A 132 -9.162 8.757 18.033 1.00 97.77 C
|
1081 |
+
ATOM 1024 O GLN A 132 -11.896 8.743 16.813 1.00 97.77 O
|
1082 |
+
ATOM 1025 CG GLN A 132 -7.649 8.977 18.175 1.00 97.77 C
|
1083 |
+
ATOM 1026 CD GLN A 132 -7.239 9.345 19.597 1.00 97.77 C
|
1084 |
+
ATOM 1027 NE2 GLN A 132 -5.968 9.579 19.833 1.00 97.77 N
|
1085 |
+
ATOM 1028 OE1 GLN A 132 -8.031 9.424 20.521 1.00 97.77 O
|
1086 |
+
ATOM 1029 N TYR A 133 -11.340 6.640 16.271 1.00 98.01 N
|
1087 |
+
ATOM 1030 CA TYR A 133 -12.708 6.124 16.256 1.00 98.01 C
|
1088 |
+
ATOM 1031 C TYR A 133 -12.744 4.610 16.490 1.00 98.01 C
|
1089 |
+
ATOM 1032 CB TYR A 133 -13.414 6.491 14.940 1.00 98.01 C
|
1090 |
+
ATOM 1033 O TYR A 133 -11.742 3.908 16.308 1.00 98.01 O
|
1091 |
+
ATOM 1034 CG TYR A 133 -12.918 5.751 13.713 1.00 98.01 C
|
1092 |
+
ATOM 1035 CD1 TYR A 133 -11.670 6.084 13.155 1.00 98.01 C
|
1093 |
+
ATOM 1036 CD2 TYR A 133 -13.715 4.755 13.112 1.00 98.01 C
|
1094 |
+
ATOM 1037 CE1 TYR A 133 -11.234 5.449 11.983 1.00 98.01 C
|
1095 |
+
ATOM 1038 CE2 TYR A 133 -13.277 4.115 11.936 1.00 98.01 C
|
1096 |
+
ATOM 1039 OH TYR A 133 -11.656 3.940 10.173 1.00 98.01 O
|
1097 |
+
ATOM 1040 CZ TYR A 133 -12.047 4.484 11.354 1.00 98.01 C
|
1098 |
+
ATOM 1041 N LYS A 134 -13.924 4.124 16.891 1.00 96.78 N
|
1099 |
+
ATOM 1042 CA LYS A 134 -14.242 2.698 17.012 1.00 96.78 C
|
1100 |
+
ATOM 1043 C LYS A 134 -14.803 2.191 15.687 1.00 96.78 C
|
1101 |
+
ATOM 1044 CB LYS A 134 -15.230 2.491 18.171 1.00 96.78 C
|
1102 |
+
ATOM 1045 O LYS A 134 -15.750 2.785 15.167 1.00 96.78 O
|
1103 |
+
ATOM 1046 CG LYS A 134 -15.509 1.000 18.411 1.00 96.78 C
|
1104 |
+
ATOM 1047 CD LYS A 134 -16.436 0.788 19.613 1.00 96.78 C
|
1105 |
+
ATOM 1048 CE LYS A 134 -16.638 -0.718 19.810 1.00 96.78 C
|
1106 |
+
ATOM 1049 NZ LYS A 134 -17.508 -1.016 20.975 1.00 96.78 N
|
1107 |
+
ATOM 1050 N GLN A 135 -14.272 1.091 15.163 1.00 95.74 N
|
1108 |
+
ATOM 1051 CA GLN A 135 -14.828 0.483 13.963 1.00 95.74 C
|
1109 |
+
ATOM 1052 C GLN A 135 -15.991 -0.423 14.384 1.00 95.74 C
|
1110 |
+
ATOM 1053 CB GLN A 135 -13.717 -0.237 13.188 1.00 95.74 C
|
1111 |
+
ATOM 1054 O GLN A 135 -15.839 -1.329 15.197 1.00 95.74 O
|
1112 |
+
ATOM 1055 CG GLN A 135 -14.009 -0.362 11.690 1.00 95.74 C
|
1113 |
+
ATOM 1056 CD GLN A 135 -12.837 -0.952 10.902 1.00 95.74 C
|
1114 |
+
ATOM 1057 NE2 GLN A 135 -12.968 -1.123 9.606 1.00 95.74 N
|
1115 |
+
ATOM 1058 OE1 GLN A 135 -11.762 -1.210 11.399 1.00 95.74 O
|
1116 |
+
ATOM 1059 N ALA A 136 -17.193 -0.176 13.857 1.00 96.94 N
|
1117 |
+
ATOM 1060 CA ALA A 136 -18.390 -0.920 14.269 1.00 96.94 C
|
1118 |
+
ATOM 1061 C ALA A 136 -18.271 -2.439 14.033 1.00 96.94 C
|
1119 |
+
ATOM 1062 CB ALA A 136 -19.595 -0.343 13.519 1.00 96.94 C
|
1120 |
+
ATOM 1063 O ALA A 136 -18.921 -3.228 14.716 1.00 96.94 O
|
1121 |
+
ATOM 1064 N VAL A 137 -17.425 -2.833 13.079 1.00 96.00 N
|
1122 |
+
ATOM 1065 CA VAL A 137 -17.173 -4.226 12.709 1.00 96.00 C
|
1123 |
+
ATOM 1066 C VAL A 137 -16.037 -4.890 13.496 1.00 96.00 C
|
1124 |
+
ATOM 1067 CB VAL A 137 -16.982 -4.382 11.187 1.00 96.00 C
|
1125 |
+
ATOM 1068 O VAL A 137 -15.826 -6.081 13.297 1.00 96.00 O
|
1126 |
+
ATOM 1069 CG1 VAL A 137 -18.285 -4.062 10.441 1.00 96.00 C
|
1127 |
+
ATOM 1070 CG2 VAL A 137 -15.885 -3.468 10.633 1.00 96.00 C
|
1128 |
+
ATOM 1071 N ASP A 138 -15.354 -4.183 14.411 1.00 95.73 N
|
1129 |
+
ATOM 1072 CA ASP A 138 -14.252 -4.741 15.221 1.00 95.73 C
|
1130 |
+
ATOM 1073 C ASP A 138 -14.603 -6.100 15.868 1.00 95.73 C
|
1131 |
+
ATOM 1074 CB ASP A 138 -13.774 -3.743 16.308 1.00 95.73 C
|
1132 |
+
ATOM 1075 O ASP A 138 -13.807 -7.030 15.721 1.00 95.73 O
|
1133 |
+
ATOM 1076 CG ASP A 138 -12.899 -2.592 15.799 1.00 95.73 C
|
1134 |
+
ATOM 1077 OD1 ASP A 138 -12.216 -2.793 14.778 1.00 95.73 O
|
1135 |
+
ATOM 1078 OD2 ASP A 138 -12.918 -1.506 16.437 1.00 95.73 O
|
1136 |
+
ATOM 1079 N PRO A 139 -15.795 -6.297 16.483 1.00 96.81 N
|
1137 |
+
ATOM 1080 CA PRO A 139 -16.163 -7.580 17.100 1.00 96.81 C
|
1138 |
+
ATOM 1081 C PRO A 139 -16.336 -8.754 16.122 1.00 96.81 C
|
1139 |
+
ATOM 1082 CB PRO A 139 -17.483 -7.322 17.839 1.00 96.81 C
|
1140 |
+
ATOM 1083 O PRO A 139 -16.470 -9.891 16.555 1.00 96.81 O
|
1141 |
+
ATOM 1084 CG PRO A 139 -17.509 -5.811 18.044 1.00 96.81 C
|
1142 |
+
ATOM 1085 CD PRO A 139 -16.831 -5.312 16.776 1.00 96.81 C
|
1143 |
+
ATOM 1086 N TRP A 140 -16.395 -8.489 14.814 1.00 97.41 N
|
1144 |
+
ATOM 1087 CA TRP A 140 -16.508 -9.510 13.766 1.00 97.41 C
|
1145 |
+
ATOM 1088 C TRP A 140 -15.171 -9.774 13.056 1.00 97.41 C
|
1146 |
+
ATOM 1089 CB TRP A 140 -17.587 -9.089 12.760 1.00 97.41 C
|
1147 |
+
ATOM 1090 O TRP A 140 -15.081 -10.694 12.245 1.00 97.41 O
|
1148 |
+
ATOM 1091 CG TRP A 140 -18.928 -8.758 13.342 1.00 97.41 C
|
1149 |
+
ATOM 1092 CD1 TRP A 140 -19.415 -7.512 13.525 1.00 97.41 C
|
1150 |
+
ATOM 1093 CD2 TRP A 140 -19.960 -9.661 13.843 1.00 97.41 C
|
1151 |
+
ATOM 1094 CE2 TRP A 140 -21.061 -8.877 14.304 1.00 97.41 C
|
1152 |
+
ATOM 1095 CE3 TRP A 140 -20.076 -11.062 13.962 1.00 97.41 C
|
1153 |
+
ATOM 1096 NE1 TRP A 140 -20.673 -7.572 14.089 1.00 97.41 N
|
1154 |
+
ATOM 1097 CH2 TRP A 140 -22.306 -10.849 14.942 1.00 97.41 C
|
1155 |
+
ATOM 1098 CZ2 TRP A 140 -22.222 -9.450 14.843 1.00 97.41 C
|
1156 |
+
ATOM 1099 CZ3 TRP A 140 -21.235 -11.649 14.505 1.00 97.41 C
|
1157 |
+
ATOM 1100 N ILE A 141 -14.145 -8.956 13.326 1.00 96.13 N
|
1158 |
+
ATOM 1101 CA ILE A 141 -12.797 -9.059 12.741 1.00 96.13 C
|
1159 |
+
ATOM 1102 C ILE A 141 -11.799 -9.609 13.770 1.00 96.13 C
|
1160 |
+
ATOM 1103 CB ILE A 141 -12.349 -7.674 12.206 1.00 96.13 C
|
1161 |
+
ATOM 1104 O ILE A 141 -10.845 -10.303 13.406 1.00 96.13 O
|
1162 |
+
ATOM 1105 CG1 ILE A 141 -13.303 -7.164 11.100 1.00 96.13 C
|
1163 |
+
ATOM 1106 CG2 ILE A 141 -10.915 -7.723 11.635 1.00 96.13 C
|
1164 |
+
ATOM 1107 CD1 ILE A 141 -13.096 -5.686 10.752 1.00 96.13 C
|
1165 |
+
ATOM 1108 N HIS A 142 -11.995 -9.286 15.048 1.00 93.03 N
|
1166 |
+
ATOM 1109 CA HIS A 142 -11.108 -9.645 16.148 1.00 93.03 C
|
1167 |
+
ATOM 1110 C HIS A 142 -11.830 -10.543 17.159 1.00 93.03 C
|
1168 |
+
ATOM 1111 CB HIS A 142 -10.554 -8.354 16.772 1.00 93.03 C
|
1169 |
+
ATOM 1112 O HIS A 142 -12.997 -10.306 17.464 1.00 93.03 O
|
1170 |
+
ATOM 1113 CG HIS A 142 -9.795 -7.494 15.784 1.00 93.03 C
|
1171 |
+
ATOM 1114 CD2 HIS A 142 -9.924 -6.144 15.580 1.00 93.03 C
|
1172 |
+
ATOM 1115 ND1 HIS A 142 -8.895 -7.957 14.852 1.00 93.03 N
|
1173 |
+
ATOM 1116 CE1 HIS A 142 -8.480 -6.912 14.118 1.00 93.03 C
|
1174 |
+
ATOM 1117 NE2 HIS A 142 -9.056 -5.783 14.540 1.00 93.03 N
|
1175 |
+
ATOM 1118 N GLU A 143 -11.121 -11.573 17.634 1.00 90.01 N
|
1176 |
+
ATOM 1119 CA GLU A 143 -11.546 -12.472 18.721 1.00 90.01 C
|
1177 |
+
ATOM 1120 C GLU A 143 -11.356 -11.840 20.106 1.00 90.01 C
|
1178 |
+
ATOM 1121 CB GLU A 143 -10.831 -13.838 18.593 1.00 90.01 C
|
1179 |
+
ATOM 1122 O GLU A 143 -10.395 -11.045 20.264 1.00 90.01 O
|
1180 |
+
ATOM 1123 CG GLU A 143 -9.312 -13.779 18.871 1.00 90.01 C
|
1181 |
+
ATOM 1124 CD GLU A 143 -8.523 -15.059 18.519 1.00 90.01 C
|
1182 |
+
ATOM 1125 OE1 GLU A 143 -7.279 -14.926 18.332 1.00 90.01 O
|
1183 |
+
ATOM 1126 OE2 GLU A 143 -9.123 -16.150 18.382 1.00 90.01 O
|
1184 |
+
ATOM 1127 OXT GLU A 143 -12.189 -12.169 20.976 1.00 90.01 O
|
1185 |
+
TER 1128 GLU A 143
|
1186 |
+
ENDMDL
|
1187 |
+
END
|
case_study/A0A516RTC5.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/A0A5B8NBE6.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/A0A5B8NBN0.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/A0A7J6F8C5.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/B1KN79.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/C1DMX5.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/C4R826.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/G4VQX9.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/J9PY59.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/J9VGQ7.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/J9VVW8.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/M9PF61.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/O53504.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q0RWC9.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q1NEJ0.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q39VG1.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q6F4N4.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q72K04.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q93UV7.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9AGK2.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9AI62.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9KJF3.pdb ADDED: The diff for this file is too large to render. See raw diff.
case_study/Q9XZ48.pdb ADDED: The diff for this file is too large to render. See raw diff.
constants.py
ADDED
@@ -0,0 +1,39 @@
reps1 = [
    {
        "model": 0,
        "chain": "",
        "resname": "",
        "style": "cartoon",  # line, stick, sphere, cartoon, surface
        "color": "whiteCarbon",  # blue, red, green, yellow, whiteCarbon
        "residue_range": "",  # 3-15
        "around": 0,  # surrounding range, default 0
        "byres": False,
        "visible": False
    },
]

style_list = ["Cartoon", "Sphere", "Stick", "Line", "Surface"]
color_list = ["White", "Blue", "Red", "Green", "Yellow", "Magenta", "Cyan", "Orange", "Purple", "Gray"]
default_reps = [
    {
        "model": 0,
        "chain": "",
        "resname": "",
        "style": style_list[0][0].lower() + style_list[0][1:],
        "color": color_list[0][0].lower() + color_list[0][1:] + "Carbon",  # whiteCarbon
        "residue_range": "",  # 3-15
        "around": 0,  # surrounding range, default 0
        "byres": False,
        "visible": False
    },
]
model_list = ['M3Site-ESM3-abs', 'M3Site-ESM3-full', 'M3Site-ESM2-abs', 'M3Site-ESM2-full', 'M3Site-ESM1b-abs', 'M3Site-ESM1b-full']
no_cat_dict = {
    'b': 'background',
    '0': 'CRI',
    '1': 'SCI',
    '2': 'PI',
    '3': 'PTCR',
    '4': 'IA',
    '5': 'SSA'
}
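The constants above configure the 3D viewer representations and map predicted class indices to active-site categories. Below is a minimal sketch of how they could be consumed; the helper name, highlight style, and color are illustrative assumptions rather than this Space's actual app.py wiring.

# Illustrative sketch (assumed helper, not this Space's actual app.py): turn per-residue
# predictions into readable categories and viewer highlights using the constants above.
from constants import default_reps, no_cat_dict

def summarize_predictions(pred_labels):
    """pred_labels: per-residue labels such as ['b', 'b', '0', '4', ...] (1-based residues)."""
    reps = [dict(default_reps[0])]  # base cartoon representation for the whole chain
    named_sites = []
    for i, label in enumerate(pred_labels, start=1):
        if label == 'b':  # background residues stay on the base cartoon
            continue
        named_sites.append((i, no_cat_dict[label]))  # e.g. (3, 'CRI')
        reps.append({
            "model": 0,
            "chain": "",
            "resname": "",
            "style": "stick",        # assumed highlight style
            "color": "redCarbon",    # assumed highlight color
            "residue_range": f"{i}-{i}",
            "around": 0,
            "byres": True,
            "visible": True,
        })
    return named_sites, reps

# summarize_predictions(['b', '0', 'b', '4']) -> ([(2, 'CRI'), (4, 'IA')], [...])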
esm/__init__.py
ADDED
File without changes
esm/layers/attention.py
ADDED
@@ -0,0 +1,70 @@
import functools

import einops
import torch
import torch.nn.functional as F
from torch import nn

from esm.layers.rotary import RotaryEmbedding


class MultiHeadAttention(nn.Module):
    def __init__(
        self,
        d_model: int,
        n_heads: int,
        bias: bool = False,
        qk_layernorm: bool = True,
    ):
        super().__init__()

        self.d_model = d_model
        self.n_heads = n_heads

        self.d_head = self.d_model // self.n_heads
        self.layernorm_qkv = nn.Sequential(
            nn.LayerNorm(d_model), nn.Linear(d_model, d_model * 3, bias=bias)
        )
        self.out_proj = nn.Linear(d_model, d_model, bias=bias)

        if qk_layernorm:
            self.q_ln = nn.LayerNorm(d_model, bias=bias)
            self.k_ln = nn.LayerNorm(d_model, bias=bias)
        else:
            self.q_ln = nn.Identity()
            self.k_ln = nn.Identity()

        self.rotary = RotaryEmbedding(d_model // n_heads)

    def _apply_rotary(self, q: torch.Tensor, k: torch.Tensor):
        q = q.unflatten(-1, (self.n_heads, self.d_head))
        k = k.unflatten(-1, (self.n_heads, self.d_head))
        q, k = self.rotary(q, k)
        q = q.flatten(-2, -1)
        k = k.flatten(-2, -1)
        return q, k

    def forward(self, x, seq_id):
        qkv_BLD3 = self.layernorm_qkv(x)
        query_BLD, key_BLD, value_BLD = torch.chunk(qkv_BLD3, 3, dim=-1)
        query_BLD, key_BLD = self.q_ln(query_BLD), self.k_ln(key_BLD)
        query_BLD, key_BLD = self._apply_rotary(query_BLD, key_BLD)

        n_heads = self.n_heads
        reshaper = functools.partial(
            einops.rearrange, pattern="b s (h d) -> b h s d", h=n_heads
        )

        query_BHLD, key_BHLD, value_BHLD = map(
            reshaper, (query_BLD, key_BLD, value_BLD)
        )

        # Where True, enable participation in attention.
        mask_BLL = seq_id.unsqueeze(-1) == seq_id.unsqueeze(-2)
        mask_BHLL = mask_BLL.unsqueeze(1)

        context_BHLD = F.scaled_dot_product_attention(
            query_BHLD, key_BHLD, value_BHLD, mask_BHLL
        )
        context_BLD = einops.rearrange(context_BHLD, "b h s d -> b s (h d)")
        return self.out_proj(context_BLD)
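A minimal usage sketch for MultiHeadAttention as defined above; the batch size, sequence length, and model width are illustrative assumptions (the rotary embedding is provided by esm/layers/rotary.py).

import torch
from esm.layers.attention import MultiHeadAttention

attn = MultiHeadAttention(d_model=64, n_heads=4)
x = torch.randn(2, 10, 64)                      # [batch, length, d_model]
seq_id = torch.zeros(2, 10, dtype=torch.long)   # all positions belong to the same sequence
out = attn(x, seq_id)                           # [2, 10, 64]; attention is masked to positions with equal seq_id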
esm/layers/blocks.py
ADDED
@@ -0,0 +1,153 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from esm.layers.attention import MultiHeadAttention
from esm.layers.geom_attention import (
    GeometricReasoningOriginalImpl,
)
from esm.utils.structure.affine3d import Affine3D


def swiglu_correction_fn(expansion_ratio: float, d_model: int) -> int:
    # set hidden dimension to nearest multiple of 256 after expansion ratio
    return int(((expansion_ratio * d_model) + 255) // 256 * 256)


class SwiGLU(nn.Module):
    """
    SwiGLU activation function as an nn.Module, allowing it to be used within nn.Sequential.
    This module splits the input tensor along the last dimension and applies the SiLU (Swish)
    activation function to the first half, then multiplies it by the second half.
    """

    def __init__(self):
        super(SwiGLU, self).__init__()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x1, x2 = x.chunk(2, dim=-1)
        return F.silu(x1) * x2


def swiglu_ln_ffn(d_model: int, expansion_ratio: float, bias: bool):
    return nn.Sequential(
        nn.LayerNorm(d_model),
        nn.Linear(
            d_model, swiglu_correction_fn(expansion_ratio, d_model) * 2, bias=bias
        ),
        SwiGLU(),
        nn.Linear(swiglu_correction_fn(expansion_ratio, d_model), d_model, bias=bias),
    )


def gelu_ln_ffn(d_model: int, expansion_ratio: float, bias: bool):
    hidden_dim = int(expansion_ratio * d_model)
    return nn.Sequential(
        nn.LayerNorm(d_model),
        nn.Linear(d_model, hidden_dim, bias=bias),
        nn.GELU(),
        nn.Linear(hidden_dim, d_model, bias=bias),
    )


class UnifiedTransformerBlock(nn.Module):
    """
    A unified transformer block that can optionally incorporate geometric attention.

    This class defines a transformer block that can be configured to use geometric attention
    alongside the standard multi-head attention mechanism. It is designed to be a flexible
    component of transformer-based models, allowing for the integration of geometric reasoning.

    Parameters
    ----------
    d_model : int
        The dimensionality of the input and output features of the transformer block.
    n_heads : int
        The number of attention heads in the multi-head attention mechanism.
    use_geom_attn : bool, optional
        Whether to use geometric attention in addition to the standard multi-head attention. Defaults to False.
    v_heads : int, optional
        The number of heads to use for the geometric attention mechanism, if enabled. Must be specified if `use_geom_attn` is True.
    """

    def __init__(
        self,
        d_model: int,
        n_heads: int,
        use_geom_attn: bool = False,
        use_plain_attn: bool = True,
        v_heads: int | None = None,
        bias: bool = False,
        expansion_ratio: float = 4.0,
        residue_scaling_factor: float = 1,
        mask_and_zero_frameless: bool = False,
        qk_layernorm: bool = True,
        ffn_type: str = "swiglu",  # swiglu | gelu
    ):
        super().__init__()
        self.use_plain_attn = use_plain_attn
        if self.use_plain_attn:
            self.attn = MultiHeadAttention(
                d_model, n_heads, bias, qk_layernorm=qk_layernorm
            )
        self.use_geom_attn = use_geom_attn
        if self.use_geom_attn:
            if v_heads is None:
                raise ValueError("v_heads must be specified when use_geom_attn is True")
            self.geom_attn = GeometricReasoningOriginalImpl(
                c_s=d_model,
                v_heads=v_heads,
                bias=bias,
                mask_and_zero_frameless=mask_and_zero_frameless,
            )
        if ffn_type == "swiglu":
            self.ffn = swiglu_ln_ffn(d_model, expansion_ratio, bias)
        elif ffn_type == "gelu":
            self.ffn = gelu_ln_ffn(d_model, expansion_ratio, bias)
        else:
            raise ValueError(f"Unknown ffn_type: {ffn_type}")
        self.scaling_factor = residue_scaling_factor

    def forward(
        self,
        x: torch.Tensor,
        sequence_id: torch.Tensor,
        frames: Affine3D,
        frames_mask: torch.Tensor,
        chain_id: torch.Tensor,
    ) -> torch.Tensor:
        """
        Forward pass for the UnifiedTransformerBlock.

        Parameters
        ----------
        x : torch.Tensor[float]
            Input tensor to the transformer block, typically the output from the previous layer.
        sequence_id : torch.Tensor[int]
            Tensor containing sequence IDs for each element in the batch, used for attention masking.
        frames : Affine3D
            Affine3D containing geometric frame information for geometric attention.
        frames_mask : torch.Tensor[bool]
            Boolean mask tensor indicating valid frames for geometric attention.
        chain_id : torch.Tensor[int]
            Tensor containing chain IDs for each element, used for attention masking in geometric attention.

        Returns
        -------
        torch.Tensor[float]
            The output tensor after applying the transformer block operations.
        """
        if self.use_plain_attn:
            r1 = self.attn(x, sequence_id)
            x = x + r1 / self.scaling_factor

        if self.use_geom_attn:
            r2 = self.geom_attn(x, frames, frames_mask, sequence_id, chain_id)
            x = x + r2 / self.scaling_factor

        r3 = self.ffn(x) / self.scaling_factor
        x = x + r3

        return x
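A minimal, sequence-only sketch of UnifiedTransformerBlock with geometric attention disabled; on this path frames, frames_mask, and chain_id are never read, so None is passed for them. The shapes are illustrative assumptions.

import torch
from esm.layers.blocks import UnifiedTransformerBlock

block = UnifiedTransformerBlock(d_model=64, n_heads=4, use_geom_attn=False)
x = torch.randn(2, 10, 64)                      # [batch, length, d_model]
seq_id = torch.zeros(2, 10, dtype=torch.long)
y = block(x, sequence_id=seq_id, frames=None, frames_mask=None, chain_id=None)  # [2, 10, 64]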
esm/layers/codebook.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
import torch.distributed as dist
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
class EMACodebook(nn.Module):
|
9 |
+
def __init__(
|
10 |
+
self,
|
11 |
+
n_codes,
|
12 |
+
embedding_dim,
|
13 |
+
no_random_restart=True,
|
14 |
+
restart_thres=1.0,
|
15 |
+
ema_decay=0.99,
|
16 |
+
):
|
17 |
+
super().__init__()
|
18 |
+
self.register_buffer("embeddings", torch.randn(n_codes, embedding_dim))
|
19 |
+
self.register_buffer("N", torch.zeros(n_codes))
|
20 |
+
self.register_buffer("z_avg", self.embeddings.data.clone())
|
21 |
+
|
22 |
+
self.n_codes = n_codes
|
23 |
+
self.embedding_dim = embedding_dim
|
24 |
+
self._need_init = True
|
25 |
+
self.no_random_restart = no_random_restart
|
26 |
+
self.restart_thres = restart_thres
|
27 |
+
self.freeze_codebook = False
|
28 |
+
self.ema_decay = ema_decay
|
29 |
+
|
30 |
+
def reset_parameters(self):
|
31 |
+
# For meta init
|
32 |
+
pass
|
33 |
+
|
34 |
+
def _tile(self, x):
|
35 |
+
d, ew = x.shape
|
36 |
+
if d < self.n_codes:
|
37 |
+
n_repeats = (self.n_codes + d - 1) // d
|
38 |
+
std = 0.01 / np.sqrt(ew)
|
39 |
+
x = x.repeat(n_repeats, 1)
|
40 |
+
x = x + torch.randn_like(x) * std
|
41 |
+
return x
|
42 |
+
|
43 |
+
def _init_embeddings(self, z):
|
44 |
+
# z: [b, t, c]
|
45 |
+
self._need_init = False
|
46 |
+
flat_inputs = z.view(-1, self.embedding_dim)
|
47 |
+
y = self._tile(flat_inputs)
|
48 |
+
|
49 |
+
y.shape[0]
|
50 |
+
_k_rand = y[torch.randperm(y.shape[0])][: self.n_codes]
|
51 |
+
if dist.is_initialized():
|
52 |
+
dist.broadcast(_k_rand, 0)
|
53 |
+
self.embeddings.data.copy_(_k_rand)
|
54 |
+
self.z_avg.data.copy_(_k_rand)
|
55 |
+
self.N.data.copy_(torch.ones(self.n_codes))
|
56 |
+
|
57 |
+
def forward(self, z):
|
58 |
+
# z: [b, t, c]
|
59 |
+
if self._need_init and self.training and not self.freeze_codebook:
|
60 |
+
self._init_embeddings(z)
|
61 |
+
# z is of shape [batch_size, sequence length, channels]
|
62 |
+
flat_inputs = z.view(-1, self.embedding_dim)
|
63 |
+
distances = (
|
64 |
+
(flat_inputs**2).sum(dim=1, keepdim=True)
|
65 |
+
- 2 * flat_inputs @ self.embeddings.t()
|
66 |
+
+ (self.embeddings.t() ** 2).sum(dim=0, keepdim=True)
|
67 |
+
) # [bt, c]
|
68 |
+
|
69 |
+
encoding_indices = torch.argmin(distances, dim=1)
|
70 |
+
encoding_indices = encoding_indices.view(*z.shape[:2]) # [b, t, ncode]
|
71 |
+
|
72 |
+
embeddings = F.embedding(encoding_indices, self.embeddings) # [b, t, c]
|
73 |
+
|
74 |
+
commitment_loss = 0.25 * F.mse_loss(z, embeddings.detach())
|
75 |
+
|
76 |
+
# EMA codebook update
|
77 |
+
if self.training and not self.freeze_codebook:
|
78 |
+
assert False, "Not implemented"
|
79 |
+
embeddings_st = (embeddings - z).detach() + z
|
80 |
+
|
81 |
+
return embeddings_st, encoding_indices, commitment_loss
|
82 |
+
|
83 |
+
def dictionary_lookup(self, encodings):
|
84 |
+
embeddings = F.embedding(encodings, self.embeddings)
|
85 |
+
return embeddings
|
86 |
+
|
87 |
+
def soft_codebook_lookup(self, weights: torch.Tensor) -> torch.Tensor:
|
88 |
+
return weights @ self.embeddings
|
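A minimal usage sketch for the codebook above (not part of the uploaded files): it quantizes a batch of continuous embeddings by nearest-neighbour lookup and returns straight-through embeddings, integer code indices, and a commitment loss. The module is run in eval mode with `freeze_codebook = True`, since the EMA update branch is left unimplemented (`assert False`) and the data-dependent initialisation only runs during training.

```python
import torch

from esm.layers.codebook import EMACodebook

codebook = EMACodebook(n_codes=4096, embedding_dim=128)
codebook.eval()                  # the EMA update branch is not implemented
codebook.freeze_codebook = True  # also skip the data-dependent re-initialisation

z = torch.randn(2, 10, 128)      # [batch, tokens, channels]
quantized, indices, commitment_loss = codebook(z)

print(quantized.shape)         # torch.Size([2, 10, 128]) - nearest code vectors (straight-through)
print(indices.shape)           # torch.Size([2, 10])      - integer code ids
print(commitment_loss.item())  # scalar: 0.25 * MSE(z, quantized.detach())
```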
esm/layers/ffn.py
ADDED
@@ -0,0 +1,29 @@
1 |
+
import torch.nn as nn
|
2 |
+
import torch.nn.functional as F
|
3 |
+
from torch import Tensor
|
4 |
+
|
5 |
+
# NOT CURRENTLY USED
|
6 |
+
|
7 |
+
|
8 |
+
class SwiGLU(nn.Module):
|
9 |
+
def __init__(self) -> None:
|
10 |
+
super().__init__()
|
11 |
+
|
12 |
+
def forward(self, x: Tensor) -> Tensor:
|
13 |
+
x1, x2 = x.chunk(2, dim=-1)
|
14 |
+
hidden = F.silu(x1) * x2
|
15 |
+
return hidden
|
16 |
+
|
17 |
+
|
18 |
+
class FFN(nn.Module):
|
19 |
+
def __init__(self, in_proj, activation, out_proj) -> None:
|
20 |
+
super().__init__()
|
21 |
+
self.in_proj = in_proj
|
22 |
+
self.activation = activation
|
23 |
+
self.out_proj = out_proj
|
24 |
+
|
25 |
+
def forward(self, x: Tensor) -> Tensor:
|
26 |
+
x = self.in_proj(x)
|
27 |
+
x = self.activation(x)
|
28 |
+
x = self.out_proj(x)
|
29 |
+
return x
|
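Although the file is marked as not currently used, the wiring is simple enough to illustrate. The sketch below (illustrative only, arbitrary sizes) combines the generic `FFN` wrapper with `SwiGLU`; the input projection must produce twice the hidden width because the activation splits its input into two halves.

```python
import torch
import torch.nn as nn

from esm.layers.ffn import FFN, SwiGLU

d_model, d_hidden = 64, 256
ffn = FFN(
    in_proj=nn.Linear(d_model, 2 * d_hidden),  # SwiGLU chunks the last dim into two halves
    activation=SwiGLU(),                       # silu(x1) * x2 -> d_hidden features
    out_proj=nn.Linear(d_hidden, d_model),
)

x = torch.randn(4, 16, d_model)
print(ffn(x).shape)  # torch.Size([4, 16, 64])
```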
esm/layers/geom_attention.py
ADDED
@@ -0,0 +1,151 @@
1 |
+
from math import sqrt
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from einops import rearrange
|
5 |
+
from torch import nn
|
6 |
+
from torch.nn import functional as F
|
7 |
+
|
8 |
+
|
9 |
+
class GeometricReasoningOriginalImpl(nn.Module):
|
10 |
+
def __init__(
|
11 |
+
self,
|
12 |
+
c_s: int,
|
13 |
+
v_heads: int,
|
14 |
+
num_vector_messages: int = 1,
|
15 |
+
mask_and_zero_frameless: bool = True,
|
16 |
+
divide_residual_by_depth: bool = False,
|
17 |
+
bias: bool = False,
|
18 |
+
):
|
19 |
+
"""Approximate implementation:
|
20 |
+
|
21 |
+
ATTN(A, v) := (softmax_j A_ij) v_j
|
22 |
+
make_rot_vectors(x) := R(i->g) Linear(x).reshape(..., 3)
|
23 |
+
make_vectors(x) := T(i->g) Linear(x).reshape(..., 3)
|
24 |
+
|
25 |
+
v <- make_rot_vectors(x)
|
26 |
+
q_dir, k_dir <- make_rot_vectors(x)
|
27 |
+
q_dist, k_dist <- make_vectors(x)
|
28 |
+
|
29 |
+
A_ij <- dot(q_dir_i, k_dir_j) -||q_dist_i - k_dist_j||^2
|
30 |
+
x <- x + Linear(T(g->i) ATTN(A, v))
|
31 |
+
"""
|
32 |
+
super().__init__()
|
33 |
+
self.c_s = c_s
|
34 |
+
self.v_heads = v_heads
|
35 |
+
self.num_vector_messages = num_vector_messages
|
36 |
+
self.mask_and_zero_frameless = mask_and_zero_frameless
|
37 |
+
|
38 |
+
self.s_norm = nn.LayerNorm(c_s, bias=bias)
|
39 |
+
dim_proj = (
|
40 |
+
4 * self.v_heads * 3 + self.v_heads * 3 * self.num_vector_messages
|
41 |
+
) # 2 x (q, k) * number of heads * (x, y, z) + number of heads * number of vector messages * (x, y, z)
|
42 |
+
self.proj = nn.Linear(c_s, dim_proj, bias=bias)
|
43 |
+
channels_out = self.v_heads * 3 * self.num_vector_messages
|
44 |
+
self.out_proj = nn.Linear(channels_out, c_s, bias=bias)
|
45 |
+
|
46 |
+
# The basic idea is for some attention heads to pay more or less attention to rotation versus distance,
|
47 |
+
# as well as to control the sharpness of the softmax (i.e., should this head only attend to those residues
|
48 |
+
# very nearby or should there be shallower dropoff in attention weight?)
|
49 |
+
self.distance_scale_per_head = nn.Parameter(torch.zeros((self.v_heads)))
|
50 |
+
self.rotation_scale_per_head = nn.Parameter(torch.zeros((self.v_heads)))
|
51 |
+
|
52 |
+
def forward(self, s, affine, affine_mask, sequence_id, chain_id):
|
53 |
+
attn_bias = sequence_id.unsqueeze(-1) == sequence_id.unsqueeze(-2)
|
54 |
+
attn_bias = attn_bias.unsqueeze(1).float()
|
55 |
+
attn_bias = attn_bias.masked_fill(
|
56 |
+
~affine_mask[:, None, None, :], torch.finfo(attn_bias.dtype).min
|
57 |
+
)
|
58 |
+
chain_id_mask = chain_id.unsqueeze(1) != chain_id.unsqueeze(2)
|
59 |
+
attn_bias = attn_bias.masked_fill(
|
60 |
+
chain_id_mask.unsqueeze(1), torch.finfo(s.dtype).min
|
61 |
+
)
|
62 |
+
|
63 |
+
ns = self.s_norm(s)
|
64 |
+
vec_rot, vec_dist = self.proj(ns).split(
|
65 |
+
[
|
66 |
+
self.v_heads * 2 * 3 + self.v_heads * 3 * self.num_vector_messages,
|
67 |
+
self.v_heads * 2 * 3,
|
68 |
+
],
|
69 |
+
dim=-1,
|
70 |
+
)
|
71 |
+
|
72 |
+
# Rotate the queries and keys for the rotation term. We also rotate the values.
|
73 |
+
# NOTE(zeming, thayes): Values are only rotated, not translated. We may wish to change
|
74 |
+
# this in the future.
|
75 |
+
query_rot, key_rot, value = (
|
76 |
+
affine.rot[..., None]
|
77 |
+
.apply(rearrange(vec_rot, "... (h c) -> ... h c", c=3))
|
78 |
+
.split(
|
79 |
+
[
|
80 |
+
self.v_heads,
|
81 |
+
self.v_heads,
|
82 |
+
self.v_heads * self.num_vector_messages,
|
83 |
+
],
|
84 |
+
dim=-2,
|
85 |
+
)
|
86 |
+
)
|
87 |
+
|
88 |
+
# Rotate and translate the queries and keys for the distance term
|
89 |
+
# NOTE(thayes): a simple speedup would be to apply all rotations together, then
|
90 |
+
# separately apply the translations.
|
91 |
+
query_dist, key_dist = (
|
92 |
+
affine[..., None]
|
93 |
+
.apply(rearrange(vec_dist, "... (h c) -> ... h c", c=3))
|
94 |
+
.chunk(2, dim=-2)
|
95 |
+
)
|
96 |
+
|
97 |
+
query_dist = rearrange(query_dist, "b s h d -> b h s 1 d")
|
98 |
+
key_dist = rearrange(key_dist, "b s h d -> b h 1 s d")
|
99 |
+
query_rot = rearrange(query_rot, "b s h d -> b h s d")
|
100 |
+
key_rot = rearrange(key_rot, "b s h d -> b h d s")
|
101 |
+
value = rearrange(
|
102 |
+
value, "b s (h m) d -> b h s (m d)", m=self.num_vector_messages
|
103 |
+
)
|
104 |
+
|
105 |
+
distance_term = (query_dist - key_dist).norm(dim=-1) / sqrt(3)
|
106 |
+
rotation_term = query_rot.matmul(key_rot) / sqrt(3)
|
107 |
+
distance_term_weight = rearrange(
|
108 |
+
F.softplus(self.distance_scale_per_head), "h -> h 1 1"
|
109 |
+
)
|
110 |
+
rotation_term_weight = rearrange(
|
111 |
+
F.softplus(self.rotation_scale_per_head), "h -> h 1 1"
|
112 |
+
)
|
113 |
+
|
114 |
+
attn_weight = (
|
115 |
+
rotation_term * rotation_term_weight - distance_term * distance_term_weight
|
116 |
+
)
|
117 |
+
|
118 |
+
if attn_bias is not None:
|
119 |
+
# we can re-use the attention bias from the transformer layers
|
120 |
+
# NOTE(thayes): This attention bias is expected to handle two things:
|
121 |
+
# 1. Masking attention on padding tokens
|
122 |
+
# 2. Masking cross sequence attention in the case of bin packing
|
123 |
+
s_q = attn_weight.size(2)
|
124 |
+
s_k = attn_weight.size(3)
|
125 |
+
_s_q = max(0, attn_bias.size(2) - s_q)
|
126 |
+
_s_k = max(0, attn_bias.size(3) - s_k)
|
127 |
+
attn_bias = attn_bias[:, :, _s_q:, _s_k:]
|
128 |
+
attn_weight = attn_weight + attn_bias
|
129 |
+
|
130 |
+
attn_weight = torch.softmax(attn_weight, dim=-1)
|
131 |
+
|
132 |
+
attn_out = attn_weight.matmul(value)
|
133 |
+
|
134 |
+
attn_out = (
|
135 |
+
affine.rot[..., None]
|
136 |
+
.invert()
|
137 |
+
.apply(
|
138 |
+
rearrange(
|
139 |
+
attn_out, "b h s (m d) -> b s (h m) d", m=self.num_vector_messages
|
140 |
+
)
|
141 |
+
)
|
142 |
+
)
|
143 |
+
|
144 |
+
attn_out = rearrange(
|
145 |
+
attn_out, "b s (h m) d -> b s (h m d)", m=self.num_vector_messages
|
146 |
+
)
|
147 |
+
if self.mask_and_zero_frameless:
|
148 |
+
attn_out = attn_out.masked_fill(~affine_mask[..., None], 0.0)
|
149 |
+
s = self.out_proj(attn_out)
|
150 |
+
|
151 |
+
return s
|
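A rough usage sketch for the geometric attention block (not from the uploaded files). It assumes that `build_affine3d_from_coordinates`, imported the same way as in `esm/models/esm3.py` below, returns a per-residue frame plus a validity mask when given N/CA/C backbone coordinates; the random coordinates here only exercise the shapes.

```python
import torch

from esm.layers.geom_attention import GeometricReasoningOriginalImpl
from esm.utils.structure.affine3d import build_affine3d_from_coordinates

B, L, d_model = 1, 32, 256
coords = torch.randn(B, L, 3, 3)  # (batch, length, N/CA/C, xyz) backbone coordinates
affine, affine_mask = build_affine3d_from_coordinates(coords)

block = GeometricReasoningOriginalImpl(c_s=d_model, v_heads=8)
s = torch.randn(B, L, d_model)
sequence_id = torch.zeros(B, L, dtype=torch.long)  # a single sequence, no bin packing
chain_id = torch.zeros(B, L, dtype=torch.long)     # a single chain

out = block(s, affine, affine_mask, sequence_id, chain_id)
print(out.shape)  # torch.Size([1, 32, 256])
```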
esm/layers/regression_head.py
ADDED
@@ -0,0 +1,24 @@
1 |
+
import torch.nn as nn
|
2 |
+
|
3 |
+
|
4 |
+
def RegressionHead(
|
5 |
+
d_model: int,
|
6 |
+
output_dim: int,
|
7 |
+
hidden_dim: int | None = None,
|
8 |
+
) -> nn.Module:
|
9 |
+
"""Single-hidden layer MLP for supervised output.
|
10 |
+
|
11 |
+
Args:
|
12 |
+
d_model: input dimension
|
13 |
+
output_dim: dimensionality of the output.
|
14 |
+
hidden_dim: optional dimension of hidden layer, defaults to d_model.
|
15 |
+
Returns:
|
16 |
+
output MLP module.
|
17 |
+
"""
|
18 |
+
hidden_dim = hidden_dim if hidden_dim is not None else d_model
|
19 |
+
return nn.Sequential(
|
20 |
+
nn.Linear(d_model, hidden_dim),
|
21 |
+
nn.GELU(),
|
22 |
+
nn.LayerNorm(hidden_dim),
|
23 |
+
nn.Linear(hidden_dim, output_dim),
|
24 |
+
)
|
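`RegressionHead` is a factory rather than a class: it returns a plain `nn.Sequential`, so it can be applied directly to any `(batch, length, d_model)` activation. A quick sketch with arbitrary sizes:

```python
import torch

from esm.layers.regression_head import RegressionHead

head = RegressionHead(d_model=512, output_dim=64)  # hidden_dim defaults to d_model
x = torch.randn(2, 100, 512)
print(head(x).shape)  # torch.Size([2, 100, 64])
```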
esm/layers/rotary.py
ADDED
@@ -0,0 +1,221 @@
1 |
+
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
|
2 |
+
#
|
3 |
+
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
|
4 |
+
# and OPT implementations in this library. It has been modified from its
|
5 |
+
# original forms to accommodate minor architectural differences compared
|
6 |
+
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
|
7 |
+
#
|
8 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
9 |
+
# you may not use this file except in compliance with the License.
|
10 |
+
# You may obtain a copy of the License at
|
11 |
+
#
|
12 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
13 |
+
#
|
14 |
+
# Unless required by applicable law or agreed to in writing, software
|
15 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
16 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
17 |
+
# See the License for the specific language governing permissions and
|
18 |
+
# limitations under the License.
|
19 |
+
# NOTE: this implementation is from LLaMA 2:
|
20 |
+
# https://huggingface.co/togethercomputer/LLaMA-2-7B-32K/blob/08639a72e17836184096ae6a7e2766f2a34c3e36/modeling_flash_llama.py#L114
|
21 |
+
# Flash attention rotary implementation can be installed like so: `pip install git+https://github.com/HazyResearch/flash-attention.git#subdirectory=csrc/rotary`
|
22 |
+
|
23 |
+
from typing import Tuple
|
24 |
+
|
25 |
+
import torch
|
26 |
+
from einops import rearrange, repeat
|
27 |
+
|
28 |
+
|
29 |
+
def rotate_half(x, interleaved=False):
|
30 |
+
if not interleaved:
|
31 |
+
x1, x2 = x.chunk(2, dim=-1)
|
32 |
+
return torch.cat((-x2, x1), dim=-1)
|
33 |
+
else:
|
34 |
+
x1, x2 = x[..., ::2], x[..., 1::2]
|
35 |
+
return rearrange(
|
36 |
+
torch.stack((-x2, x1), dim=-1), "... d two -> ... (d two)", two=2
|
37 |
+
)
|
38 |
+
|
39 |
+
|
40 |
+
def apply_rotary_emb_torch(x, cos, sin, interleaved=False, _inplace=False):
|
41 |
+
"""
|
42 |
+
x: (batch_size, seqlen, nheads, headdim)
|
43 |
+
cos, sin: (seqlen, rotary_dim / 2)
|
44 |
+
"""
|
45 |
+
ro_dim = cos.shape[-1] * 2
|
46 |
+
assert ro_dim <= x.shape[-1]
|
47 |
+
seqlen = x.size(1)
|
48 |
+
cos = cos[:seqlen]
|
49 |
+
sin = sin[:seqlen]
|
50 |
+
cos = repeat(cos, "s d -> s 1 (2 d)")
|
51 |
+
sin = repeat(sin, "s d -> s 1 (2 d)")
|
52 |
+
return torch.cat(
|
53 |
+
[
|
54 |
+
x[..., :ro_dim] * cos + rotate_half(x[..., :ro_dim], interleaved) * sin,
|
55 |
+
x[..., ro_dim:],
|
56 |
+
],
|
57 |
+
dim=-1,
|
58 |
+
)
|
59 |
+
|
60 |
+
|
61 |
+
class RotaryEmbedding(torch.nn.Module):
|
62 |
+
"""
|
63 |
+
The rotary position embeddings from RoFormer_ (Su et al.).
|
64 |
+
A crucial insight from the method is that the query and keys are
|
65 |
+
transformed by rotation matrices which depend on the relative positions.
|
66 |
+
Other implementations are available in the Rotary Transformer repo_ and in
|
67 |
+
GPT-NeoX_, GPT-NeoX was an inspiration
|
68 |
+
.. _RoFormer: https://arxiv.org/abs/2104.09864
|
69 |
+
.. _repo: https://github.com/ZhuiyiTechnology/roformer
|
70 |
+
.. _GPT-NeoX: https://github.com/EleutherAI/gpt-neox
|
71 |
+
If scale_base is not None, this implements XPos (Sun et al., https://arxiv.org/abs/2212.10554).
|
72 |
+
A recommended value for scale_base is 512: https://github.com/HazyResearch/flash-attention/issues/96
|
73 |
+
Reference: https://github.com/sunyt32/torchscale/blob/main/torchscale/component/xpos_relative_position.py
|
74 |
+
"""
|
75 |
+
|
76 |
+
def __init__(
|
77 |
+
self,
|
78 |
+
dim: int,
|
79 |
+
base=10000.0,
|
80 |
+
interleaved=False,
|
81 |
+
scale_base=None,
|
82 |
+
scaling_factor=1.0,
|
83 |
+
pos_idx_in_fp32=True,
|
84 |
+
device=None,
|
85 |
+
):
|
86 |
+
"""
|
87 |
+
interleaved: if True, rotate pairs of even and odd dimensions (GPT-J style) instead
|
88 |
+
of 1st half and 2nd half (GPT-NeoX style).
|
89 |
+
pos_idx_in_fp32: if True, the position indices [0.0, ..., seqlen - 1] are in fp32,
|
90 |
+
otherwise they might be in lower precision.
|
91 |
+
This option was added because previously (before 2023-07-02), when we construct
|
92 |
+
the position indices, we use the dtype of self.inv_freq. In most cases this would
|
93 |
+
be fp32, but if the model is trained in pure bf16 (not mixed precision), then
|
94 |
+
self.inv_freq would be bf16, and the position indices are also in bf16.
|
95 |
+
Because of the limited precision of bf16 (e.g. 1995.0 is rounded to 2000.0), the
|
96 |
+
embeddings for some positions will coincide.
|
97 |
+
To maintain compatibility with models previously trained in pure bf16,
|
98 |
+
we add this option.
|
99 |
+
scaling_factor: RotaryEmbedding extended with linear scaling.
|
100 |
+
"""
|
101 |
+
super().__init__()
|
102 |
+
self.dim = dim
|
103 |
+
self.base = float(base)
|
104 |
+
self.pos_idx_in_fp32 = pos_idx_in_fp32
|
105 |
+
# Generate and save the inverse frequency buffer (non trainable)
|
106 |
+
self.interleaved = interleaved
|
107 |
+
self.scale_base = scale_base
|
108 |
+
self.scaling_factor = scaling_factor
|
109 |
+
self.device = device
|
110 |
+
|
111 |
+
self._seq_len_cached = 0
|
112 |
+
self._cos_cached = None
|
113 |
+
self._sin_cached = None
|
114 |
+
self._cos_k_cached = None
|
115 |
+
self._sin_k_cached = None
|
116 |
+
self.reset_parameters()
|
117 |
+
|
118 |
+
def reset_parameters(self):
|
119 |
+
inv_freq = self._compute_inv_freq(self.device)
|
120 |
+
self.register_buffer("inv_freq", inv_freq, persistent=False)
|
121 |
+
arange = torch.arange(0, self.dim, 2, device=self.device, dtype=torch.float32)
|
122 |
+
scale = (
|
123 |
+
(arange + 0.4 * self.dim) / (1.4 * self.dim)
|
124 |
+
if self.scale_base is not None
|
125 |
+
else None
|
126 |
+
)
|
127 |
+
self.register_buffer("scale", scale)
|
128 |
+
|
129 |
+
def _compute_inv_freq(self, device=None):
|
130 |
+
return 1 / (
|
131 |
+
self.base
|
132 |
+
** (
|
133 |
+
torch.arange(0, self.dim, 2, device=device, dtype=torch.float32)
|
134 |
+
/ self.dim
|
135 |
+
)
|
136 |
+
)
|
137 |
+
|
138 |
+
def _update_cos_sin_cache(self, seqlen, device=None, dtype=None):
|
139 |
+
# Reset the tables if the sequence length has changed,
|
140 |
+
# if we're on a new device (possibly due to tracing for instance),
|
141 |
+
# or if we're switching from inference mode to training
|
142 |
+
if (
|
143 |
+
seqlen > self._seq_len_cached
|
144 |
+
or self._cos_cached is None
|
145 |
+
or self._cos_cached.device != device
|
146 |
+
or self._cos_cached.dtype != dtype
|
147 |
+
or (self.training and self._cos_cached.is_inference())
|
148 |
+
):
|
149 |
+
self._seq_len_cached = seqlen
|
150 |
+
# We want fp32 here, not self.inv_freq.dtype, since the model could be loaded in bf16
|
151 |
+
# And the output of arange can be quite large, so bf16 would lose a lot of precision.
|
152 |
+
# However, for compatibility reason, we add an option to use the dtype of self.inv_freq.
|
153 |
+
if self.pos_idx_in_fp32:
|
154 |
+
t = torch.arange(seqlen, device=device, dtype=torch.float32)
|
155 |
+
t /= self.scaling_factor
|
156 |
+
# We want fp32 here as well since inv_freq will be multiplied with t, and the output
|
157 |
+
# will be large. Having it in bf16 will lose a lot of precision and cause the
|
158 |
+
# cos & sin output to change significantly.
|
159 |
+
# We want to recompute self.inv_freq if it was not loaded in fp32
|
160 |
+
if self.inv_freq.dtype != torch.float32:
|
161 |
+
inv_freq = self.inv_freq.to(torch.float32)
|
162 |
+
else:
|
163 |
+
inv_freq = self.inv_freq
|
164 |
+
else:
|
165 |
+
t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype)
|
166 |
+
t /= self.scaling_factor
|
167 |
+
inv_freq = self.inv_freq
|
168 |
+
# Don't do einsum, it converts fp32 to fp16 under AMP
|
169 |
+
# freqs = torch.einsum("i,j->ij", t, self.inv_freq)
|
170 |
+
freqs = torch.outer(t, inv_freq)
|
171 |
+
|
172 |
+
if self.scale is None:
|
173 |
+
self._cos_cached = torch.cos(freqs).to(dtype)
|
174 |
+
self._sin_cached = torch.sin(freqs).to(dtype)
|
175 |
+
else:
|
176 |
+
power = (
|
177 |
+
torch.arange(
|
178 |
+
seqlen, dtype=self.scale.dtype, device=self.scale.device
|
179 |
+
)
|
180 |
+
- seqlen // 2
|
181 |
+
) / self.scale_base
|
182 |
+
scale = self.scale.to(device=power.device) ** power.unsqueeze(-1)
|
183 |
+
# We want the multiplication by scale to happen in fp32
|
184 |
+
self._cos_cached = (torch.cos(freqs) * scale).to(dtype)
|
185 |
+
self._sin_cached = (torch.sin(freqs) * scale).to(dtype)
|
186 |
+
self._cos_k_cached = (torch.cos(freqs) / scale).to(dtype)
|
187 |
+
self._sin_k_cached = (torch.sin(freqs) / scale).to(dtype)
|
188 |
+
|
189 |
+
def forward(
|
190 |
+
self, q: torch.Tensor, k: torch.Tensor, seqlen_offset: int = 0
|
191 |
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
192 |
+
"""
|
193 |
+
q: (batch, seqlen, nheads, headdim)
|
194 |
+
k: (batch, seqlen, nheads, headdim)
|
195 |
+
seqlen_offset: can be used in generation where the qkv being passed in is only the last
|
196 |
+
token in the batch.
|
197 |
+
"""
|
198 |
+
self._update_cos_sin_cache(
|
199 |
+
q.shape[1] + seqlen_offset, device=q.device, dtype=q.dtype
|
200 |
+
)
|
201 |
+
assert self._cos_cached is not None
|
202 |
+
assert self._sin_cached is not None
|
203 |
+
if self.scale is None:
|
204 |
+
return (
|
205 |
+
apply_rotary_emb_torch(
|
206 |
+
q,
|
207 |
+
self._cos_cached[seqlen_offset:],
|
208 |
+
self._sin_cached[seqlen_offset:],
|
209 |
+
self.interleaved,
|
210 |
+
True, # inplace=True
|
211 |
+
),
|
212 |
+
apply_rotary_emb_torch(
|
213 |
+
k,
|
214 |
+
self._cos_cached[seqlen_offset:],
|
215 |
+
self._sin_cached[seqlen_offset:],
|
216 |
+
self.interleaved,
|
217 |
+
True, # inplace=True
|
218 |
+
),
|
219 |
+
) # type: ignore
|
220 |
+
else:
|
221 |
+
assert False
|
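A minimal sketch of `RotaryEmbedding` applied to query/key tensors of shape `(batch, seqlen, nheads, headdim)`, matching the docstring above; with the default `scale_base=None` the non-XPos branch is taken. Values are random and purely illustrative.

```python
import torch

from esm.layers.rotary import RotaryEmbedding

batch, seqlen, nheads, headdim = 2, 128, 8, 64
rope = RotaryEmbedding(dim=headdim)

q = torch.randn(batch, seqlen, nheads, headdim)
k = torch.randn(batch, seqlen, nheads, headdim)
q_rot, k_rot = rope(q, k)  # position-dependent rotation; shapes are unchanged

print(q_rot.shape, k_rot.shape)  # torch.Size([2, 128, 8, 64]) twice
```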
esm/layers/structure_proj.py
ADDED
@@ -0,0 +1,68 @@
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
|
4 |
+
from esm.utils.constants.physics import (
|
5 |
+
BB_COORDINATES,
|
6 |
+
)
|
7 |
+
from esm.utils.structure.affine3d import (
|
8 |
+
Affine3D,
|
9 |
+
RotationMatrix,
|
10 |
+
)
|
11 |
+
|
12 |
+
|
13 |
+
class Dim6RotStructureHead(nn.Module):
|
14 |
+
# Normally, AF2 uses quaternions to specify rotations. There's some evidence that
|
15 |
+
# other representations are more well behaved - the best one according to
|
16 |
+
# https://openaccess.thecvf.com/content_CVPR_2019/papers/Zhou_On_the_Continuity_of_Rotation_Representations_in_Neural_Networks_CVPR_2019_paper.pdf
|
17 |
+
# is using Gram-Schmidt on 2 vectors, which is implemented here.
|
18 |
+
def __init__(
|
19 |
+
self,
|
20 |
+
input_dim: int,
|
21 |
+
trans_scale_factor: float = 10,
|
22 |
+
norm_type: str = "layernorm",
|
23 |
+
activation_fn: str = "esm_gelu",
|
24 |
+
predict_torsion_angles: bool = True,
|
25 |
+
):
|
26 |
+
super().__init__()
|
27 |
+
self.ffn1 = nn.Linear(input_dim, input_dim)
|
28 |
+
self.activation_fn = nn.GELU()
|
29 |
+
self.norm = nn.LayerNorm(input_dim)
|
30 |
+
self.proj = nn.Linear(input_dim, 9 + 7 * 2)
|
31 |
+
self.trans_scale_factor = trans_scale_factor
|
32 |
+
self.predict_torsion_angles = predict_torsion_angles
|
33 |
+
self.bb_local_coords = torch.tensor(BB_COORDINATES).float()
|
34 |
+
|
35 |
+
def forward(self, x, affine, affine_mask, **kwargs):
|
36 |
+
if affine is None:
|
37 |
+
rigids = Affine3D.identity(
|
38 |
+
x.shape[:-1],
|
39 |
+
dtype=x.dtype,
|
40 |
+
device=x.device,
|
41 |
+
requires_grad=self.training,
|
42 |
+
rotation_type=RotationMatrix,
|
43 |
+
)
|
44 |
+
else:
|
45 |
+
rigids = affine
|
46 |
+
|
47 |
+
# [*, N]
|
48 |
+
x = self.ffn1(x)
|
49 |
+
x = self.activation_fn(x)
|
50 |
+
x = self.norm(x)
|
51 |
+
trans, x, y, angles = self.proj(x).split([3, 3, 3, 7 * 2], dim=-1)
|
52 |
+
trans = trans * self.trans_scale_factor
|
53 |
+
x = x / (x.norm(dim=-1, keepdim=True) + 1e-5)
|
54 |
+
y = y / (y.norm(dim=-1, keepdim=True) + 1e-5)
|
55 |
+
update = Affine3D.from_graham_schmidt(x + trans, trans, y + trans)
|
56 |
+
rigids = rigids.compose(update.mask(affine_mask))
|
57 |
+
affine = rigids.tensor
|
58 |
+
|
59 |
+
# We approximate the positions of the backbone atoms in the global frame by applying the rigid
|
60 |
+
# transformation to the mean of the backbone atoms in the local frame.
|
61 |
+
all_bb_coords_local = (
|
62 |
+
self.bb_local_coords[None, None, :, :]
|
63 |
+
.expand(*x.shape[:-1], 3, 3)
|
64 |
+
.to(x.device)
|
65 |
+
)
|
66 |
+
pred_xyz = rigids[..., None].apply(all_bb_coords_local)
|
67 |
+
|
68 |
+
return affine, pred_xyz
|
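A sketch of how the 6D-rotation structure head might be called (assumptions: `affine=None` starts from identity frames, and a boolean `(batch, length)` mask is accepted where the forward pass above applies `update.mask(affine_mask)`). Random embeddings are used, so the predicted backbone coordinates are meaningless beyond their shape.

```python
import torch

from esm.layers.structure_proj import Dim6RotStructureHead

B, L, d = 1, 20, 128
head = Dim6RotStructureHead(input_dim=d)
x = torch.randn(B, L, d)                          # per-residue embeddings
affine_mask = torch.ones(B, L, dtype=torch.bool)  # every residue has a valid frame

affine_tensor, pred_xyz = head(x, affine=None, affine_mask=affine_mask)
print(pred_xyz.shape)  # torch.Size([1, 20, 3, 3]) - approximate N/CA/C positions
```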
esm/layers/transformer_stack.py
ADDED
@@ -0,0 +1,94 @@
1 |
+
import math
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
|
6 |
+
from esm.layers.blocks import UnifiedTransformerBlock
|
7 |
+
from esm.utils.structure.affine3d import Affine3D
|
8 |
+
|
9 |
+
|
10 |
+
class TransformerStack(nn.Module):
|
11 |
+
"""
|
12 |
+
A stack of transformer blocks used in the ESM-3 model. Each block is a UnifiedTransformerBlock,
|
13 |
+
which can either be geometric attention or standard multi-head attention.
|
14 |
+
|
15 |
+
Args:
|
16 |
+
d_model (int): The dimensionality of the input and output feature vectors.
|
17 |
+
n_heads (int): The number of attention heads.
|
18 |
+
v_heads (int): The number of voting heads.
|
19 |
+
n_layers (int): The number of transformer blocks in the stack.
|
20 |
+
n_layers_geom (int, optional): The number of transformer blocks that use geometric attention.
|
21 |
+
scale_residue (bool, optional): Whether to scale the residue connections in each transformer block.
|
22 |
+
mask_and_zero_frameless (bool, optional): Whether to mask and zero frameless positions in the input.
|
23 |
+
Only applies in the geometric attention blocks, which is conditioned on the structure
|
24 |
+
"""
|
25 |
+
|
26 |
+
def __init__(
|
27 |
+
self,
|
28 |
+
d_model: int,
|
29 |
+
n_heads: int,
|
30 |
+
v_heads: int | None,
|
31 |
+
n_layers: int,
|
32 |
+
n_layers_geom: int = 1,
|
33 |
+
scale_residue: bool = True,
|
34 |
+
mask_and_zero_frameless: bool = False,
|
35 |
+
bias: bool = False,
|
36 |
+
qk_layernorm: bool = True,
|
37 |
+
ffn_type: str = "swiglu", # swiglu | gelu
|
38 |
+
expansion_ratio: float = 8 / 3,
|
39 |
+
):
|
40 |
+
super().__init__()
|
41 |
+
self.blocks = nn.ModuleList(
|
42 |
+
[
|
43 |
+
UnifiedTransformerBlock(
|
44 |
+
d_model,
|
45 |
+
n_heads,
|
46 |
+
v_heads=v_heads,
|
47 |
+
use_geom_attn=i < n_layers_geom,
|
48 |
+
residue_scaling_factor=(
|
49 |
+
math.sqrt(n_layers / 36) if scale_residue else 1.0
|
50 |
+
),
|
51 |
+
expansion_ratio=expansion_ratio,
|
52 |
+
mask_and_zero_frameless=mask_and_zero_frameless,
|
53 |
+
bias=bias,
|
54 |
+
qk_layernorm=qk_layernorm,
|
55 |
+
ffn_type=ffn_type,
|
56 |
+
)
|
57 |
+
for i in range(n_layers)
|
58 |
+
]
|
59 |
+
)
|
60 |
+
self.norm = nn.LayerNorm(d_model, bias=False)
|
61 |
+
|
62 |
+
def forward(
|
63 |
+
self,
|
64 |
+
x: torch.Tensor,
|
65 |
+
sequence_id: torch.Tensor | None = None,
|
66 |
+
affine: Affine3D | None = None,
|
67 |
+
affine_mask: torch.Tensor | None = None,
|
68 |
+
chain_id: torch.Tensor | None = None,
|
69 |
+
) -> tuple[torch.Tensor, torch.Tensor]:
|
70 |
+
"""
|
71 |
+
Forward pass of the TransformerStack.
|
72 |
+
|
73 |
+
Args:
|
74 |
+
x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, d_model).
|
75 |
+
sequence_id (torch.Tensor): The sequence ID tensor of shape (batch_size, sequence_length).
|
76 |
+
affine (Affine3D | None): The affine transformation tensor or None.
|
77 |
+
affine_mask (torch.Tensor | None): The affine mask tensor or None.
|
78 |
+
chain_id (torch.Tensor): The protein chain tensor of shape (batch_size, sequence_length).
|
79 |
+
Only used in geometric attention.
|
80 |
+
|
81 |
+
Returns:
|
82 |
+
post_norm: The output tensor of shape (batch_size, sequence_length, d_model).
|
83 |
+
pre_norm: The embedding of shape (batch_size, sequence_length, d_model).
|
84 |
+
"""
|
85 |
+
*batch_dims, _ = x.shape
|
86 |
+
if sequence_id is None:
|
87 |
+
sequence_id = torch.ones(
|
88 |
+
size=batch_dims, dtype=torch.int64, device=x.device
|
89 |
+
)
|
90 |
+
if chain_id is None:
|
91 |
+
chain_id = torch.ones(size=batch_dims, dtype=torch.int64, device=x.device)
|
92 |
+
for block in self.blocks:
|
93 |
+
x = block(x, sequence_id, affine, affine_mask, chain_id)
|
94 |
+
return self.norm(x), x
|
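A small sketch of the stack without geometric attention (`n_layers_geom=0`), so no structural frames are needed; it is assumed that `v_heads=None` is acceptable in that case, which is what the `int | None` annotation suggests. Sizes are arbitrary.

```python
import torch

from esm.layers.transformer_stack import TransformerStack

stack = TransformerStack(
    d_model=256, n_heads=8, v_heads=None, n_layers=4, n_layers_geom=0
)
x = torch.randn(2, 50, 256)

post_norm, pre_norm = stack(x)          # sequence_id / chain_id default to all-ones
print(post_norm.shape, pre_norm.shape)  # both torch.Size([2, 50, 256])
```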
esm/models/esm3.py
ADDED
@@ -0,0 +1,798 @@
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import contextlib
|
4 |
+
from functools import partial
|
5 |
+
|
6 |
+
import attr
|
7 |
+
import einops
|
8 |
+
import torch
|
9 |
+
import torch.nn as nn
|
10 |
+
from attr import dataclass
|
11 |
+
|
12 |
+
from esm.layers.regression_head import RegressionHead
|
13 |
+
from esm.layers.transformer_stack import TransformerStack
|
14 |
+
from esm.models.function_decoder import FunctionTokenDecoder
|
15 |
+
from esm.models.vqvae import (
|
16 |
+
StructureTokenDecoder,
|
17 |
+
StructureTokenEncoder,
|
18 |
+
)
|
19 |
+
from esm.sdk.api import (
|
20 |
+
ESM3InferenceClient,
|
21 |
+
ESMProtein,
|
22 |
+
ESMProteinTensor,
|
23 |
+
ForwardAndSampleOutput,
|
24 |
+
ForwardConfig,
|
25 |
+
ForwardOutput,
|
26 |
+
ForwardTrackData,
|
27 |
+
GenerationConfig,
|
28 |
+
ProteinType,
|
29 |
+
ReturnLogitsConfig,
|
30 |
+
SamplingConfig,
|
31 |
+
SamplingTrackConfig,
|
32 |
+
)
|
33 |
+
from esm.tokenization import get_model_tokenizers
|
34 |
+
from esm.utils import encoding
|
35 |
+
from esm.utils.constants import esm3 as C
|
36 |
+
from esm.utils.constants.models import ESM3_OPEN_SMALL
|
37 |
+
from esm.utils.decoding import decode_protein_tensor
|
38 |
+
from esm.utils.generation import (
|
39 |
+
iterative_sampling_raw,
|
40 |
+
iterative_sampling_tokens,
|
41 |
+
)
|
42 |
+
from esm.utils.misc import rbf
|
43 |
+
from esm.utils.sampling import (
|
44 |
+
get_default_sampling_config,
|
45 |
+
sample_function_logits,
|
46 |
+
sample_logits,
|
47 |
+
sample_residue_annotation_logits,
|
48 |
+
)
|
49 |
+
from esm.utils.structure.affine3d import (
|
50 |
+
build_affine3d_from_coordinates,
|
51 |
+
)
|
52 |
+
|
53 |
+
|
54 |
+
@dataclass
|
55 |
+
class ESMOutput:
|
56 |
+
sequence_logits: torch.Tensor
|
57 |
+
structure_logits: torch.Tensor
|
58 |
+
secondary_structure_logits: torch.Tensor
|
59 |
+
sasa_logits: torch.Tensor
|
60 |
+
function_logits: torch.Tensor
|
61 |
+
residue_logits: torch.Tensor
|
62 |
+
embeddings: torch.Tensor
|
63 |
+
|
64 |
+
|
65 |
+
class EncodeInputs(nn.Module):
|
66 |
+
"""
|
67 |
+
Module for encoding input features in the ESM-3 model.
|
68 |
+
|
69 |
+
Args:
|
70 |
+
d_model (int): The dimensionality of the model's hidden states.
|
71 |
+
"""
|
72 |
+
|
73 |
+
def __init__(self, d_model: int):
|
74 |
+
super().__init__()
|
75 |
+
|
76 |
+
# Sequence
|
77 |
+
self.sequence_embed = nn.Embedding(64, d_model)
|
78 |
+
# Mandatory information
|
79 |
+
self.plddt_projection = nn.Linear(16, d_model)
|
80 |
+
self.structure_per_res_plddt_projection = nn.Linear(16, d_model)
|
81 |
+
|
82 |
+
# Structure
|
83 |
+
self.structure_tokens_embed = nn.Embedding(4096 + 5, d_model)
|
84 |
+
|
85 |
+
# "Structural" features
|
86 |
+
self.ss8_embed = nn.Embedding(8 + 3, d_model)
|
87 |
+
self.sasa_embed = nn.Embedding(16 + 3, d_model)
|
88 |
+
|
89 |
+
# "Functional" features
|
90 |
+
self.function_embed = nn.ModuleList(
|
91 |
+
[nn.Embedding(260, d_model // 8, padding_idx=0) for _ in range(8)]
|
92 |
+
)
|
93 |
+
|
94 |
+
self.residue_embed = nn.EmbeddingBag(1478, d_model, mode="sum", padding_idx=0)
|
95 |
+
|
96 |
+
def forward(
|
97 |
+
self,
|
98 |
+
sequence_tokens: torch.Tensor,
|
99 |
+
structure_tokens: torch.Tensor,
|
100 |
+
average_plddt: torch.Tensor,
|
101 |
+
per_res_plddt: torch.Tensor,
|
102 |
+
ss8_tokens: torch.Tensor,
|
103 |
+
sasa_tokens: torch.Tensor,
|
104 |
+
function_tokens: torch.Tensor,
|
105 |
+
residue_annotation_tokens: torch.Tensor,
|
106 |
+
) -> torch.Tensor:
|
107 |
+
sequence_embed = self.sequence_embed(sequence_tokens)
|
108 |
+
|
109 |
+
rbf_16_fn = partial(rbf, v_min=0.0, v_max=1.0, n_bins=16)
|
110 |
+
# the `masked_fill(padding_mask.unsqueeze(2), 0)` for the two below is unnecessary
|
111 |
+
# as pad tokens never even interact with the "real" tokens (due to sequence_id)
|
112 |
+
plddt_embed = self.plddt_projection(rbf_16_fn(average_plddt))
|
113 |
+
structure_per_res_plddt = self.structure_per_res_plddt_projection(
|
114 |
+
rbf_16_fn(per_res_plddt)
|
115 |
+
)
|
116 |
+
|
117 |
+
# Structure + "structural features" embeds
|
118 |
+
structure_embed = self.structure_tokens_embed(structure_tokens)
|
119 |
+
ss8_embed = self.ss8_embed(ss8_tokens)
|
120 |
+
sasa_embed = self.sasa_embed(sasa_tokens)
|
121 |
+
|
122 |
+
# "Functional" features embeds
|
123 |
+
function_embed = torch.cat(
|
124 |
+
[
|
125 |
+
embed_fn(funcs)
|
126 |
+
for embed_fn, funcs in zip(
|
127 |
+
self.function_embed, function_tokens.unbind(-1)
|
128 |
+
)
|
129 |
+
],
|
130 |
+
-1,
|
131 |
+
)
|
132 |
+
|
133 |
+
# Residue embeds
|
134 |
+
B, L, N = residue_annotation_tokens.shape
|
135 |
+
residue_embed = self.residue_embed(
|
136 |
+
einops.rearrange(
|
137 |
+
residue_annotation_tokens, "B L N -> (B L) N", B=B, L=L, N=N
|
138 |
+
)
|
139 |
+
)
|
140 |
+
residue_embed = einops.rearrange(residue_embed, "(B L) D -> B L D", B=B, L=L)
|
141 |
+
|
142 |
+
return (
|
143 |
+
sequence_embed
|
144 |
+
+ plddt_embed
|
145 |
+
+ structure_per_res_plddt
|
146 |
+
+ structure_embed
|
147 |
+
+ ss8_embed
|
148 |
+
+ sasa_embed
|
149 |
+
+ function_embed
|
150 |
+
+ residue_embed
|
151 |
+
)
|
152 |
+
|
153 |
+
|
154 |
+
class OutputHeads(nn.Module):
|
155 |
+
def __init__(self, d_model: int):
|
156 |
+
super().__init__()
|
157 |
+
self.sequence_head = RegressionHead(d_model, 64)
|
158 |
+
self.structure_head = RegressionHead(d_model, 4096)
|
159 |
+
self.ss8_head = RegressionHead(d_model, 8 + 3)
|
160 |
+
self.sasa_head = RegressionHead(d_model, 16 + 3)
|
161 |
+
self.function_head = RegressionHead(d_model, 260 * 8)
|
162 |
+
self.residue_head = RegressionHead(d_model, 1478)
|
163 |
+
|
164 |
+
def forward(self, x: torch.Tensor, embed: torch.Tensor) -> ESMOutput:
|
165 |
+
sequence_logits = self.sequence_head(x)
|
166 |
+
structure_logits = self.structure_head(x)
|
167 |
+
secondary_structure_logits = self.ss8_head(x)
|
168 |
+
sasa_logits = self.sasa_head(x)
|
169 |
+
function_logits = self.function_head(x)
|
170 |
+
function_logits = einops.rearrange(
|
171 |
+
function_logits,
|
172 |
+
"... (k v) -> ... k v",
|
173 |
+
k=8,
|
174 |
+
)
|
175 |
+
|
176 |
+
residue_logits = self.residue_head(x)
|
177 |
+
|
178 |
+
return ESMOutput(
|
179 |
+
sequence_logits=sequence_logits,
|
180 |
+
structure_logits=structure_logits,
|
181 |
+
secondary_structure_logits=secondary_structure_logits,
|
182 |
+
sasa_logits=sasa_logits,
|
183 |
+
function_logits=function_logits,
|
184 |
+
residue_logits=residue_logits,
|
185 |
+
embeddings=embed,
|
186 |
+
)
|
187 |
+
|
188 |
+
|
189 |
+
class ESM3(nn.Module, ESM3InferenceClient):
|
190 |
+
"""
|
191 |
+
ESM3 model implementation.
|
192 |
+
|
193 |
+
Args:
|
194 |
+
d_model (int): The dimensionality of the input and output feature vectors.
|
195 |
+
n_heads (int): The number of attention heads in the transformer layers.
|
196 |
+
v_heads (int): The number of attention heads in the variational transformer layers.
|
197 |
+
n_layers (int): The number of transformer layers.
|
198 |
+
"""
|
199 |
+
|
200 |
+
def __init__(
|
201 |
+
self,
|
202 |
+
d_model: int,
|
203 |
+
n_heads: int,
|
204 |
+
v_heads: int,
|
205 |
+
n_layers: int,
|
206 |
+
structure_encoder_name: str,
|
207 |
+
structure_decoder_name: str,
|
208 |
+
function_decoder_name: str,
|
209 |
+
):
|
210 |
+
super().__init__()
|
211 |
+
self.encoder = EncodeInputs(d_model)
|
212 |
+
self.transformer = TransformerStack(
|
213 |
+
d_model,
|
214 |
+
n_heads,
|
215 |
+
v_heads,
|
216 |
+
n_layers,
|
217 |
+
mask_and_zero_frameless=True,
|
218 |
+
)
|
219 |
+
self.output_heads = OutputHeads(d_model)
|
220 |
+
|
221 |
+
self.structure_encoder_name = structure_encoder_name
|
222 |
+
self.structure_decoder_name = structure_decoder_name
|
223 |
+
self.function_decoder_name = function_decoder_name
|
224 |
+
|
225 |
+
self.structure_encoder: StructureTokenEncoder | None = None # type: ignore
|
226 |
+
self.structure_decoder: StructureTokenDecoder | None = None # type: ignore
|
227 |
+
self.function_decoder: FunctionTokenDecoder | None = None # type: ignore
|
228 |
+
|
229 |
+
self.tokenizers = get_model_tokenizers(ESM3_OPEN_SMALL)
|
230 |
+
|
231 |
+
@classmethod
|
232 |
+
def from_pretrained(
|
233 |
+
cls,
|
234 |
+
model_name: str = ESM3_OPEN_SMALL,
|
235 |
+
device: torch.device | str = "cpu",
|
236 |
+
) -> ESM3:
|
237 |
+
from esm.pretrained import load_local_model
|
238 |
+
if model_name not in [ESM3_OPEN_SMALL]:
|
239 |
+
raise ValueError(f"Model name {model_name} is not a valid ESM3 model name.")
|
240 |
+
model: ESM3 = load_local_model(model_name, device=device) # type: ignore
|
241 |
+
return model
|
242 |
+
|
243 |
+
def get_structure_token_encoder(self) -> StructureTokenEncoder:
|
244 |
+
if self.structure_encoder is None:
|
245 |
+
self.structure_encoder = self.load_model(self.structure_encoder_name) # type: ignore
|
246 |
+
return self.structure_encoder # type: ignore
|
247 |
+
|
248 |
+
def get_structure_token_decoder(self) -> StructureTokenDecoder:
|
249 |
+
if self.structure_decoder is None:
|
250 |
+
self.structure_decoder = self.load_model(self.structure_decoder_name) # type: ignore
|
251 |
+
return self.structure_decoder # type: ignore
|
252 |
+
|
253 |
+
def get_function_token_decoder(self) -> FunctionTokenDecoder:
|
254 |
+
if self.function_decoder is None:
|
255 |
+
self.function_decoder = self.load_model(self.function_decoder_name) # type: ignore
|
256 |
+
return self.function_decoder # type: ignore
|
257 |
+
|
258 |
+
def load_model(self, model_name: str):
|
259 |
+
# Lazy import from pretrained
|
260 |
+
from esm.pretrained import load_local_model
|
261 |
+
|
262 |
+
return load_local_model(model_name, device=next(self.parameters()).device)
|
263 |
+
|
264 |
+
def forward(
|
265 |
+
self,
|
266 |
+
*,
|
267 |
+
sequence_tokens: torch.Tensor | None = None,
|
268 |
+
structure_tokens: torch.Tensor | None = None,
|
269 |
+
ss8_tokens: torch.Tensor | None = None,
|
270 |
+
sasa_tokens: torch.Tensor | None = None,
|
271 |
+
function_tokens: torch.Tensor | None = None,
|
272 |
+
residue_annotation_tokens: torch.Tensor | None = None,
|
273 |
+
average_plddt: torch.Tensor | None = None,
|
274 |
+
per_res_plddt: torch.Tensor | None = None,
|
275 |
+
structure_coords: torch.Tensor | None = None,
|
276 |
+
chain_id: torch.Tensor | None = None,
|
277 |
+
sequence_id: torch.Tensor | None = None,
|
278 |
+
) -> ESMOutput:
|
279 |
+
"""
|
280 |
+
Performs forward pass through the ESM3 model. Check utils to see how to tokenize inputs from raw data.
|
281 |
+
|
282 |
+
Args:
|
283 |
+
sequence_tokens (torch.Tensor, optional): The amino acid tokens.
|
284 |
+
structure_tokens (torch.Tensor, optional): The structure tokens.
|
285 |
+
ss8_tokens (torch.Tensor, optional): The secondary structure tokens.
|
286 |
+
sasa_tokens (torch.Tensor, optional): The solvent accessible surface area tokens.
|
287 |
+
function_tokens (torch.Tensor, optional): The function tokens.
|
288 |
+
residue_annotation_tokens (torch.Tensor, optional): The residue annotation tokens.
|
289 |
+
average_plddt (torch.Tensor, optional): The average plddt across the entire sequence.
|
290 |
+
per_res_plddt (torch.Tensor, optional): The per residue plddt, if you want to specify exact plddts, use this,
|
291 |
+
otherwise, use average_plddt.
|
292 |
+
structure_coords (torch.Tensor, optional): The structure coordinates, in the form of (B, L, 3, 3).
|
293 |
+
chain_id (torch.Tensor, optional): The chain ID
|
294 |
+
sequence_id (torch.Tensor, optional): The sequence ID.
|
295 |
+
|
296 |
+
Returns:
|
297 |
+
ESMOutput: The output of the ESM3 model.
|
298 |
+
|
299 |
+
Raises:
|
300 |
+
ValueError: If at least one of the inputs is None.
|
301 |
+
|
302 |
+
"""
|
303 |
+
# Reasonable defaults:
|
304 |
+
try:
|
305 |
+
L, device = next(
|
306 |
+
(x.shape[1], x.device)
|
307 |
+
for x in [
|
308 |
+
sequence_tokens,
|
309 |
+
structure_tokens,
|
310 |
+
ss8_tokens,
|
311 |
+
sasa_tokens,
|
312 |
+
structure_coords,
|
313 |
+
function_tokens,
|
314 |
+
residue_annotation_tokens,
|
315 |
+
]
|
316 |
+
if x is not None
|
317 |
+
)
|
318 |
+
except StopIteration:
|
319 |
+
raise ValueError("At least one of the inputs must be non-None")
|
320 |
+
|
321 |
+
t = self.tokenizers
|
322 |
+
defaults = lambda x, tok: (
|
323 |
+
torch.full((1, L), tok, dtype=torch.long, device=device) if x is None else x
|
324 |
+
)
|
325 |
+
sequence_tokens = defaults(sequence_tokens, t.sequence.mask_token_id)
|
326 |
+
ss8_tokens = defaults(ss8_tokens, C.SS8_UNK_TOKEN)
|
327 |
+
sasa_tokens = defaults(sasa_tokens, C.SASA_UNK_TOKEN)
|
328 |
+
average_plddt = defaults(average_plddt, 1).float()
|
329 |
+
per_res_plddt = defaults(per_res_plddt, 0).float()
|
330 |
+
chain_id = defaults(chain_id, 0)
|
331 |
+
sequence_id = defaults(sequence_id, 0)
|
332 |
+
|
333 |
+
if residue_annotation_tokens is None:
|
334 |
+
residue_annotation_tokens = torch.full(
|
335 |
+
(1, L, 16), C.RESIDUE_PAD_TOKEN, dtype=torch.long, device=device
|
336 |
+
)
|
337 |
+
|
338 |
+
if function_tokens is None:
|
339 |
+
function_tokens = torch.full(
|
340 |
+
(1, L, 8), C.INTERPRO_PAD_TOKEN, dtype=torch.long, device=device
|
341 |
+
)
|
342 |
+
|
343 |
+
if structure_coords is None:
|
344 |
+
structure_coords = torch.full(
|
345 |
+
(1, L, 3, 3), float("nan"), dtype=torch.float, device=device
|
346 |
+
)
|
347 |
+
|
348 |
+
structure_coords = structure_coords[
|
349 |
+
..., :3, :
|
350 |
+
] # In case we pass in an atom14 or atom37 repr
|
351 |
+
affine, affine_mask = build_affine3d_from_coordinates(structure_coords)
|
352 |
+
|
353 |
+
if structure_tokens is None:
|
354 |
+
_, structure_tokens = self.get_structure_token_encoder().encode(
|
355 |
+
structure_coords
|
356 |
+
)
|
357 |
+
assert structure_tokens is not None
|
358 |
+
structure_tokens = (
|
359 |
+
structure_tokens.masked_fill(
|
360 |
+
(structure_tokens == -1) | ~affine_mask, C.STRUCTURE_MASK_TOKEN
|
361 |
+
)
|
362 |
+
.masked_fill(sequence_tokens == C.SEQUENCE_BOS_TOKEN, C.STRUCTURE_BOS_TOKEN)
|
363 |
+
.masked_fill(sequence_tokens == C.SEQUENCE_PAD_TOKEN, C.STRUCTURE_PAD_TOKEN)
|
364 |
+
.masked_fill(sequence_tokens == C.SEQUENCE_EOS_TOKEN, C.STRUCTURE_EOS_TOKEN)
|
365 |
+
.masked_fill(
|
366 |
+
sequence_tokens == C.SEQUENCE_CHAINBREAK_TOKEN,
|
367 |
+
C.STRUCTURE_CHAINBREAK_TOKEN,
|
368 |
+
)
|
369 |
+
)
|
370 |
+
|
371 |
+
x = self.encoder(
|
372 |
+
sequence_tokens,
|
373 |
+
structure_tokens,
|
374 |
+
average_plddt,
|
375 |
+
per_res_plddt,
|
376 |
+
ss8_tokens,
|
377 |
+
sasa_tokens,
|
378 |
+
function_tokens,
|
379 |
+
residue_annotation_tokens,
|
380 |
+
)
|
381 |
+
x, embedding = self.transformer(x, sequence_id, affine, affine_mask, chain_id)
|
382 |
+
return self.output_heads(x, embedding)
|
383 |
+
|
384 |
+
# The following methods are for the ESM3InferenceClient interface
|
385 |
+
def generate(self, input: ProteinType, config: GenerationConfig) -> ProteinType:
|
386 |
+
if isinstance(input, ESMProtein):
|
387 |
+
return iterative_sampling_raw(self, input, config)
|
388 |
+
elif isinstance(input, ESMProteinTensor):
|
389 |
+
return iterative_sampling_tokens(self, input, config, self.tokenizers)
|
390 |
+
else:
|
391 |
+
raise ValueError("Input must be an ESMProtein or ESMProteinTensor")
|
392 |
+
|
393 |
+
def encode(self, input: ESMProtein) -> ESMProteinTensor:
|
394 |
+
input = attr.evolve(input) # Make a copy
|
395 |
+
|
396 |
+
sequence_tokens = None
|
397 |
+
structure_tokens = None
|
398 |
+
secondary_structure_tokens = None
|
399 |
+
sasa_tokens = None
|
400 |
+
function_tokens = None
|
401 |
+
residue_annotation_tokens = None
|
402 |
+
|
403 |
+
coordinates = None
|
404 |
+
|
405 |
+
if input.sequence is not None:
|
406 |
+
sequence_tokens = encoding.tokenize_sequence(
|
407 |
+
input.sequence, self.tokenizers.sequence, add_special_tokens=True
|
408 |
+
)
|
409 |
+
if input.secondary_structure is not None:
|
410 |
+
secondary_structure_tokens = encoding.tokenize_secondary_structure(
|
411 |
+
input.secondary_structure,
|
412 |
+
self.tokenizers.secondary_structure,
|
413 |
+
add_special_tokens=True,
|
414 |
+
)
|
415 |
+
if input.sasa is not None:
|
416 |
+
sasa_tokens = encoding.tokenize_sasa(
|
417 |
+
input.sasa, self.tokenizers.sasa, add_special_tokens=True
|
418 |
+
)
|
419 |
+
|
420 |
+
# Infer input length
|
421 |
+
sequence_length = -1
|
422 |
+
if sequence_tokens is not None:
|
423 |
+
sequence_length = len(sequence_tokens)
|
424 |
+
elif secondary_structure_tokens is not None:
|
425 |
+
sequence_length = len(secondary_structure_tokens)
|
426 |
+
elif sasa_tokens is not None:
|
427 |
+
sequence_length = len(sasa_tokens)
|
428 |
+
|
429 |
+
# Try to infer input length from structure data
|
430 |
+
if input.coordinates is not None:
|
431 |
+
coordinates, _, structure_tokens = encoding.tokenize_structure(
|
432 |
+
input.coordinates,
|
433 |
+
self.get_structure_token_encoder(),
|
434 |
+
structure_tokenizer=self.tokenizers.structure,
|
435 |
+
reference_sequence=input.sequence or "",
|
436 |
+
add_special_tokens=True,
|
437 |
+
)
|
438 |
+
if sequence_length == -1:
|
439 |
+
sequence_length = len(structure_tokens)
|
440 |
+
|
441 |
+
if sequence_length == -1:
|
442 |
+
raise ValueError(
|
443 |
+
"Cannot infer input length from input data. Please provide one of: sequence, structure, secondary_structure, sasa.\n"
|
444 |
+
"To condition on sequence length only, use ESM3LocalInferenceClient.get_default_sequence(sequence_length) to generate a default sequence input."
|
445 |
+
)
|
446 |
+
|
447 |
+
# Function and Residue annotations
|
448 |
+
if input.function_annotations is not None:
|
449 |
+
if input.sequence is None:
|
450 |
+
reference_sequence = encoding.get_default_sequence(sequence_length - 2)
|
451 |
+
else:
|
452 |
+
reference_sequence = input.sequence
|
453 |
+
(
|
454 |
+
function_tokens,
|
455 |
+
residue_annotation_tokens,
|
456 |
+
) = encoding.tokenize_function_annotations(
|
457 |
+
input.function_annotations,
|
458 |
+
reference_sequence=reference_sequence,
|
459 |
+
function_tokenizer=self.tokenizers.function,
|
460 |
+
residue_annotation_tokenizer=self.tokenizers.residue_annotations,
|
461 |
+
add_special_tokens=True,
|
462 |
+
)
|
463 |
+
|
464 |
+
return ESMProteinTensor(
|
465 |
+
sequence=sequence_tokens,
|
466 |
+
structure=structure_tokens,
|
467 |
+
secondary_structure=secondary_structure_tokens,
|
468 |
+
sasa=sasa_tokens,
|
469 |
+
function=function_tokens,
|
470 |
+
residue_annotations=residue_annotation_tokens,
|
471 |
+
coordinates=coordinates,
|
472 |
+
).to(next(self.parameters()).device)
|
473 |
+
|
474 |
+
def decode(
|
475 |
+
self,
|
476 |
+
input: ESMProteinTensor,
|
477 |
+
) -> ESMProtein:
|
478 |
+
return decode_protein_tensor(
|
479 |
+
input=input,
|
480 |
+
tokenizers=self.tokenizers,
|
481 |
+
structure_token_decoder=self.get_structure_token_decoder(),
|
482 |
+
function_token_decoder=self.get_function_token_decoder(),
|
483 |
+
)
|
484 |
+
|
485 |
+
def _forward(
|
486 |
+
self, input: ESMProteinTensor, config: ForwardConfig = ForwardConfig()
|
487 |
+
) -> ForwardOutput:
|
488 |
+
# Default plddt conditioning for inference. 1s where coordinates are provided.
|
489 |
+
if input.coordinates is None:
|
490 |
+
per_res_plddt = None
|
491 |
+
else:
|
492 |
+
# 1.0 if all coordinates at specific indices have valid non-nan values.
|
493 |
+
per_res_plddt = input.coordinates.isfinite().all(dim=-1).any(dim=-1).float()
|
494 |
+
|
495 |
+
with torch.no_grad() if self.eval else contextlib.nullcontext():
|
496 |
+
output = self.forward(
|
497 |
+
sequence_tokens=input.sequence,
|
498 |
+
structure_tokens=input.structure,
|
499 |
+
ss8_tokens=input.secondary_structure,
|
500 |
+
sasa_tokens=input.sasa,
|
501 |
+
function_tokens=input.function,
|
502 |
+
residue_annotation_tokens=input.residue_annotations,
|
503 |
+
average_plddt=torch.tensor(1.0, device=input.device),
|
504 |
+
per_res_plddt=per_res_plddt,
|
505 |
+
structure_coords=input.coordinates,
|
506 |
+
chain_id=None,
|
507 |
+
sequence_id=None,
|
508 |
+
)
|
509 |
+
|
510 |
+
if config.return_logits:
|
511 |
+
logits = ForwardTrackData(
|
512 |
+
sequence=output.sequence_logits,
|
513 |
+
structure=output.structure_logits,
|
514 |
+
secondary_structure=output.secondary_structure_logits,
|
515 |
+
sasa=output.sasa_logits,
|
516 |
+
function=output.function_logits,
|
517 |
+
)
|
518 |
+
else:
|
519 |
+
logits = None
|
520 |
+
|
521 |
+
return ForwardOutput(
|
522 |
+
logits=logits,
|
523 |
+
residue_annotation_logits=output.residue_logits,
|
524 |
+
embeddings=output.embeddings if config.return_embeddings else None,
|
525 |
+
)
|
526 |
+
|
527 |
+
def forward_and_sample(
|
528 |
+
self, input: ESMProteinTensor, sampling_configuration: SamplingConfig
|
529 |
+
) -> ForwardAndSampleOutput:
|
530 |
+
protein_tensor = attr.evolve(input) # Make a copy
|
531 |
+
|
532 |
+
def maybe_clone(x: torch.Tensor | None) -> torch.Tensor | None:
|
533 |
+
return x.clone() if x is not None else None
|
534 |
+
|
535 |
+
device = next(self.parameters()).device
|
536 |
+
|
537 |
+
sampling_config = sampling_configuration
|
538 |
+
if sampling_config is None:
|
539 |
+
sampling_config = get_default_sampling_config(self.tokenizers)
|
540 |
+
|
541 |
+
# Initialize default values for missing tracks
|
542 |
+
default_protein_tensor = ESMProteinTensor.empty(
|
543 |
+
len(input) - 2, tokenizers=self.tokenizers, device=input.device
|
544 |
+
)
|
545 |
+
for track in attr.fields(ESMProteinTensor):
|
546 |
+
if getattr(protein_tensor, track.name, None) is None:
|
547 |
+
setattr(
|
548 |
+
protein_tensor,
|
549 |
+
track.name,
|
550 |
+
getattr(default_protein_tensor, track.name, None),
|
551 |
+
)
|
552 |
+
|
553 |
+
# Preprocessing
|
554 |
+
sequence_length: int = -1
|
555 |
+
for track in [
|
556 |
+
"sequence",
|
557 |
+
"structure",
|
558 |
+
"secondary_structure",
|
559 |
+
"sasa",
|
560 |
+
"function",
|
561 |
+
"residue_annotations",
|
562 |
+
]:
|
563 |
+
input_tensor: torch.Tensor | None = getattr(protein_tensor, track, None)
|
564 |
+
if input_tensor is not None:
|
565 |
+
# Add batch dimension if necessary
|
566 |
+
if track in ["sequence", "structure", "secondary_structure", "sasa"]:
|
567 |
+
if len(input_tensor.size()) == 1:
|
568 |
+
input_tensor = input_tensor.unsqueeze(0) # (L,) -> (1, L)
|
569 |
+
elif track in ["function", "residue_annotations"]:
|
570 |
+
if len(input_tensor.size()) == 2:
|
571 |
+
input_tensor = input_tensor.unsqueeze(0) # (L, O) -> (1, L, O)
|
572 |
+
|
573 |
+
# Check length consistency
|
574 |
+
if sequence_length == -1:
|
575 |
+
sequence_length = input_tensor.size(1)
|
576 |
+
else:
|
577 |
+
if input_tensor.size(1) != sequence_length:
|
578 |
+
raise ValueError(
|
579 |
+
f"Length mismatch for track {track}. Expected {sequence_length}, got {input_tensor.size(1)}"
|
580 |
+
)
|
581 |
+
|
582 |
+
# Move input tensor to model device
|
583 |
+
input_tensor = input_tensor.to(device)
|
584 |
+
setattr(protein_tensor, track, input_tensor)
|
585 |
+
|
586 |
+
if protein_tensor.coordinates is not None:
|
587 |
+
coordinates = protein_tensor.coordinates
|
588 |
+
if len(coordinates.size()) == 3:
|
589 |
+
coordinates = coordinates.unsqueeze(0)
|
590 |
+
protein_tensor.coordinates = coordinates.to(device)
|
591 |
+
sequence_length = coordinates.size(1)
|
592 |
+
|
593 |
+
if sequence_length == -1:
|
594 |
+
raise ValueError("No input data provided")
|
595 |
+
|
596 |
+
# Forward pass
|
597 |
+
forward_output = self._forward(
|
598 |
+
protein_tensor,
|
599 |
+
ForwardConfig(
|
600 |
+
ReturnLogitsConfig(
|
601 |
+
                    sequence=True,
                    structure=True,
                    secondary_structure=True,
                    sasa=True,
                    function=True,
                    residue_annotations=True,
                ),
                return_embeddings=True,
            ),
        )

        # Sampling
        tokens_dir = {}
        track_sampling_metadata_dir: dict[str, dict | None] = {}
        for track in ["sequence", "structure", "secondary_structure", "sasa"]:
            config = getattr(sampling_config, track)
            if config is None:
                tokens_dir[track] = maybe_clone(getattr(input, track))
                continue
            sampling_metadata = self._sample_track(
                logits=getattr(forward_output.logits, track)[0, ...],
                tokens=getattr(protein_tensor, track)[0, ...],
                sampling_track_config=config,
                mask_idx=getattr(self.tokenizers, track).mask_token_id,
            )
            tokens_dir[track] = sampling_metadata.pop("sampled_tokens")  # (L,)
            track_sampling_metadata_dir[track] = sampling_metadata

        # Sample function and residue annotations separately
        config = getattr(sampling_config, "function")
        if config is None:
            tokens_dir["function"] = maybe_clone(getattr(input, "function"))
            tokens_dir["residue_annotations"] = maybe_clone(
                getattr(input, "residue_annotations")
            )
        else:
            sampling_metadata = self._sample_function_track(
                tokens=getattr(protein_tensor, "function")[0, ...],
                logits=getattr(forward_output.logits, "function")[0, ...],
                sampling_track_config=config,
            )
            tokens_dir["function"] = sampling_metadata.pop("sampled_tokens")  # (L, D)
            track_sampling_metadata_dir["function"] = sampling_metadata

            sampled_tokens, _ = sample_residue_annotation_logits(
                logits=forward_output.residue_annotation_logits[0, ...]  # type: ignore
            )
            tokens_dir["residue_annotations"] = sampled_tokens  # (L, MAX_R)

        # Format output
        forward_and_sample_output_dir = {}
        forward_and_sample_output_dir["protein_tensor"] = ESMProteinTensor(**tokens_dir)
        for property in [
            "entropy",
            "prob",
            "logprob",
            "top_prob",
            "topk_logprob",
            "topk_tokens",
        ]:
            is_all_none = True
            forward_track_data_dir = {}
            for track in track_sampling_metadata_dir.keys():
                values = track_sampling_metadata_dir[track]
                if values is not None and values.get(property, None) is not None:
                    forward_track_data_dir[track] = values.get(property, None)
                    is_all_none = False
            if not is_all_none:
                forward_and_sample_output_dir[property] = ForwardTrackData(
                    **forward_track_data_dir
                )
            else:
                forward_and_sample_output_dir[property] = None

        perres_embed = (
            forward_output.embeddings[0]  # type: ignore
            if sampling_configuration.return_per_residue_embeddings
            else None
        )
        mean_embedding = (
            forward_output.embeddings[0].mean(1)  # type: ignore
            if sampling_configuration.return_per_residue_embeddings
            else None
        )

        return ForwardAndSampleOutput(
            per_residue_embedding=perres_embed,
            mean_embedding=mean_embedding,
            **forward_and_sample_output_dir,
        )

    def _sample_track(
        self,
        logits: torch.Tensor,
        tokens: torch.Tensor,
        sampling_track_config: SamplingTrackConfig,
        mask_idx: int,
    ) -> dict[str, torch.Tensor]:
        # Sample in all positions
        temperature = sampling_track_config.temperature
        sampled_tokens = sample_logits(
            logits, temperature=temperature, top_p=sampling_track_config.top_p
        )
        log_probs = logits.log_softmax(-1)

        # Do not sample at BOS and EOS tokens
        sampling_mask = torch.ones_like(tokens, dtype=torch.bool)  # (L, )
        sampling_mask[0] = False
        sampling_mask[-1] = False

        # Do not sample at special token positions but allow sampling at mask token
        special_minus_mask = list(set(sampling_track_config.invalid_ids) - {mask_idx})
        if len(special_minus_mask) > 0:
            special_tokens = torch.tensor(special_minus_mask, device=tokens.device)
            assert special_tokens.numel() > 0
            sampling_mask = sampling_mask & (
                tokens[..., None] != special_tokens[None, :]
            ).all(-1)

        # Keep only samples from masked positions (if specified)
        if sampling_track_config.only_sample_masked_tokens:
            masked_tokens = tokens == mask_idx
            sampling_mask = sampling_mask & masked_tokens
        sampled_tokens = torch.where(sampling_mask, sampled_tokens, tokens)

        return self._compute_track_metadata(
            sampled_tokens,
            log_probs,
            sampling_mask,
            top_k=sampling_track_config.topk_logprobs,
        )

    def _sample_function_track(
        self,
        tokens: torch.Tensor,
        logits: torch.Tensor,
        sampling_track_config: SamplingTrackConfig,
    ) -> dict[str, torch.Tensor]:
        # Do not sample at BOS and EOS tokens
        sampling_mask = torch.ones_like(tokens, dtype=torch.bool)
        sampling_mask[0] = False
        sampling_mask[-1] = False

        sampled_tokens, probs = sample_function_logits(
            logits,
            self.tokenizers.function,
            top_p=sampling_track_config.top_p,
            temperature=sampling_track_config.temperature,
        )

        if sampling_track_config.only_sample_masked_tokens:
            raise ValueError(
                "Sampling only masked tokens is undefined for function tokens."
            )

        sampled_tokens = torch.where(sampling_mask, sampled_tokens, tokens)  # (L, D)

        return self._compute_track_metadata(
            sampled_tokens,
            probs,
            sampling_mask,
            top_k=sampling_track_config.topk_logprobs,
        )

    @staticmethod
    def _compute_track_metadata(
        sampled_tokens: torch.Tensor,
        log_probs: torch.Tensor,
        sampling_mask: torch.Tensor,
        top_k: int,
    ) -> dict:
        probs = torch.exp(log_probs)  # (B, L)
        entropy = torch.distributions.Categorical(probs=probs).entropy()  # (B, L)

        # Only compute probabilities for sampled tokens
        sampled_logprob = torch.zeros_like(
            sampled_tokens, dtype=torch.float32
        )  # (B, L)
        sampled_tokens_valid = sampled_tokens[sampling_mask]
        sampled_log_probs_valid = log_probs[sampling_mask, sampled_tokens_valid]
        sampled_logprob[sampling_mask] = sampled_log_probs_valid

        # Calculate extra metadata
        sampled_prob = torch.exp(sampled_logprob)
        top_prob = torch.max(probs, dim=-1).values
        topk_logprobs, topk_tokens = torch.topk(log_probs, top_k, dim=-1)
        topk_logprobs = None if top_k == 0 else topk_logprobs
        topk_tokens = None if top_k == 0 else topk_tokens

        return {
            "entropy": entropy,
            "sampled_tokens": sampled_tokens,
            "prob": sampled_prob,
            "logprob": sampled_logprob,
            "top_prob": top_prob,
            "topk_logprob": topk_logprobs,
            "topk_tokens": topk_tokens,
        }
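The per-track sampling above follows a single pattern: sample a replacement token at every position, build a boolean mask that protects BOS/EOS, other special tokens, and (optionally) every position that was not masked, apply the mask with `torch.where`, and then read per-position statistics (entropy, log-probability of the sampled token, top-k alternatives) off the softmax distribution. A minimal, self-contained sketch of that pattern in plain PyTorch follows; the function name, tensor shapes, and vocabulary size are illustrative only and are not part of this repository's API.

import torch

def masked_sample_with_metadata(logits, tokens, mask_idx, temperature=1.0, top_k=3):
    # logits: (L, V) per-position logits; tokens: (L,) current token ids.
    probs = (logits / temperature).softmax(-1)
    sampled = torch.multinomial(probs, num_samples=1).squeeze(-1)  # (L,)

    # Only overwrite masked positions, and never the BOS/EOS boundary positions.
    sampling_mask = tokens == mask_idx
    sampling_mask[0] = False
    sampling_mask[-1] = False
    sampled = torch.where(sampling_mask, sampled, tokens)

    log_probs = logits.log_softmax(-1)
    entropy = torch.distributions.Categorical(probs=probs).entropy()  # (L,)
    logprob = log_probs.gather(-1, sampled[:, None]).squeeze(-1)  # (L,)
    topk_logprob, topk_tokens = log_probs.topk(top_k, dim=-1)  # (L, top_k)
    return sampled, {
        "entropy": entropy,
        "logprob": logprob,
        "topk_logprob": topk_logprob,
        "topk_tokens": topk_tokens,
    }

# Example: 8 positions, a 32-token vocabulary, and mask token id 31.
logits = torch.randn(8, 32)
tokens = torch.full((8,), 31)
sampled, metadata = masked_sample_with_metadata(logits, tokens, mask_idx=31)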
esm/models/function_decoder.py
ADDED
@@ -0,0 +1,338 @@
"""Function Token Decoder."""
from collections import defaultdict
from dataclasses import dataclass, field

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

from esm.layers.regression_head import RegressionHead
from esm.layers.transformer_stack import TransformerStack
from esm.tokenization.function_tokenizer import (
    InterProQuantizedTokenizer,
)
from esm.utils.constants import esm3 as C
from esm.utils.misc import merge_ranges
from esm.utils.types import FunctionAnnotation


@dataclass(frozen=True)
class FunctionTokenDecoderConfig:
    """Configures function token decoder."""

    # Embedding dimension of decoder.
    d_model: int = 1024
    # Number of attention heads of decoder.
    n_heads: int = 8
    # Number of layers of decoder.
    n_layers: int = 3
    # Number of integer values that function tokens may assume.
    function_token_vocab_size: int = 260
    # Number of function tokens at each position.
    function_token_depth: int = 8
    # Number of InterPro labels that can be decoded.
    num_interpro_classes: int = 29026
    # Number of function keywords that can be decoded.
    keyword_vocabulary_size: int = 58641
    # List of supported InterPro ids.
    interpro_entry_list: str = field(
        default_factory=lambda: str(C.data_root() / C.INTERPRO_ENTRY)
    )
    # Path to keywords vocabulary.
    keyword_vocabulary_path: str = field(
        default_factory=lambda: str(C.data_root() / C.KEYWORDS_VOCABULARY)
    )
    # Whether to unpack LSH bits into single-bit tokens.
    unpack_lsh_bits: bool = True
    # The number of special tokens in the function tokenizer vocabulary which come
    # before the LSH tokens.
    num_special_tokens: int = 4
    # The number of bits per LSH token in the function tokenizer.
    bits_per_token: int = 8


class FunctionTokenDecoder(nn.Module):
    def __init__(self, config: FunctionTokenDecoderConfig | None = None):
        """Constructs function token decoder."""
        super().__init__()
        if config is None:
            config = FunctionTokenDecoderConfig()
        self.config = config

        # Get the supported set of InterPro ids.
        df = pd.read_csv(config.interpro_entry_list, sep="\t")
        self.interpro_ids = sorted(df.ENTRY_AC)
        self.interpro2index = {
            interpro_id: i for i, interpro_id in enumerate(self.interpro_ids)
        }
        assert len(self.interpro_ids) == config.num_interpro_classes

        with open(config.keyword_vocabulary_path, "r") as f:
            self.keywords_vocabulary: list[str] = list(f.read().strip().split("\n"))
        assert len(self.keywords_vocabulary) == config.keyword_vocabulary_size

        if config.unpack_lsh_bits:
            vocab_size = 2 * config.function_token_depth * config.bits_per_token
        else:
            # Function-token id's re-use the same token ids at each position along the depth
            # dimension, despite distinct meanings. The decoder should take this into
            # account so create distinct embeddings for tokens at each position.
            vocab_size = (
                self.config.function_token_depth * self.config.function_token_vocab_size
            )

        self.embedding = nn.Embedding(
            # Function-token id's re-use the same token ids at each position along the
            # depth dimension, despite distinct meanings. The decoder should take this
            # into account so create distinct embeddings for tokens at each position.
            num_embeddings=(vocab_size),
            embedding_dim=config.d_model,
        )
        self.decoder = TransformerStack(
            d_model=config.d_model,
            n_heads=config.n_heads,
            v_heads=None,
            n_layers=config.n_layers,
            n_layers_geom=0,
            scale_residue=False,
            bias=True,
            qk_layernorm=False,
            ffn_type="gelu",
            expansion_ratio=4,
        )
        self.heads = nn.ModuleDict(
            {
                # Binary classification head predicting which keywords are present.
                "keyword_logits": RegressionHead(
                    d_model=config.d_model,
                    output_dim=config.keyword_vocabulary_size,
                    hidden_dim=4 * config.d_model,
                ),
                # Regresses the TF-IDF value of each present keyword.
                "keyword_tfidf": RegressionHead(
                    d_model=config.d_model,
                    output_dim=config.keyword_vocabulary_size,
                    hidden_dim=4 * config.d_model,
                ),
                # Predicts which InterPro annotations are present.
                "interpro_logits": RegressionHead(
                    d_model=config.d_model,
                    output_dim=config.num_interpro_classes,
                    hidden_dim=4 * config.d_model,
                ),
            }
        )

    def forward(self, token_ids: torch.Tensor) -> dict[str, torch.Tensor]:
        """Forward pass through function token decoder.

        Args:
            token_ids: <int>[batch_size, function_token_depth] batch of function token
                ids to decode.
        Returns:
            interpro_logits: binary classification logits tensor of shape
                <float>[batch_size, num_interpro_classes]
        """
        assert token_ids.ndim == 2
        assert token_ids.shape[1] == self.config.function_token_depth
        batch_size, depth = token_ids.shape

        if self.config.unpack_lsh_bits:
            # Shift values into [0, 2^bits/token)
            lsh_bits = token_ids - self.config.num_special_tokens
            # extract each bit. (hob stands for highest-order bit)
            bits = torch.concat(
                [
                    torch.bitwise_and(lsh_bits, 1 << hob).gt(0).to(torch.int32)
                    for hob in range(self.config.bits_per_token)
                ],
                dim=1,
            )
            assert bits.shape == (batch_size, depth * self.config.bits_per_token)

            # Shift each bit into individual vocabulary ranges, so they get distinct
            # embeddings.
            vocab_offsets = 2 * torch.arange(
                depth * self.config.bits_per_token, device=token_ids.device
            )
            inputs = vocab_offsets[None, :] + bits

            # zero-out special tokens, i.e. non LSH tokens.
            where_special = token_ids < self.config.num_special_tokens
            inputs = torch.where(where_special.any(dim=1, keepdim=True), 0, inputs)
        else:
            # Apply depth-position offset to use distinct vocabs. See __init__ for
            # explanation.
            vocab_offsets = self.config.function_token_vocab_size * torch.arange(
                self.config.function_token_depth,
                device=token_ids.device,
            )
            inputs = token_ids + vocab_offsets[None, :]

        embed = self.embedding(inputs)
        encoding, _ = self.decoder(embed)
        pooled = torch.mean(encoding, dim=1)

        return {name: head(pooled) for name, head in self.heads.items()}

    @property
    def device(self) -> torch.device:
        return next(self.parameters()).device

    def decode(
        self,
        function_token_ids: torch.Tensor,
        tokenizer: InterProQuantizedTokenizer,
        decode_annotations: bool = True,
        annotation_threshold: float = 0.1,
        decode_keywords=True,
        keywords_threshold: float = 0.5,
        annotation_min_length: int | None = 5,
        annotation_gap_merge_max: int | None = 3,
    ):
        """Decodes function tokens into predicted annotations and keywords.

        Args:
            function_token_ids: <int>[length, depth] function token ids. NOTE:
                without <bos>/<eos> prefix
            tokenizer: function tokenizer.
            decode_annotations: whether to decode InterPro annotations.
            annotation_threshold: threshold for emitting a function annotation.
            decode_keywords: whether to decode function keywords.
            keywords_threshold: threshold for emitting a keyword.
            annotation_min_length: optional minimum length of predicted annotations for
                size filtering.
            annotation_gap_merge_max: optionally merge adjacent annotations of the same type
        Returns:
            Decoder outputs:
            - "interpro_logits": <float>[length, num_interpro] predicted InterPro logits.
            - "interpro_preds": <bool>[length, num_interpro] predicted InterPro labels.
            - "interpro_annotations": list[FunctionAnnotation] predicted InterPro
                annotations
            - "keyword_logits": <float>[length, keyword_vocabulary] binary prediction
                logits for keywords.
            - "function_keywords": list[FunctionAnnotation] predicted function keyword
                ranges.
        """
        assert function_token_ids.ndim == 2
        assert function_token_ids.shape[1] == tokenizer.depth
        assert self.config.function_token_depth == tokenizer.depth

        outputs = {}

        outputs = self(function_token_ids.to(self.device))

        # Only decode in positions that have function tokens.
        where_decode = torch.all(
            (function_token_ids != tokenizer.vocab_to_index["<pad>"])
            & (function_token_ids != tokenizer.vocab_to_index["<none>"])
            & (function_token_ids != tokenizer.vocab_to_index["<unk>"]),
            dim=1,
        )

        # Decode InterPro annotations ranges.
        interpro_preds = F.sigmoid(outputs["interpro_logits"])
        interpro_preds = interpro_preds >= annotation_threshold
        interpro_preds[~where_decode, :] = False
        outputs["interpro_preds"] = interpro_preds
        if decode_annotations:
            annotations: list[FunctionAnnotation] = []
            preds: np.ndarray = interpro_preds.detach().cpu().numpy()
            for position_index, class_index in zip(*preds.nonzero()):
                interpro_id = self.interpro_ids[class_index]
                annotation = FunctionAnnotation(
                    label=interpro_id,
                    start=position_index + 1,  # zero-index -> one-index inclusive
                    end=position_index + 1,  # zero-index -> one-index inclusive
                )
                annotations.append(annotation)

            annotations = _merge_annotations(
                annotations,
                merge_gap_max=annotation_gap_merge_max,
            )

            # Drop very small annotations.
            if annotation_min_length is not None:
                annotations = [
                    annotation
                    for annotation in annotations
                    if annotation.end - annotation.start + 1 >= annotation_min_length
                ]

            outputs["interpro_annotations"] = annotations

        # Decode function keyword ranges.
        keyword_logits = outputs["keyword_logits"]
        keyword_logits[~where_decode, :] = -torch.inf
        if decode_keywords:
            keyword_preds = F.sigmoid(keyword_logits) >= keywords_threshold
            outputs["function_keywords"] = self._preds_to_keywords(
                keyword_preds.detach().cpu().numpy()
            )

        return outputs

    def _preds_to_keywords(self, keyword_preds: np.ndarray) -> list[FunctionAnnotation]:
        """Converts output log-TFIDF to predicted keywords over the sequence.

        Args:
            keyword_preds: <bool>[length, keyword_vocab] positional predictions of
                function keywords from the keyword prediction head.
        Returns:
            Non-overlapping keyword annotated ranges along the sequence. Note that indices
            will index into the *sequence*, not the function token array which has a
            <pad> prefix.
        """
        assert keyword_preds.ndim == 2
        assert keyword_preds.shape[1] == self.config.keyword_vocabulary_size

        keyword_positions: dict[str, list[range]] = defaultdict(list)
        for position, keyword_id in zip(*np.nonzero(keyword_preds)):
            keyword = self.keywords_vocabulary[keyword_id]
            keyword_positions[keyword].append(range(position, position + 1))

        annotations: list[FunctionAnnotation] = []
        for keyword, ranges in keyword_positions.items():
            for range_ in merge_ranges(ranges):
                annotation = FunctionAnnotation(
                    label=keyword,
                    start=range_.start + 1,  # zero-index -> one-index
                    end=range_.stop + 1 - 1,  # zero-index excl -> one-index incl
                )
                annotations.append(annotation)

        return annotations


def _merge_annotations(
    annotations: list[FunctionAnnotation],
    merge_gap_max: int | None = None,
) -> list[FunctionAnnotation]:
    """Merges annotations into non-overlapping segments.

    Args:
        annotations: annotations to merge.
        merge_gap_max: optionally merge neighboring ranges that are separated by a gap
            no larger than this size.
    Returns:
        non-overlapping annotations with gaps merged.
    """
    grouped: dict[str, list[range]] = defaultdict(list)
    for a in annotations:
        # Convert one-indexed inclusive-inclusive, to range()
        grouped[a.label].append(range(a.start, a.end + 1))

    merged = []
    for label, ranges in grouped.items():
        merged_ranges = merge_ranges(ranges, merge_gap_max=merge_gap_max)
        for range_ in merged_ranges:
            annotation = FunctionAnnotation(
                label=label,
                start=range_.start + 1,  # zero-index -> one-index
                end=range_.stop - 1,  # zero-index excl -> one-index incl
            )
            merged.append(annotation)
    return merged
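When `unpack_lsh_bits` is enabled, `FunctionTokenDecoder.forward` above splits every quantized function token into `bits_per_token` binary tokens and shifts each bit into its own two-entry slice of the embedding vocabulary, so each (depth, bit) position gets a distinct embedding. The standalone sketch below reproduces that unpacking; the random token ids are illustrative, and `num_special_tokens=4`, `bits_per_token=8`, `depth=8` simply mirror the defaults in `FunctionTokenDecoderConfig`.

import torch

num_special_tokens = 4  # ids below this are special (<pad>, <none>, <unk>, ...)
bits_per_token = 8      # each LSH token encodes 8 bits
depth = 8               # function tokens per sequence position

token_ids = torch.randint(num_special_tokens, num_special_tokens + 256, (2, depth))

# Shift into [0, 2**bits_per_token) and pull out the individual bits.
lsh = token_ids - num_special_tokens
bits = torch.cat(
    [torch.bitwise_and(lsh, 1 << b).gt(0).long() for b in range(bits_per_token)],
    dim=1,
)  # (batch, depth * bits_per_token), entries in {0, 1}

# Give every bit position its own {0, 1} slice of the embedding vocabulary.
offsets = 2 * torch.arange(depth * bits_per_token)
inputs = offsets[None, :] + bits
# `inputs` now indexes an nn.Embedding(2 * depth * bits_per_token, d_model),
# the same vocab_size used in the unpack_lsh_bits branch of __init__ above.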
esm/models/vqvae.py
ADDED
@@ -0,0 +1,450 @@
import torch
import torch.nn as nn

from esm.layers.blocks import UnifiedTransformerBlock
from esm.layers.codebook import EMACodebook
from esm.layers.structure_proj import Dim6RotStructureHead
from esm.layers.transformer_stack import TransformerStack
from esm.utils.constants import esm3 as C
from esm.utils.misc import knn_graph
from esm.utils.structure.affine3d import (
    Affine3D,
    build_affine3d_from_coordinates,
)
from esm.utils.structure.predicted_aligned_error import (
    compute_predicted_aligned_error,
    compute_tm,
)


class RelativePositionEmbedding(nn.Module):
    """
    Embedding layer for relative position embeddings. `bins` is the number of positions relative
    to the query position that are considered before clipping. For instance, if `bins=10`, then
    the relative position embedding will have 21 positions, [-10, 10].
    """

    def __init__(self, bins, embedding_dim, init_std=0.02):
        super().__init__()
        self.bins = bins

        self.embedding = torch.nn.Embedding(2 * bins + 2, embedding_dim)
        self.embedding.weight.data.normal_(0, init_std)

    def forward(self, query_residue_index, key_residue_index):
        """
        Input:
            query_residue_index: (B, ) tensor of source indices (dtype=torch.long)
            key_residue_index: (B, L) tensor of target indices (dtype=torch.long)
        Output:
            embeddings: B x L x embedding_dim tensor of embeddings
        """

        assert query_residue_index.dtype == torch.long
        assert key_residue_index.dtype == torch.long
        assert query_residue_index.ndim == 1
        assert key_residue_index.ndim == 2

        diff = key_residue_index - query_residue_index.unsqueeze(1)
        diff = diff.clamp(-self.bins, self.bins)
        diff = diff + self.bins + 1  # add 1 to adjust for padding index
        output = self.embedding(diff)
        return output


class PairwisePredictionHead(nn.Module):
    def __init__(
        self,
        input_dim: int,
        downproject_dim: int,
        hidden_dim: int,
        n_bins: int,
        bias: bool = True,
        pairwise_state_dim: int = 0,
    ):
        super().__init__()
        self.downproject = nn.Linear(input_dim, downproject_dim, bias=bias)
        self.linear1 = nn.Linear(
            downproject_dim + pairwise_state_dim, hidden_dim, bias=bias
        )
        self.activation_fn = nn.GELU()
        self.norm = nn.LayerNorm(hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, n_bins, bias=bias)

    def forward(self, x, pairwise: torch.Tensor | None = None):
        """
        Args:
            x: [B x L x D]

        Output:
            [B x L x L x K]
        """
        x = self.downproject(x)
        # Let x_i be a vector of size (B, D).
        # Input is {x_1, ..., x_L} of size (B, L, D)
        # Output is 2D where x_ij = cat([x_i * x_j, x_i - x_j])
        q, k = x.chunk(2, dim=-1)

        prod = q[:, None, :, :] * k[:, :, None, :]
        diff = q[:, None, :, :] - k[:, :, None, :]
        x_2d = [
            prod,
            diff,
        ]
        if pairwise is not None:
            x_2d.append(pairwise)
        x = torch.cat(x_2d, dim=-1)
        x = self.linear1(x)
        x = self.activation_fn(x)
        x = self.norm(x)
        x = self.linear2(x)
        return x


class RegressionHead(nn.Module):
    def __init__(self, embed_dim: int, output_dim: int):
        super().__init__()
        self.dense = nn.Linear(embed_dim, embed_dim)
        self.activation_fn = nn.GELU()
        self.norm = nn.LayerNorm(embed_dim)
        self.output = nn.Linear(embed_dim, output_dim)

    def forward(self, features):
        x = self.dense(features)
        x = self.activation_fn(x)
        x = self.norm(x)
        x = self.output(x)
        return x


class CategoricalMixture:
    def __init__(self, param, bins=50, start=0, end=1):
        # All tensors are of shape ..., bins.
        self.logits = param
        bins = torch.linspace(
            start, end, bins + 1, device=self.logits.device, dtype=torch.float32
        )
        self.v_bins = (bins[:-1] + bins[1:]) / 2

    def log_prob(self, true):
        # Shapes are:
        #     self.probs: ... x bins
        #     true      : ... (floating point # for target)
        true_index = (
            (true.unsqueeze(-1) - self.v_bins[[None] * true.ndim]).abs().argmin(-1)
        )
        nll = self.logits.log_softmax(-1)
        return torch.take_along_dim(nll, true_index.unsqueeze(-1), dim=-1).squeeze(-1)

    def mean(self):
        return (
            self.logits.to(self.v_bins.dtype).softmax(-1) @ self.v_bins.unsqueeze(1)
        ).squeeze(-1)

    def median(self):
        return self.v_bins[self.logits.max(-1).indices]


class GeometricEncoderStack(TransformerStack):
    def __init__(self, d_model, n_heads, v_heads, n_layers):
        super().__init__(d_model, n_heads, v_heads, 0)
        self.blocks = nn.ModuleList(
            [
                UnifiedTransformerBlock(
                    d_model,
                    n_heads,
                    v_heads=v_heads,
                    use_geom_attn=True,
                    use_plain_attn=False,
                    expansion_ratio=4,
                    bias=True,
                )
                for i in range(n_layers)
            ]
        )
        self.norm = nn.Identity()


def batched_gather(data, inds, dim=0, no_batch_dims=0):
    ranges = []
    for i, s in enumerate(data.shape[:no_batch_dims]):
        r = torch.arange(s)
        r = r.view(*(*((1,) * i), -1, *((1,) * (len(inds.shape) - i - 1))))
        ranges.append(r)

    remaining_dims = [slice(None) for _ in range(len(data.shape) - no_batch_dims)]
    remaining_dims[dim - no_batch_dims if dim >= 0 else dim] = inds
    ranges.extend(remaining_dims)
    return data[ranges]


def node_gather(s: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
    return batched_gather(s.unsqueeze(-3), edges, -2, no_batch_dims=len(s.shape) - 1)


class StructureTokenEncoder(nn.Module):
    def __init__(self, d_model, n_heads, v_heads, n_layers, d_out, n_codes):
        super().__init__()
        # We only support fully-geometric structure token encoders for now...
        # setting n_layers_geom to something that's not n_layers won't work because
        # sequence ID isn't supported fully in this repo for plain-old transformers
        self.transformer = GeometricEncoderStack(d_model, n_heads, v_heads, n_layers)
        self.pre_vq_proj = nn.Linear(d_model, d_out)
        self.codebook = EMACodebook(n_codes, d_out)
        self.relative_positional_embedding = RelativePositionEmbedding(
            32, d_model, init_std=0.02
        )
        self.knn = 16

    def encode_local_structure(
        self,
        coords: torch.Tensor,
        affine: Affine3D,
        attention_mask: torch.Tensor,
        sequence_id: torch.Tensor | None,
        affine_mask: torch.Tensor,
        residue_index: torch.Tensor | None = None,
    ):
        """This function allows a multi-layered encoder to encode tokens with local receptive fields. The implementation is as follows:

        1. Starting with (B, L) frames, we find the KNN in structure space. This now gives us (B, L, K) where the last dimension is the local
        neighborhood of all (B, L) residues.
        2. We reshape these frames to (B*L, K) so now we have a large batch of a bunch of local neighborhoods.
        3. Pass the (B*L, K) local neighborhoods through a stack of geometric reasoning blocks, effectively getting all to all communication between
        all frames in the local neighborhood.
        4. This gives (B*L, K, d_model) embeddings, from which we need to get a single embedding per local neighborhood. We do this by simply
        taking the embedding corresponding to the query node. This gives us (B*L, d_model) embeddings.
        5. Reshape back to (B, L, d_model) embeddings
        """
        assert coords.size(-1) == 3 and coords.size(-2) == 3, "need N, CA, C"
        with torch.no_grad():
            knn_edges, _ = self.find_knn_edges(
                coords,
                ~attention_mask,
                coord_mask=affine_mask,
                sequence_id=sequence_id,
                knn=self.knn,
            )
        B, L, E = knn_edges.shape

        affine_tensor = affine.tensor  # for easier manipulation
        T_D = affine_tensor.size(-1)
        knn_affine_tensor = node_gather(affine_tensor, knn_edges)
        knn_affine_tensor = knn_affine_tensor.view(-1, E, T_D).contiguous()
        affine = Affine3D.from_tensor(knn_affine_tensor)
        knn_sequence_id = (
            node_gather(sequence_id.unsqueeze(-1), knn_edges).view(-1, E)
            if sequence_id is not None
            else torch.zeros(L, E, dtype=torch.int64, device=coords.device)
        )
        knn_affine_mask = node_gather(affine_mask.unsqueeze(-1), knn_edges).view(
            -1, E
        )
        knn_chain_id = torch.zeros(L, E, dtype=torch.int64, device=coords.device)

        if residue_index is None:
            res_idxs = knn_edges.view(-1, E)
        else:
            res_idxs = node_gather(residue_index.unsqueeze(-1), knn_edges).view(
                -1, E
            )

        z = self.relative_positional_embedding(res_idxs[:, 0], res_idxs)

        z, _ = self.transformer.forward(
            x=z,
            sequence_id=knn_sequence_id,
            affine=affine,
            affine_mask=knn_affine_mask,
            chain_id=knn_chain_id,
        )

        # Unflatten the output and take the query node embedding, which will always be the first one because
        # a node has distance 0 with itself and the KNN are sorted.
        z = z.view(B, L, E, -1)
        z = z[:, :, 0, :]

        return z

    @staticmethod
    def find_knn_edges(
        coords,
        padding_mask,
        coord_mask,
        sequence_id: torch.Tensor | None = None,
        knn: int | None = None,
    ) -> tuple:
        assert knn is not None, "Must specify a non-null knn to find_knn_edges"
        # Coords are N, CA, C
        coords = coords.clone()
        coords[~coord_mask] = 0

        if sequence_id is None:
            sequence_id = torch.zeros(
                (coords.shape[0], coords.shape[1]), device=coords.device
            ).long()

        with torch.no_grad(), torch.cuda.amp.autocast(enabled=False):  # type: ignore
            ca = coords[..., 1, :]
            edges, edge_mask = knn_graph(
                ca,
                coord_mask,
                padding_mask,
                sequence_id,
                no_knn=knn,
            )

        return edges, edge_mask

    def encode(
        self,
        coords: torch.Tensor,
        attention_mask: torch.Tensor | None = None,
        sequence_id: torch.Tensor | None = None,
        residue_index: torch.Tensor | None = None,
    ):
        coords = coords[..., :3, :]
        affine, affine_mask = build_affine3d_from_coordinates(coords=coords)

        if attention_mask is None:
            attention_mask = torch.ones_like(affine_mask, dtype=torch.bool)
        attention_mask = attention_mask.bool()

        if sequence_id is None:
            sequence_id = torch.zeros_like(affine_mask, dtype=torch.int64)

        z = self.encode_local_structure(
            coords=coords,
            affine=affine,
            attention_mask=attention_mask,
            sequence_id=sequence_id,
            affine_mask=affine_mask,
            residue_index=residue_index,
        )

        z = z.masked_fill(~affine_mask.unsqueeze(2), 0)
        z = self.pre_vq_proj(z)

        z_q, min_encoding_indices, _ = self.codebook(z)

        return z_q, min_encoding_indices


class StructureTokenDecoder(nn.Module):
    def __init__(
        self,
        d_model,
        n_heads,
        n_layers,
    ):
        super().__init__()
        self.decoder_channels = d_model

        self.vqvae_codebook_size = C.VQVAE_CODEBOOK_SIZE
        self.special_tokens = C.VQVAE_SPECIAL_TOKENS
        self.max_pae_bin = C.VQVAE_MAX_PAE_BIN

        self.embed = nn.Embedding(
            self.vqvae_codebook_size + len(self.special_tokens), d_model
        )
        self.decoder_stack = TransformerStack(
            d_model, n_heads, 1, n_layers, scale_residue=False, n_layers_geom=0
        )

        self.affine_output_projection = Dim6RotStructureHead(
            self.decoder_channels, 10, predict_torsion_angles=False
        )

        direction_loss_bins = C.VQVAE_DIRECTION_LOSS_BINS
        pae_bins = C.VQVAE_PAE_BINS
        self.pairwise_bins = [
            64,  # distogram
            direction_loss_bins * 6,  # direction bins
            pae_bins,  # predicted aligned error
        ]
        self.pairwise_classification_head = PairwisePredictionHead(
            self.decoder_channels,
            downproject_dim=128,
            hidden_dim=128,
            n_bins=sum(self.pairwise_bins),
            bias=False,
        )

        plddt_bins = C.VQVAE_PLDDT_BINS
        self.plddt_head = RegressionHead(
            embed_dim=self.decoder_channels, output_dim=plddt_bins
        )

    def decode(
        self,
        structure_tokens: torch.Tensor,
        attention_mask: torch.Tensor | None = None,
        sequence_id: torch.Tensor | None = None,
    ):
        if attention_mask is None:
            attention_mask = torch.ones_like(structure_tokens, dtype=torch.bool)

        attention_mask = attention_mask.bool()
        if sequence_id is None:
            sequence_id = torch.zeros_like(structure_tokens, dtype=torch.int64)
        # not supported for now
        chain_id = torch.zeros_like(structure_tokens, dtype=torch.int64)

        # check that BOS and EOS are set correctly
        assert (
            structure_tokens[:, 0].eq(self.special_tokens["BOS"]).all()
        ), "First token in structure_tokens must be BOS token"
        assert (
            structure_tokens[
                torch.arange(structure_tokens.shape[0]), attention_mask.sum(1) - 1
            ]
            .eq(self.special_tokens["EOS"])
            .all()
        ), "Last token in structure_tokens must be EOS token"
        assert (
            (structure_tokens < 0).sum() == 0
        ), "All structure tokens set to -1 should be replaced with BOS, EOS, PAD, or MASK tokens by now, but that isn't the case!"

        x = self.embed(structure_tokens)
        # !!! NOTE: Attention mask is actually unused here so watch out
        x, _ = self.decoder_stack.forward(
            x, affine=None, affine_mask=None, sequence_id=sequence_id, chain_id=chain_id
        )

        tensor7_affine, bb_pred = self.affine_output_projection(
            x, affine=None, affine_mask=torch.zeros_like(attention_mask)
        )

        pae, ptm = None, None
        pairwise_logits = self.pairwise_classification_head(x)
        _, _, pae_logits = [
            (o if o.numel() > 0 else None)
            for o in pairwise_logits.split(self.pairwise_bins, dim=-1)
        ]

        special_tokens_mask = structure_tokens >= min(self.special_tokens.values())
        pae = compute_predicted_aligned_error(
            pae_logits,  # type: ignore
            aa_mask=~special_tokens_mask,
            sequence_id=sequence_id,
            max_bin=self.max_pae_bin,
        )
        # This might be broken for chainbreak tokens? We might align to the chainbreak
        ptm = compute_tm(
            pae_logits,  # type: ignore
            aa_mask=~special_tokens_mask,
            max_bin=self.max_pae_bin,
        )

        plddt_logits = self.plddt_head(x)
        plddt_value = CategoricalMixture(
            plddt_logits, bins=plddt_logits.shape[-1]
        ).mean()

        return dict(
            tensor7_affine=tensor7_affine,
            bb_pred=bb_pred,
            plddt=plddt_value,
            ptm=ptm,
            predicted_aligned_error=pae,
        )
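`CategoricalMixture` above is how `StructureTokenDecoder.decode` turns binned pLDDT logits into a scalar confidence per residue: a softmax over evenly spaced bins followed by an expectation over the bin centers. The sketch below reproduces that readout with plain tensors; the sequence length and the 50-bin grid over [0, 1] are assumptions for illustration (they match the class defaults), not values taken from a checkpoint.

import torch

num_bins, length = 50, 7
plddt_logits = torch.randn(length, num_bins)   # per-residue logits over confidence bins

edges = torch.linspace(0.0, 1.0, num_bins + 1)
centers = (edges[:-1] + edges[1:]) / 2         # bin centers, shape (num_bins,)

probs = plddt_logits.softmax(-1)
expected_plddt = probs @ centers               # (length,) expected confidence, as in .mean()
top_bin_plddt = centers[plddt_logits.argmax(-1)]  # (length,) highest-probability bin, as returned by .median()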