anonymousforpaper committed on
Commit 224a33f · verified · 1 Parent(s): 3bf32d0

Upload 103 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ FROM continuumio/miniconda3
+
+ WORKDIR /code
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN conda create -n m3site python=3.11 dssp -c ostrokach -y
+ RUN conda run -n m3site pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ RUN useradd -m -u 1000 user
+
+ USER user
+
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     SYSTEM=spaces
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ CMD ["conda", "run", "--no-capture-output", "-n", "m3site", "python", "app.py"]
README.md CHANGED
@@ -1,10 +1,17 @@
  ---
  title: M3Site
- emoji: 🏆
- colorFrom: red
- colorTo: yellow
+ emoji: 📉
+ colorFrom: indigo
+ colorTo: indigo
  sdk: docker
- pinned: false
+ pinned: true
+ license: cc-by-4.0
+ short_description: 'An interactive demo for M3Site.'
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification
+
+ This is an interactive inference demo; the source code for the paper can be found on [Github](). We provide some example `.pdb` files in the `case_study` folder that you can use to try the demo. Taking `A0A384E143.pdb` as an example, the protein active sites predicted by M<sup>3</sup>Site are shown below:
+ ![image1](img/image1.png)
+ You can also visualize and analyze the predicted results interactively, as shown below:
+ ![image2](img/image2.png)
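The Space drives prediction through `predict.model_predict`, which `app.py` imports and calls. The following sketch shows how the same path could be exercised without the Gradio UI; the exact argument types (a model name from `constants.model_list`, a path to a `.pdb` file, and a free-text function prompt) are assumptions inferred from how `app.py` calls it, not a documented API.

```python
# Hypothetical programmatic use of the Space's prediction path (a sketch, not
# the repository's documented interface). Requires the repo's predict.py,
# constants.py, and model weights to be available locally.
from constants import model_list, no_cat_dict
from predict import model_predict

pdb_path = "case_study/A0A384E143.pdb"                  # example structure shipped with the Space
prompt = "I don't know the function of this protein."   # optional function prompt

# model_predict is assumed to return per-class residue numbers, per-residue
# confidences, and the one-letter sequence, mirroring app.py's usage.
predicted_sites, confs, sequence = model_predict(model_list[0], pdb_path, prompt)

for class_key, residues in predicted_sites.items():
    for res_no in residues:
        print(no_cat_dict[class_key], f"residue {sequence[res_no - 1]}{res_no}",
              "confidence:", confs[res_no - 1])
```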
app.py ADDED
@@ -0,0 +1,219 @@
+ import copy
+ import gradio as gr
+ from gradio_molecule3d import Molecule3D
+ import Bio
+ import Bio.SeqUtils
+
+ from utils.util_functions import merge_ranges
+ from predict import model_predict
+ from constants import *
+
+
+ def update_reps_based_on_radio(*args):
+     struct, text = args[0], args[1]
+     background, model, active_sites = args[2:4], args[4], args[5:]
+
+     predicted_sites, confs, sequence = model_predict(model, struct, text)
+     merged_sites = merge_ranges(predicted_sites, max_value=len(sequence))
+
+     confidence_details = []
+     new_reps = []
+
+     # 1. build the summary text
+     summary_text = []
+     for k, v in predicted_sites.items():
+         if len(v) > 0:
+             summary_text.append(f"{len(v)} {no_cat_dict[k]} site(s)")
+     if len(summary_text) == 0:
+         summary_text = ["No active sites identified."]
+     summary_text = '; '.join(summary_text)
+
+     # 2. build the result table
+     detail_predicted_sites = {'b': [], '0': [], '1': [], '2': [], '3': [], '4': [], '5': []}
+     ass = []
+     for k, v in predicted_sites.items():
+         for vv in v:
+             detail_predicted_sites[k].append(
+                 {'residue_type': sequence[vv-1], 'number': vv, 'confidence': confs[vv-1]}
+             )
+             ass.append(vv)
+     for i in range(len(sequence)):
+         if i+1 not in ass:
+             detail_predicted_sites['b'].append(
+                 {'residue_type': sequence[i], 'number': i+1, 'confidence': confs[i]}
+             )
+     # 2.1 background residues
+     backgrounds = detail_predicted_sites.get('b', [])
+     for r in backgrounds:
+         confidence_details.append([
+             'Background',
+             Bio.SeqUtils.seq3(r['residue_type']).upper(),
+             r['number'],
+             r.get('confidence', 'N/A')
+         ])
+     # 2.2 active-site residues
+     for i in range(0, len(active_sites), 2):
+         x, y = active_sites[i], active_sites[i+1]
+         site_key = str(i//2)
+         sites = detail_predicted_sites.get(site_key, [])
+         for s in sites:
+             confidence_details.append([
+                 no_cat_dict[site_key],
+                 Bio.SeqUtils.seq3(s['residue_type']).upper(),
+                 s['number'],
+                 s.get('confidence', 'N/A')
+             ])
+
+     # 3. build the 3D reps
+     # 3.1 background
+     ranges = merged_sites['b']
+     for r in ranges:
+         old_reps = copy.deepcopy(default_reps)[0]
+         old_reps['style'] = background[0][0].lower() + background[0][1:]
+         old_reps['color'] = background[1][0].lower() + background[1][1:] + "Carbon"
+         old_reps['residue_range'] = r
+         new_reps.append(old_reps)
+     # 3.2 active sites
+     for i in range(0, len(active_sites), 2):
+         x, y = active_sites[i], active_sites[i+1]
+         ranges = merged_sites[str(i//2)]
+         for r in ranges:
+             old_reps = copy.deepcopy(default_reps)[0]
+             old_reps['style'] = x[0].lower() + x[1:]
+             old_reps['color'] = y[0].lower() + y[1:] + "Carbon"
+             old_reps['residue_range'] = r
+             new_reps.append(old_reps)
+
+     return summary_text, confidence_details, Molecule3D(label="Identified Functional Sites", reps=new_reps)
+
+ def disable_fn(*x):
+     return [gr.update(interactive=False)] * len(x)
+
+ def able_tip():
+     return gr.update(visible=True)
+
+ def check_input(input):
+     if input is not None:
+         return gr.update(interactive=True)
+     return gr.update(interactive=False)
+
+
+ with gr.Blocks(title="M3Site-app", theme=gr.themes.Default()) as demo:
+     gr.Markdown("# M<sup>3</sup>Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification")
+     gr.Markdown("""
+     ## Overview
+     **M<sup>3</sup>Site** is an advanced tool designed to accurately identify and classify protein active sites using a multi-modal learning approach. By integrating protein sequences, structural data, and functional annotations, M<sup>3</sup>Site provides comprehensive insights into protein functionality, aiding in drug design, synthetic biology, and understanding protein mechanisms.
+     """)
+     gr.Markdown("""
+     ## How to Use
+     1. **Select the Model**: Choose the pre-trained model for site prediction from the dropdown list.
+     2. **Adjust Visual Settings**: Customize the visual style and color for the background and active sites.
+     3. **Upload Protein Structure**: Provide the 3D structure of the protein. You can upload a local file or download from PDB Assym. Unit, PDB BioAssembly, AlphaFold DB, or ESMFold DB.
+     4. **Enter Function Prompt**: Optionally provide a text description of the protein's function. If unsure, leave it blank.
+     5. **Click "Predict"**: Hit the 'Predict' button to initiate the prediction. The predicted active sites will be highlighted in the structure visualization.
+     6. **View Results**: The detailed results will be displayed below, including the identified active sites, their types, and confidence scores.
+     """)
+
+     with gr.Accordion("General Settings (Set before prediction)"):
+         with gr.Row():
+             model_drop = gr.Dropdown(model_list, label="Model Selection", value=model_list[0])
+             gr.Markdown("")
+             gr.Markdown("")
+         with gr.Row():
+             with gr.Row():
+                 style_dropb = gr.Dropdown(style_list, label="Style (Background)", value=style_list[0], min_width=1)
+                 color_dropb = gr.Dropdown(color_list, label="Color (Background)", value=color_list[0], min_width=1)
+             with gr.Row():
+                 style_drop1 = gr.Dropdown(style_list, label="Style (CRI)", value=style_list[1], min_width=1)
+                 color_drop1 = gr.Dropdown(color_list, label="Color (CRI)", value=color_list[1], min_width=1)
+             with gr.Row():
+                 style_drop2 = gr.Dropdown(style_list, label="Style (SCI)", value=style_list[1], min_width=1)
+                 color_drop2 = gr.Dropdown(color_list, label="Color (SCI)", value=color_list[2], min_width=1)
+             with gr.Row():
+                 style_drop3 = gr.Dropdown(style_list, label="Style (PI)", value=style_list[1], min_width=1)
+                 color_drop3 = gr.Dropdown(color_list, label="Color (PI)", value=color_list[3], min_width=1)
+         with gr.Row():
+             with gr.Row():
+                 style_drop4 = gr.Dropdown(style_list, label="Style (PTCR)", value=style_list[1], min_width=1)
+                 color_drop4 = gr.Dropdown(color_list, label="Color (PTCR)", value=color_list[4], min_width=1)
+             with gr.Row():
+                 style_drop5 = gr.Dropdown(style_list, label="Style (IA)", value=style_list[1], min_width=1)
+                 color_drop5 = gr.Dropdown(color_list, label="Color (IA)", value=color_list[5], min_width=1)
+             with gr.Row():
+                 style_drop6 = gr.Dropdown(style_list, label="Style (SSA)", value=style_list[1], min_width=1)
+                 color_drop6 = gr.Dropdown(color_list, label="Color (SSA)", value=color_list[6], min_width=1)
+             with gr.Row():
+                 gr.Markdown("")
+
+         gr.Markdown('''
+         *NOTE:* CRI indicates Covalent Reaction Intermediates, SCI indicates Sulfur-containing Covalent Intermediates, PI indicates Phosphorylated Intermediates,
+         PTCR indicates Proton Transfer & Charge Relay Systems, IA indicates Isomerization Activity, SSA indicates Substrate-specific Activities.
+         ''')
+
+     with gr.Row():
+         gr.Markdown("<center><font size=5><b>Input Structure</b></font></center>")
+         gr.Markdown("<center><font size=5><b>Output Predictions</b></font></center>")
+
+     with gr.Row(equal_height=True):
+         input_struct = Molecule3D(label="Input Protein Structure (Default Style)", reps=reps1)
+         output_struct = Molecule3D(label="Output Protein Structure", reps=[])
+
+     with gr.Row(equal_height=True):
+         input_text = gr.Textbox(lines=1, label="Function Prompt", scale=16, min_width=1, placeholder="I don't know the function of this protein.")
+         btn = gr.Button("Predict", variant="primary", scale=1, min_width=1, interactive=False)
+         summary_output = gr.Label(label="", scale=18, min_width=1, show_label=False, elem_classes="info")
+
+     gr.Markdown("### Result Details")
+     confidence_output = gr.DataFrame(headers=["Active Site Type", "Residue Type", "Residue Number", "Confidence"])
+
+     option_list = [
+         style_dropb, color_dropb, model_drop,
+         style_drop1, color_drop1,
+         style_drop2, color_drop2,
+         style_drop3, color_drop3,
+         style_drop4, color_drop4,
+         style_drop5, color_drop5,
+         style_drop6, color_drop6
+     ]
+
+     tips = gr.Markdown("### *Tips: Please refresh the page to make a new prediction.*", visible=False)
+     # gr.Markdown("## Citation")
+     # gr.Markdown("If you find this tool helpful, please consider citing the following papers:")
+     # with gr.Accordion("Citations", open=False):
+     #     gr.Markdown('''```
+     #     @inproceedings{ouyangmmsite,
+     #         title={MMSite: A Multi-modal Framework for the Identification of Active Sites in Proteins},
+     #         author={Ouyang, Song and Cai, Huiyu and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
+     #         booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems}
+     #     }
+     #     @article{ouyangm3site,
+     #         title={M3Site: Leveraging Multi-Class Multi-Modal Learning for Accurate Protein Active Site Identification and Classification},
+     #         author={Ouyang, Song and Luo, Yong and Su, Kehua and Zhang, Lefei and Du, Bo},
+     #         journal={xxxx},
+     #         year={xxxx},
+     #     }
+     #     ```''')
+
+     # wire up the events
+     input_struct.change(check_input, inputs=input_struct, outputs=btn)
+     btn.click(
+         fn=able_tip,
+         inputs=[],
+         outputs=tips
+     ).then(
+         fn=disable_fn,
+         inputs=option_list,
+         outputs=option_list
+     ).then(
+         fn=update_reps_based_on_radio,
+         inputs=[input_struct, input_text] + option_list,
+         outputs=[summary_output, confidence_output, output_struct]
+     ).then(
+         fn=lambda x: x,
+         inputs=[input_struct],
+         outputs=[output_struct]
+     )
+
+
+ if __name__ == "__main__":
+     demo.launch(share=True, debug=True)
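`update_reps_based_on_radio` relies on `utils.util_functions.merge_ranges` to turn per-class residue numbers into the `residue_range` strings fed to the Molecule3D reps. That module is not shown in this commit view; the sketch below is only one plausible reading of its contract, inferred from how its output (including the background key `'b'`, hence the `max_value=len(sequence)` argument) is consumed in `app.py`, and is not the repository's implementation.

```python
def merge_ranges_sketch(predicted_sites, max_value):
    """Hypothetical stand-in for utils.util_functions.merge_ranges.

    Collapses sorted residue numbers into contiguous "start-end" strings per
    class key and adds a 'b' entry covering every residue (1..max_value) not
    claimed by any class, mirroring how app.py reads merged_sites.
    """
    def to_ranges(numbers):
        ranges, run = [], []
        for n in sorted(numbers):
            if run and n != run[-1] + 1:
                ranges.append(f"{run[0]}-{run[-1]}")
                run = []
            run.append(n)
        if run:
            ranges.append(f"{run[0]}-{run[-1]}")
        return ranges

    covered = {n for residues in predicted_sites.values() for n in residues}
    merged = {k: to_ranges(v) for k, v in predicted_sites.items()}
    merged['b'] = to_ranges(n for n in range(1, max_value + 1) if n not in covered)
    return merged
```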
case_study/A0A067FT93.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A126P745.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A1H1XG33.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A242M8J4.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A2P2GK84.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A2U7QU15.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A2U7R6V5.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A384E138.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A384E143.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A3Q0KJ78.pdb ADDED
@@ -0,0 +1,1187 @@
1
+ HEADER 01-JUN-22
2
+ TITLE ALPHAFOLD MONOMER V2.0 PREDICTION FOR NUCLEOSIDE DIPHOSPHATE KINASE
3
+ TITLE 2 (A0A3Q0KJ78)
4
+ COMPND MOL_ID: 1;
5
+ COMPND 2 MOLECULE: NUCLEOSIDE DIPHOSPHATE KINASE;
6
+ COMPND 3 CHAIN: A
7
+ SOURCE MOL_ID: 1;
8
+ SOURCE 2 ORGANISM_SCIENTIFIC: SCHISTOSOMA MANSONI;
9
+ SOURCE 3 ORGANISM_TAXID: 6183
10
+ REMARK 1
11
+ REMARK 1 REFERENCE 1
12
+ REMARK 1 AUTH JOHN JUMPER, RICHARD EVANS, ALEXANDER PRITZEL, TIM GREEN,
13
+ REMARK 1 AUTH 2 MICHAEL FIGURNOV, OLAF RONNEBERGER, KATHRYN TUNYASUVUNAKOOL,
14
+ REMARK 1 AUTH 3 RUSS BATES, AUGUSTIN ZIDEK, ANNA POTAPENKO, ALEX BRIDGLAND,
15
+ REMARK 1 AUTH 4 CLEMENS MEYER, SIMON A A KOHL, ANDREW J BALLARD,
16
+ REMARK 1 AUTH 5 ANDREW COWIE, BERNARDINO ROMERA-PAREDES, STANISLAV NIKOLOV,
17
+ REMARK 1 AUTH 6 RISHUB JAIN, JONAS ADLER, TREVOR BACK, STIG PETERSEN,
18
+ REMARK 1 AUTH 7 DAVID REIMAN, ELLEN CLANCY, MICHAL ZIELINSKI,
19
+ REMARK 1 AUTH 8 MARTIN STEINEGGER, MICHALINA PACHOLSKA, TAMAS BERGHAMMER,
20
+ REMARK 1 AUTH 9 DAVID SILVER, ORIOL VINYALS, ANDREW W SENIOR,
21
+ REMARK 1 AUTH10 KORAY KAVUKCUOGLU, PUSHMEET KOHLI, DEMIS HASSABIS
22
+ REMARK 1 TITL HIGHLY ACCURATE PROTEIN STRUCTURE PREDICTION WITH ALPHAFOLD
23
+ REMARK 1 REF NATURE V. 596 583 2021
24
+ REMARK 1 REFN ISSN 0028-0836
25
+ REMARK 1 PMID 34265844
26
+ REMARK 1 DOI 10.1038/s41586-021-03819-2
27
+ REMARK 1
28
+ REMARK 1 DISCLAIMERS
29
+ REMARK 1 ALPHAFOLD DATA, COPYRIGHT (2021) DEEPMIND TECHNOLOGIES LIMITED. THE
30
+ REMARK 1 INFORMATION PROVIDED IS THEORETICAL MODELLING ONLY AND CAUTION SHOULD
31
+ REMARK 1 BE EXERCISED IN ITS USE. IT IS PROVIDED "AS-IS" WITHOUT ANY WARRANTY
32
+ REMARK 1 OF ANY KIND, WHETHER EXPRESSED OR IMPLIED. NO WARRANTY IS GIVEN THAT
33
+ REMARK 1 USE OF THE INFORMATION SHALL NOT INFRINGE THE RIGHTS OF ANY THIRD
34
+ REMARK 1 PARTY. THE INFORMATION IS NOT INTENDED TO BE A SUBSTITUTE FOR
35
+ REMARK 1 PROFESSIONAL MEDICAL ADVICE, DIAGNOSIS, OR TREATMENT, AND DOES NOT
36
+ REMARK 1 CONSTITUTE MEDICAL OR OTHER PROFESSIONAL ADVICE. IT IS AVAILABLE FOR
37
+ REMARK 1 ACADEMIC AND COMMERCIAL PURPOSES, UNDER CC-BY 4.0 LICENCE.
38
+ DBREF XXXX A 1 143 UNP A0A3Q0KJ78 A0A3Q0KJ78_SCHMA 1 143
39
+ SEQRES 1 A 143 MET VAL LYS PRO ASP GLY VAL GLN ARG GLY LEU VAL GLY
40
+ SEQRES 2 A 143 GLU VAL ILE GLN ARG PHE GLU ARG ARG GLY TYR LYS LEU
41
+ SEQRES 3 A 143 VAL ALA ILE LYS MET MET HIS ALA SER GLU GLN LEU LEU
42
+ SEQRES 4 A 143 GLN THR HIS TYR GLU ALA LEU LYS SER LEU SER PHE PHE
43
+ SEQRES 5 A 143 PRO LYS LEU VAL ALA TYR MET SER SER GLY PRO VAL VAL
44
+ SEQRES 6 A 143 PRO MET VAL PHE GLU GLY ARG LYS VAL VAL GLU ASN GLY
45
+ SEQRES 7 A 143 ARG THR MET LEU GLY ALA THR LYS PRO GLU ALA SER CYS
46
+ SEQRES 8 A 143 PRO GLY SER ILE ARG GLY ASP TYR CYS GLN ASP VAL GLY
47
+ SEQRES 9 A 143 ARG ASN VAL VAL HIS GLY SER ASP SER THR GLU SER ALA
48
+ SEQRES 10 A 143 ASN ARG GLU ILE ASN LEU TRP PHE SER PRO GLN GLU LEU
49
+ SEQRES 11 A 143 CYS GLN TYR LYS GLN ALA VAL ASP PRO TRP ILE HIS GLU
50
+ CRYST1 1.000 1.000 1.000 90.00 90.00 90.00 P 1 1
51
+ ORIGX1 1.000000 0.000000 0.000000 0.00000
52
+ ORIGX2 0.000000 1.000000 0.000000 0.00000
53
+ ORIGX3 0.000000 0.000000 1.000000 0.00000
54
+ SCALE1 1.000000 0.000000 0.000000 0.00000
55
+ SCALE2 0.000000 1.000000 0.000000 0.00000
56
+ SCALE3 0.000000 0.000000 1.000000 0.00000
57
+ MODEL 1
58
+ ATOM 1 N MET A 1 0.643 4.346 0.767 1.00 98.62 N
59
+ ATOM 2 CA MET A 1 0.545 5.526 -0.124 1.00 98.62 C
60
+ ATOM 3 C MET A 1 0.325 5.076 -1.559 1.00 98.62 C
61
+ ATOM 4 CB MET A 1 1.804 6.406 -0.018 1.00 98.62 C
62
+ ATOM 5 O MET A 1 1.081 4.230 -2.029 1.00 98.62 O
63
+ ATOM 6 CG MET A 1 1.853 7.539 -1.054 1.00 98.62 C
64
+ ATOM 7 SD MET A 1 3.214 8.711 -0.842 1.00 98.62 S
65
+ ATOM 8 CE MET A 1 4.672 7.639 -0.954 1.00 98.62 C
66
+ ATOM 9 N VAL A 2 -0.665 5.645 -2.249 1.00 98.85 N
67
+ ATOM 10 CA VAL A 2 -0.805 5.525 -3.712 1.00 98.85 C
68
+ ATOM 11 C VAL A 2 0.110 6.556 -4.373 1.00 98.85 C
69
+ ATOM 12 CB VAL A 2 -2.265 5.698 -4.176 1.00 98.85 C
70
+ ATOM 13 O VAL A 2 0.079 7.732 -4.011 1.00 98.85 O
71
+ ATOM 14 CG1 VAL A 2 -2.415 5.503 -5.685 1.00 98.85 C
72
+ ATOM 15 CG2 VAL A 2 -3.176 4.670 -3.500 1.00 98.85 C
73
+ ATOM 16 N LYS A 3 0.963 6.087 -5.282 1.00 98.81 N
74
+ ATOM 17 CA LYS A 3 1.969 6.858 -6.024 1.00 98.81 C
75
+ ATOM 18 C LYS A 3 1.333 7.589 -7.223 1.00 98.81 C
76
+ ATOM 19 CB LYS A 3 3.065 5.882 -6.481 1.00 98.81 C
77
+ ATOM 20 O LYS A 3 0.179 7.296 -7.547 1.00 98.81 O
78
+ ATOM 21 CG LYS A 3 3.831 5.230 -5.325 1.00 98.81 C
79
+ ATOM 22 CD LYS A 3 4.961 4.375 -5.903 1.00 98.81 C
80
+ ATOM 23 CE LYS A 3 5.828 3.810 -4.782 1.00 98.81 C
81
+ ATOM 24 NZ LYS A 3 7.015 3.134 -5.342 1.00 98.81 N
82
+ ATOM 25 N PRO A 4 2.058 8.498 -7.908 1.00 98.85 N
83
+ ATOM 26 CA PRO A 4 1.474 9.295 -8.990 1.00 98.85 C
84
+ ATOM 27 C PRO A 4 0.857 8.466 -10.123 1.00 98.85 C
85
+ ATOM 28 CB PRO A 4 2.606 10.201 -9.486 1.00 98.85 C
86
+ ATOM 29 O PRO A 4 -0.227 8.789 -10.600 1.00 98.85 O
87
+ ATOM 30 CG PRO A 4 3.466 10.378 -8.238 1.00 98.85 C
88
+ ATOM 31 CD PRO A 4 3.405 8.986 -7.618 1.00 98.85 C
89
+ ATOM 32 N ASP A 5 1.486 7.355 -10.512 1.00 98.78 N
90
+ ATOM 33 CA ASP A 5 0.936 6.418 -11.499 1.00 98.78 C
91
+ ATOM 34 C ASP A 5 -0.389 5.788 -11.051 1.00 98.78 C
92
+ ATOM 35 CB ASP A 5 1.967 5.320 -11.812 1.00 98.78 C
93
+ ATOM 36 O ASP A 5 -1.309 5.658 -11.852 1.00 98.78 O
94
+ ATOM 37 CG ASP A 5 2.331 4.418 -10.619 1.00 98.78 C
95
+ ATOM 38 OD1 ASP A 5 2.308 4.888 -9.456 1.00 98.78 O
96
+ ATOM 39 OD2 ASP A 5 2.712 3.253 -10.848 1.00 98.78 O
97
+ ATOM 40 N GLY A 6 -0.514 5.425 -9.774 1.00 98.83 N
98
+ ATOM 41 CA GLY A 6 -1.751 4.890 -9.216 1.00 98.83 C
99
+ ATOM 42 C GLY A 6 -2.896 5.899 -9.231 1.00 98.83 C
100
+ ATOM 43 O GLY A 6 -4.032 5.525 -9.522 1.00 98.83 O
101
+ ATOM 44 N VAL A 7 -2.602 7.179 -8.980 1.00 98.78 N
102
+ ATOM 45 CA VAL A 7 -3.602 8.252 -9.080 1.00 98.78 C
103
+ ATOM 46 C VAL A 7 -4.015 8.471 -10.533 1.00 98.78 C
104
+ ATOM 47 CB VAL A 7 -3.120 9.564 -8.431 1.00 98.78 C
105
+ ATOM 48 O VAL A 7 -5.204 8.417 -10.837 1.00 98.78 O
106
+ ATOM 49 CG1 VAL A 7 -4.220 10.631 -8.515 1.00 98.78 C
107
+ ATOM 50 CG2 VAL A 7 -2.789 9.349 -6.949 1.00 98.78 C
108
+ ATOM 51 N GLN A 8 -3.047 8.629 -11.440 1.00 98.69 N
109
+ ATOM 52 CA GLN A 8 -3.300 8.848 -12.870 1.00 98.69 C
110
+ ATOM 53 C GLN A 8 -4.085 7.702 -13.525 1.00 98.69 C
111
+ ATOM 54 CB GLN A 8 -1.954 9.029 -13.593 1.00 98.69 C
112
+ ATOM 55 O GLN A 8 -4.831 7.914 -14.475 1.00 98.69 O
113
+ ATOM 56 CG GLN A 8 -1.250 10.358 -13.270 1.00 98.69 C
114
+ ATOM 57 CD GLN A 8 -1.926 11.583 -13.880 1.00 98.69 C
115
+ ATOM 58 NE2 GLN A 8 -1.318 12.744 -13.787 1.00 98.69 N
116
+ ATOM 59 OE1 GLN A 8 -3.000 11.536 -14.450 1.00 98.69 O
117
+ ATOM 60 N ARG A 9 -3.939 6.477 -13.009 1.00 98.75 N
118
+ ATOM 61 CA ARG A 9 -4.631 5.282 -13.510 1.00 98.75 C
119
+ ATOM 62 C ARG A 9 -5.968 4.997 -12.823 1.00 98.75 C
120
+ ATOM 63 CB ARG A 9 -3.677 4.088 -13.425 1.00 98.75 C
121
+ ATOM 64 O ARG A 9 -6.558 3.955 -13.084 1.00 98.75 O
122
+ ATOM 65 CG ARG A 9 -2.492 4.241 -14.394 1.00 98.75 C
123
+ ATOM 66 CD ARG A 9 -1.452 3.156 -14.132 1.00 98.75 C
124
+ ATOM 67 NE ARG A 9 -2.032 1.821 -14.347 1.00 98.75 N
125
+ ATOM 68 NH1 ARG A 9 -0.571 0.632 -13.047 1.00 98.75 N
126
+ ATOM 69 NH2 ARG A 9 -2.256 -0.411 -14.057 1.00 98.75 N
127
+ ATOM 70 CZ ARG A 9 -1.618 0.695 -13.816 1.00 98.75 C
128
+ ATOM 71 N GLY A 10 -6.437 5.874 -11.933 1.00 98.56 N
129
+ ATOM 72 CA GLY A 10 -7.723 5.702 -11.250 1.00 98.56 C
130
+ ATOM 73 C GLY A 10 -7.751 4.563 -10.223 1.00 98.56 C
131
+ ATOM 74 O GLY A 10 -8.817 4.045 -9.909 1.00 98.56 O
132
+ ATOM 75 N LEU A 11 -6.598 4.168 -9.672 1.00 98.86 N
133
+ ATOM 76 CA LEU A 11 -6.479 3.019 -8.762 1.00 98.86 C
134
+ ATOM 77 C LEU A 11 -6.620 3.385 -7.275 1.00 98.86 C
135
+ ATOM 78 CB LEU A 11 -5.167 2.263 -9.047 1.00 98.86 C
136
+ ATOM 79 O LEU A 11 -6.448 2.527 -6.414 1.00 98.86 O
137
+ ATOM 80 CG LEU A 11 -5.062 1.676 -10.467 1.00 98.86 C
138
+ ATOM 81 CD1 LEU A 11 -3.680 1.048 -10.647 1.00 98.86 C
139
+ ATOM 82 CD2 LEU A 11 -6.115 0.600 -10.734 1.00 98.86 C
140
+ ATOM 83 N VAL A 12 -6.932 4.643 -6.943 1.00 98.90 N
141
+ ATOM 84 CA VAL A 12 -7.062 5.089 -5.542 1.00 98.90 C
142
+ ATOM 85 C VAL A 12 -8.161 4.313 -4.815 1.00 98.90 C
143
+ ATOM 86 CB VAL A 12 -7.308 6.607 -5.447 1.00 98.90 C
144
+ ATOM 87 O VAL A 12 -7.897 3.723 -3.769 1.00 98.90 O
145
+ ATOM 88 CG1 VAL A 12 -7.497 7.070 -3.997 1.00 98.90 C
146
+ ATOM 89 CG2 VAL A 12 -6.127 7.389 -6.034 1.00 98.90 C
147
+ ATOM 90 N GLY A 13 -9.369 4.271 -5.388 1.00 98.78 N
148
+ ATOM 91 CA GLY A 13 -10.502 3.557 -4.792 1.00 98.78 C
149
+ ATOM 92 C GLY A 13 -10.252 2.053 -4.683 1.00 98.78 C
150
+ ATOM 93 O GLY A 13 -10.553 1.454 -3.658 1.00 98.78 O
151
+ ATOM 94 N GLU A 14 -9.621 1.464 -5.697 1.00 98.73 N
152
+ ATOM 95 CA GLU A 14 -9.267 0.043 -5.720 1.00 98.73 C
153
+ ATOM 96 C GLU A 14 -8.277 -0.328 -4.603 1.00 98.73 C
154
+ ATOM 97 CB GLU A 14 -8.688 -0.251 -7.109 1.00 98.73 C
155
+ ATOM 98 O GLU A 14 -8.463 -1.316 -3.893 1.00 98.73 O
156
+ ATOM 99 CG GLU A 14 -8.243 -1.697 -7.305 1.00 98.73 C
157
+ ATOM 100 CD GLU A 14 -9.348 -2.736 -7.091 1.00 98.73 C
158
+ ATOM 101 OE1 GLU A 14 -8.944 -3.883 -6.756 1.00 98.73 O
159
+ ATOM 102 OE2 GLU A 14 -10.530 -2.406 -7.315 1.00 98.73 O
160
+ ATOM 103 N VAL A 15 -7.241 0.492 -4.393 1.00 98.78 N
161
+ ATOM 104 CA VAL A 15 -6.285 0.282 -3.299 1.00 98.78 C
162
+ ATOM 105 C VAL A 15 -6.981 0.405 -1.945 1.00 98.78 C
163
+ ATOM 106 CB VAL A 15 -5.081 1.236 -3.413 1.00 98.78 C
164
+ ATOM 107 O VAL A 15 -6.806 -0.479 -1.110 1.00 98.78 O
165
+ ATOM 108 CG1 VAL A 15 -4.180 1.195 -2.168 1.00 98.78 C
166
+ ATOM 109 CG2 VAL A 15 -4.211 0.848 -4.619 1.00 98.78 C
167
+ ATOM 110 N ILE A 16 -7.782 1.454 -1.728 1.00 98.81 N
168
+ ATOM 111 CA ILE A 16 -8.519 1.656 -0.468 1.00 98.81 C
169
+ ATOM 112 C ILE A 16 -9.438 0.464 -0.191 1.00 98.81 C
170
+ ATOM 113 CB ILE A 16 -9.305 2.987 -0.506 1.00 98.81 C
171
+ ATOM 114 O ILE A 16 -9.350 -0.140 0.879 1.00 98.81 O
172
+ ATOM 115 CG1 ILE A 16 -8.322 4.174 -0.480 1.00 98.81 C
173
+ ATOM 116 CG2 ILE A 16 -10.277 3.101 0.682 1.00 98.81 C
174
+ ATOM 117 CD1 ILE A 16 -8.964 5.515 -0.846 1.00 98.81 C
175
+ ATOM 118 N GLN A 17 -10.232 0.062 -1.185 1.00 98.73 N
176
+ ATOM 119 CA GLN A 17 -11.197 -1.024 -1.060 1.00 98.73 C
177
+ ATOM 120 C GLN A 17 -10.538 -2.328 -0.601 1.00 98.73 C
178
+ ATOM 121 CB GLN A 17 -11.906 -1.225 -2.410 1.00 98.73 C
179
+ ATOM 122 O GLN A 17 -11.109 -3.048 0.218 1.00 98.73 O
180
+ ATOM 123 CG GLN A 17 -12.945 -2.351 -2.329 1.00 98.73 C
181
+ ATOM 124 CD GLN A 17 -13.650 -2.596 -3.651 1.00 98.73 C
182
+ ATOM 125 NE2 GLN A 17 -14.808 -2.021 -3.875 1.00 98.73 N
183
+ ATOM 126 OE1 GLN A 17 -13.223 -3.377 -4.477 1.00 98.73 O
184
+ ATOM 127 N ARG A 18 -9.336 -2.658 -1.095 1.00 98.71 N
185
+ ATOM 128 CA ARG A 18 -8.632 -3.882 -0.678 1.00 98.71 C
186
+ ATOM 129 C ARG A 18 -8.279 -3.869 0.810 1.00 98.71 C
187
+ ATOM 130 CB ARG A 18 -7.390 -4.108 -1.551 1.00 98.71 C
188
+ ATOM 131 O ARG A 18 -8.406 -4.905 1.459 1.00 98.71 O
189
+ ATOM 132 CG ARG A 18 -7.776 -4.559 -2.968 1.00 98.71 C
190
+ ATOM 133 CD ARG A 18 -6.520 -4.808 -3.803 1.00 98.71 C
191
+ ATOM 134 NE ARG A 18 -6.854 -5.181 -5.187 1.00 98.71 N
192
+ ATOM 135 NH1 ARG A 18 -5.187 -6.748 -5.608 1.00 98.71 N
193
+ ATOM 136 NH2 ARG A 18 -6.615 -6.241 -7.174 1.00 98.71 N
194
+ ATOM 137 CZ ARG A 18 -6.216 -6.038 -5.965 1.00 98.71 C
195
+ ATOM 138 N PHE A 19 -7.870 -2.725 1.360 1.00 98.73 N
196
+ ATOM 139 CA PHE A 19 -7.597 -2.583 2.796 1.00 98.73 C
197
+ ATOM 140 C PHE A 19 -8.880 -2.583 3.634 1.00 98.73 C
198
+ ATOM 141 CB PHE A 19 -6.755 -1.330 3.056 1.00 98.73 C
199
+ ATOM 142 O PHE A 19 -8.931 -3.280 4.648 1.00 98.73 O
200
+ ATOM 143 CG PHE A 19 -5.275 -1.507 2.775 1.00 98.73 C
201
+ ATOM 144 CD1 PHE A 19 -4.400 -1.847 3.823 1.00 98.73 C
202
+ ATOM 145 CD2 PHE A 19 -4.765 -1.338 1.476 1.00 98.73 C
203
+ ATOM 146 CE1 PHE A 19 -3.031 -2.032 3.568 1.00 98.73 C
204
+ ATOM 147 CE2 PHE A 19 -3.396 -1.517 1.216 1.00 98.73 C
205
+ ATOM 148 CZ PHE A 19 -2.531 -1.869 2.267 1.00 98.73 C
206
+ ATOM 149 N GLU A 20 -9.931 -1.892 3.188 1.00 98.51 N
207
+ ATOM 150 CA GLU A 20 -11.235 -1.889 3.866 1.00 98.51 C
208
+ ATOM 151 C GLU A 20 -11.846 -3.295 3.925 1.00 98.51 C
209
+ ATOM 152 CB GLU A 20 -12.210 -0.956 3.140 1.00 98.51 C
210
+ ATOM 153 O GLU A 20 -12.225 -3.764 4.995 1.00 98.51 O
211
+ ATOM 154 CG GLU A 20 -11.842 0.525 3.282 1.00 98.51 C
212
+ ATOM 155 CD GLU A 20 -12.870 1.415 2.571 1.00 98.51 C
213
+ ATOM 156 OE1 GLU A 20 -13.269 2.430 3.181 1.00 98.51 O
214
+ ATOM 157 OE2 GLU A 20 -13.235 1.076 1.419 1.00 98.51 O
215
+ ATOM 158 N ARG A 21 -11.866 -4.020 2.797 1.00 97.86 N
216
+ ATOM 159 CA ARG A 21 -12.381 -5.401 2.724 1.00 97.86 C
217
+ ATOM 160 C ARG A 21 -11.568 -6.390 3.550 1.00 97.86 C
218
+ ATOM 161 CB ARG A 21 -12.422 -5.885 1.271 1.00 97.86 C
219
+ ATOM 162 O ARG A 21 -12.092 -7.431 3.935 1.00 97.86 O
220
+ ATOM 163 CG ARG A 21 -13.579 -5.264 0.485 1.00 97.86 C
221
+ ATOM 164 CD ARG A 21 -13.613 -5.874 -0.917 1.00 97.86 C
222
+ ATOM 165 NE ARG A 21 -14.782 -5.408 -1.681 1.00 97.86 N
223
+ ATOM 166 NH1 ARG A 21 -14.241 -6.417 -3.672 1.00 97.86 N
224
+ ATOM 167 NH2 ARG A 21 -16.091 -5.183 -3.530 1.00 97.86 N
225
+ ATOM 168 CZ ARG A 21 -15.032 -5.673 -2.952 1.00 97.86 C
226
+ ATOM 169 N ARG A 22 -10.295 -6.089 3.818 1.00 97.31 N
227
+ ATOM 170 CA ARG A 22 -9.465 -6.893 4.720 1.00 97.31 C
228
+ ATOM 171 C ARG A 22 -9.830 -6.681 6.193 1.00 97.31 C
229
+ ATOM 172 CB ARG A 22 -7.983 -6.602 4.427 1.00 97.31 C
230
+ ATOM 173 O ARG A 22 -9.452 -7.515 7.011 1.00 97.31 O
231
+ ATOM 174 CG ARG A 22 -6.988 -7.512 5.160 1.00 97.31 C
232
+ ATOM 175 CD ARG A 22 -7.237 -8.994 4.870 1.00 97.31 C
233
+ ATOM 176 NE ARG A 22 -6.149 -9.835 5.399 1.00 97.31 N
234
+ ATOM 177 NH1 ARG A 22 -7.252 -11.840 5.225 1.00 97.31 N
235
+ ATOM 178 NH2 ARG A 22 -5.173 -11.792 6.037 1.00 97.31 N
236
+ ATOM 179 CZ ARG A 22 -6.197 -11.146 5.551 1.00 97.31 C
237
+ ATOM 180 N GLY A 23 -10.548 -5.606 6.517 1.00 98.08 N
238
+ ATOM 181 CA GLY A 23 -10.900 -5.228 7.883 1.00 98.08 C
239
+ ATOM 182 C GLY A 23 -9.899 -4.278 8.540 1.00 98.08 C
240
+ ATOM 183 O GLY A 23 -9.909 -4.145 9.756 1.00 98.08 O
241
+ ATOM 184 N TYR A 24 -9.016 -3.629 7.772 1.00 98.41 N
242
+ ATOM 185 CA TYR A 24 -8.137 -2.603 8.331 1.00 98.41 C
243
+ ATOM 186 C TYR A 24 -8.875 -1.279 8.518 1.00 98.41 C
244
+ ATOM 187 CB TYR A 24 -6.865 -2.442 7.485 1.00 98.41 C
245
+ ATOM 188 O TYR A 24 -9.672 -0.868 7.673 1.00 98.41 O
246
+ ATOM 189 CG TYR A 24 -5.918 -3.621 7.588 1.00 98.41 C
247
+ ATOM 190 CD1 TYR A 24 -5.487 -4.025 8.863 1.00 98.41 C
248
+ ATOM 191 CD2 TYR A 24 -5.480 -4.319 6.443 1.00 98.41 C
249
+ ATOM 192 CE1 TYR A 24 -4.679 -5.158 9.017 1.00 98.41 C
250
+ ATOM 193 CE2 TYR A 24 -4.633 -5.438 6.587 1.00 98.41 C
251
+ ATOM 194 OH TYR A 24 -3.545 -6.997 8.090 1.00 98.41 O
252
+ ATOM 195 CZ TYR A 24 -4.271 -5.875 7.880 1.00 98.41 C
253
+ ATOM 196 N LYS A 25 -8.568 -0.585 9.615 1.00 98.53 N
254
+ ATOM 197 CA LYS A 25 -9.174 0.698 9.969 1.00 98.53 C
255
+ ATOM 198 C LYS A 25 -8.412 1.835 9.293 1.00 98.53 C
256
+ ATOM 199 CB LYS A 25 -9.176 0.810 11.502 1.00 98.53 C
257
+ ATOM 200 O LYS A 25 -7.224 2.012 9.552 1.00 98.53 O
258
+ ATOM 201 CG LYS A 25 -10.043 1.974 11.992 1.00 98.53 C
259
+ ATOM 202 CD LYS A 25 -10.153 2.052 13.520 1.00 98.53 C
260
+ ATOM 203 CE LYS A 25 -8.808 2.430 14.142 1.00 98.53 C
261
+ ATOM 204 NZ LYS A 25 -8.819 2.340 15.615 1.00 98.53 N
262
+ ATOM 205 N LEU A 26 -9.060 2.615 8.430 1.00 98.74 N
263
+ ATOM 206 CA LEU A 26 -8.453 3.817 7.845 1.00 98.74 C
264
+ ATOM 207 C LEU A 26 -8.442 4.933 8.894 1.00 98.74 C
265
+ ATOM 208 CB LEU A 26 -9.241 4.224 6.585 1.00 98.74 C
266
+ ATOM 209 O LEU A 26 -9.513 5.368 9.307 1.00 98.74 O
267
+ ATOM 210 CG LEU A 26 -8.692 5.477 5.873 1.00 98.74 C
268
+ ATOM 211 CD1 LEU A 26 -7.289 5.246 5.306 1.00 98.74 C
269
+ ATOM 212 CD2 LEU A 26 -9.611 5.856 4.713 1.00 98.74 C
270
+ ATOM 213 N VAL A 27 -7.266 5.391 9.328 1.00 98.68 N
271
+ ATOM 214 CA VAL A 27 -7.136 6.418 10.385 1.00 98.68 C
272
+ ATOM 215 C VAL A 27 -6.636 7.765 9.876 1.00 98.68 C
273
+ ATOM 216 CB VAL A 27 -6.300 5.932 11.583 1.00 98.68 C
274
+ ATOM 217 O VAL A 27 -6.801 8.763 10.564 1.00 98.68 O
275
+ ATOM 218 CG1 VAL A 27 -6.940 4.699 12.224 1.00 98.68 C
276
+ ATOM 219 CG2 VAL A 27 -4.851 5.600 11.226 1.00 98.68 C
277
+ ATOM 220 N ALA A 28 -6.071 7.825 8.670 1.00 98.85 N
278
+ ATOM 221 CA ALA A 28 -5.763 9.084 7.998 1.00 98.85 C
279
+ ATOM 222 C ALA A 28 -5.690 8.891 6.484 1.00 98.85 C
280
+ ATOM 223 CB ALA A 28 -4.444 9.647 8.530 1.00 98.85 C
281
+ ATOM 224 O ALA A 28 -5.229 7.853 6.001 1.00 98.85 O
282
+ ATOM 225 N ILE A 29 -6.105 9.909 5.734 1.00 98.83 N
283
+ ATOM 226 CA ILE A 29 -6.009 9.969 4.275 1.00 98.83 C
284
+ ATOM 227 C ILE A 29 -5.952 11.429 3.827 1.00 98.83 C
285
+ ATOM 228 CB ILE A 29 -7.182 9.208 3.609 1.00 98.83 C
286
+ ATOM 229 O ILE A 29 -6.684 12.269 4.341 1.00 98.83 O
287
+ ATOM 230 CG1 ILE A 29 -7.034 9.219 2.072 1.00 98.83 C
288
+ ATOM 231 CG2 ILE A 29 -8.568 9.733 4.033 1.00 98.83 C
289
+ ATOM 232 CD1 ILE A 29 -7.922 8.189 1.364 1.00 98.83 C
290
+ ATOM 233 N LYS A 30 -5.101 11.733 2.845 1.00 98.71 N
291
+ ATOM 234 CA LYS A 30 -5.139 13.004 2.109 1.00 98.71 C
292
+ ATOM 235 C LYS A 30 -4.627 12.813 0.691 1.00 98.71 C
293
+ ATOM 236 CB LYS A 30 -4.370 14.107 2.859 1.00 98.71 C
294
+ ATOM 237 O LYS A 30 -3.844 11.903 0.440 1.00 98.71 O
295
+ ATOM 238 CG LYS A 30 -2.868 13.836 3.051 1.00 98.71 C
296
+ ATOM 239 CD LYS A 30 -2.253 14.940 3.917 1.00 98.71 C
297
+ ATOM 240 CE LYS A 30 -0.853 14.573 4.410 1.00 98.71 C
298
+ ATOM 241 NZ LYS A 30 -0.397 15.535 5.440 1.00 98.71 N
299
+ ATOM 242 N MET A 31 -5.042 13.684 -0.224 1.00 98.75 N
300
+ ATOM 243 CA MET A 31 -4.399 13.836 -1.528 1.00 98.75 C
301
+ ATOM 244 C MET A 31 -3.517 15.080 -1.486 1.00 98.75 C
302
+ ATOM 245 CB MET A 31 -5.447 13.894 -2.647 1.00 98.75 C
303
+ ATOM 246 O MET A 31 -3.956 16.134 -1.034 1.00 98.75 O
304
+ ATOM 247 CG MET A 31 -4.783 14.022 -4.024 1.00 98.75 C
305
+ ATOM 248 SD MET A 31 -5.921 13.905 -5.428 1.00 98.75 S
306
+ ATOM 249 CE MET A 31 -6.123 12.108 -5.546 1.00 98.75 C
307
+ ATOM 250 N MET A 32 -2.272 14.959 -1.935 1.00 98.63 N
308
+ ATOM 251 CA MET A 32 -1.336 16.081 -1.945 1.00 98.63 C
309
+ ATOM 252 C MET A 32 -0.298 15.939 -3.051 1.00 98.63 C
310
+ ATOM 253 CB MET A 32 -0.657 16.222 -0.570 1.00 98.63 C
311
+ ATOM 254 O MET A 32 0.004 14.833 -3.496 1.00 98.63 O
312
+ ATOM 255 CG MET A 32 0.242 15.033 -0.200 1.00 98.63 C
313
+ ATOM 256 SD MET A 32 1.033 15.162 1.426 1.00 98.63 S
314
+ ATOM 257 CE MET A 32 2.119 16.593 1.174 1.00 98.63 C
315
+ ATOM 258 N HIS A 33 0.299 17.059 -3.446 1.00 98.59 N
316
+ ATOM 259 CA HIS A 33 1.522 17.073 -4.241 1.00 98.59 C
317
+ ATOM 260 C HIS A 33 2.714 17.150 -3.280 1.00 98.59 C
318
+ ATOM 261 CB HIS A 33 1.457 18.250 -5.221 1.00 98.59 C
319
+ ATOM 262 O HIS A 33 2.903 18.161 -2.607 1.00 98.59 O
320
+ ATOM 263 CG HIS A 33 2.368 18.101 -6.406 1.00 98.59 C
321
+ ATOM 264 CD2 HIS A 33 3.656 17.653 -6.401 1.00 98.59 C
322
+ ATOM 265 ND1 HIS A 33 1.994 18.398 -7.716 1.00 98.59 N
323
+ ATOM 266 CE1 HIS A 33 3.068 18.141 -8.472 1.00 98.59 C
324
+ ATOM 267 NE2 HIS A 33 4.072 17.691 -7.708 1.00 98.59 N
325
+ ATOM 268 N ALA A 34 3.483 16.067 -3.150 1.00 98.22 N
326
+ ATOM 269 CA ALA A 34 4.604 16.017 -2.210 1.00 98.22 C
327
+ ATOM 270 C ALA A 34 5.795 16.849 -2.713 1.00 98.22 C
328
+ ATOM 271 CB ALA A 34 4.981 14.555 -1.951 1.00 98.22 C
329
+ ATOM 272 O ALA A 34 6.183 16.740 -3.877 1.00 98.22 O
330
+ ATOM 273 N SER A 35 6.393 17.657 -1.832 1.00 98.15 N
331
+ ATOM 274 CA SER A 35 7.605 18.408 -2.167 1.00 98.15 C
332
+ ATOM 275 C SER A 35 8.804 17.472 -2.324 1.00 98.15 C
333
+ ATOM 276 CB SER A 35 7.892 19.502 -1.128 1.00 98.15 C
334
+ ATOM 277 O SER A 35 8.881 16.420 -1.684 1.00 98.15 O
335
+ ATOM 278 OG SER A 35 8.277 18.958 0.122 1.00 98.15 O
336
+ ATOM 279 N GLU A 36 9.782 17.869 -3.139 1.00 97.77 N
337
+ ATOM 280 CA GLU A 36 11.019 17.094 -3.282 1.00 97.77 C
338
+ ATOM 281 C GLU A 36 11.753 16.941 -1.950 1.00 97.77 C
339
+ ATOM 282 CB GLU A 36 11.959 17.736 -4.300 1.00 97.77 C
340
+ ATOM 283 O GLU A 36 12.257 15.860 -1.665 1.00 97.77 O
341
+ ATOM 284 CG GLU A 36 11.370 17.687 -5.712 1.00 97.77 C
342
+ ATOM 285 CD GLU A 36 12.391 18.040 -6.798 1.00 97.77 C
343
+ ATOM 286 OE1 GLU A 36 12.005 17.875 -7.975 1.00 97.77 O
344
+ ATOM 287 OE2 GLU A 36 13.536 18.408 -6.450 1.00 97.77 O
345
+ ATOM 288 N GLN A 37 11.751 17.972 -1.098 1.00 98.11 N
346
+ ATOM 289 CA GLN A 37 12.347 17.905 0.238 1.00 98.11 C
347
+ ATOM 290 C GLN A 37 11.688 16.816 1.098 1.00 98.11 C
348
+ ATOM 291 CB GLN A 37 12.236 19.283 0.904 1.00 98.11 C
349
+ ATOM 292 O GLN A 37 12.388 16.002 1.697 1.00 98.11 O
350
+ ATOM 293 CG GLN A 37 12.994 19.344 2.240 1.00 98.11 C
351
+ ATOM 294 CD GLN A 37 12.865 20.697 2.933 1.00 98.11 C
352
+ ATOM 295 NE2 GLN A 37 13.519 20.887 4.057 1.00 98.11 N
353
+ ATOM 296 OE1 GLN A 37 12.170 21.598 2.496 1.00 98.11 O
354
+ ATOM 297 N LEU A 38 10.350 16.750 1.119 1.00 98.42 N
355
+ ATOM 298 CA LEU A 38 9.618 15.710 1.849 1.00 98.42 C
356
+ ATOM 299 C LEU A 38 9.969 14.310 1.324 1.00 98.42 C
357
+ ATOM 300 CB LEU A 38 8.109 16.000 1.732 1.00 98.42 C
358
+ ATOM 301 O LEU A 38 10.207 13.387 2.102 1.00 98.42 O
359
+ ATOM 302 CG LEU A 38 7.199 14.957 2.408 1.00 98.42 C
360
+ ATOM 303 CD1 LEU A 38 7.459 14.870 3.908 1.00 98.42 C
361
+ ATOM 304 CD2 LEU A 38 5.732 15.331 2.193 1.00 98.42 C
362
+ ATOM 305 N LEU A 39 10.036 14.157 0.000 1.00 98.43 N
363
+ ATOM 306 CA LEU A 39 10.369 12.890 -0.652 1.00 98.43 C
364
+ ATOM 307 C LEU A 39 11.825 12.471 -0.418 1.00 98.43 C
365
+ ATOM 308 CB LEU A 39 10.074 13.018 -2.151 1.00 98.43 C
366
+ ATOM 309 O LEU A 39 12.089 11.282 -0.239 1.00 98.43 O
367
+ ATOM 310 CG LEU A 39 8.572 13.112 -2.468 1.00 98.43 C
368
+ ATOM 311 CD1 LEU A 39 8.408 13.542 -3.920 1.00 98.43 C
369
+ ATOM 312 CD2 LEU A 39 7.863 11.774 -2.237 1.00 98.43 C
370
+ ATOM 313 N GLN A 40 12.756 13.424 -0.380 1.00 98.03 N
371
+ ATOM 314 CA GLN A 40 14.157 13.171 -0.051 1.00 98.03 C
372
+ ATOM 315 C GLN A 40 14.307 12.660 1.383 1.00 98.03 C
373
+ ATOM 316 CB GLN A 40 14.992 14.443 -0.252 1.00 98.03 C
374
+ ATOM 317 O GLN A 40 15.019 11.681 1.587 1.00 98.03 O
375
+ ATOM 318 CG GLN A 40 15.274 14.750 -1.730 1.00 98.03 C
376
+ ATOM 319 CD GLN A 40 15.960 16.100 -1.927 1.00 98.03 C
377
+ ATOM 320 NE2 GLN A 40 16.176 16.521 -3.153 1.00 98.03 N
378
+ ATOM 321 OE1 GLN A 40 16.347 16.789 -0.997 1.00 98.03 O
379
+ ATOM 322 N THR A 41 13.599 13.254 2.349 1.00 98.08 N
380
+ ATOM 323 CA THR A 41 13.566 12.764 3.737 1.00 98.08 C
381
+ ATOM 324 C THR A 41 12.902 11.390 3.831 1.00 98.08 C
382
+ ATOM 325 CB THR A 41 12.818 13.756 4.642 1.00 98.08 C
383
+ ATOM 326 O THR A 41 13.382 10.494 4.524 1.00 98.08 O
384
+ ATOM 327 CG2 THR A 41 12.887 13.376 6.120 1.00 98.08 C
385
+ ATOM 328 OG1 THR A 41 13.393 15.035 4.531 1.00 98.08 O
386
+ ATOM 329 N HIS A 42 11.803 11.171 3.104 1.00 98.16 N
387
+ ATOM 330 CA HIS A 42 11.109 9.885 3.115 1.00 98.16 C
388
+ ATOM 331 C HIS A 42 12.008 8.744 2.611 1.00 98.16 C
389
+ ATOM 332 CB HIS A 42 9.819 10.002 2.298 1.00 98.16 C
390
+ ATOM 333 O HIS A 42 12.106 7.706 3.272 1.00 98.16 O
391
+ ATOM 334 CG HIS A 42 9.057 8.707 2.214 1.00 98.16 C
392
+ ATOM 335 CD2 HIS A 42 8.955 7.913 1.111 1.00 98.16 C
393
+ ATOM 336 ND1 HIS A 42 8.364 8.083 3.224 1.00 98.16 N
394
+ ATOM 337 CE1 HIS A 42 7.836 6.952 2.723 1.00 98.16 C
395
+ ATOM 338 NE2 HIS A 42 8.157 6.811 1.428 1.00 98.16 N
396
+ ATOM 339 N TYR A 43 12.699 8.965 1.488 1.00 96.79 N
397
+ ATOM 340 CA TYR A 43 13.594 8.001 0.842 1.00 96.79 C
398
+ ATOM 341 C TYR A 43 15.070 8.158 1.236 1.00 96.79 C
399
+ ATOM 342 CB TYR A 43 13.390 8.049 -0.684 1.00 96.79 C
400
+ ATOM 343 O TYR A 43 15.939 7.677 0.512 1.00 96.79 O
401
+ ATOM 344 CG TYR A 43 12.044 7.551 -1.158 1.00 96.79 C
402
+ ATOM 345 CD1 TYR A 43 11.654 6.230 -0.869 1.00 96.79 C
403
+ ATOM 346 CD2 TYR A 43 11.185 8.400 -1.880 1.00 96.79 C
404
+ ATOM 347 CE1 TYR A 43 10.392 5.762 -1.272 1.00 96.79 C
405
+ ATOM 348 CE2 TYR A 43 9.926 7.930 -2.301 1.00 96.79 C
406
+ ATOM 349 OH TYR A 43 8.275 6.170 -2.289 1.00 96.79 O
407
+ ATOM 350 CZ TYR A 43 9.524 6.613 -1.988 1.00 96.79 C
408
+ ATOM 351 N GLU A 44 15.382 8.795 2.367 1.00 96.28 N
409
+ ATOM 352 CA GLU A 44 16.760 9.113 2.770 1.00 96.28 C
410
+ ATOM 353 C GLU A 44 17.688 7.886 2.776 1.00 96.28 C
411
+ ATOM 354 CB GLU A 44 16.720 9.777 4.151 1.00 96.28 C
412
+ ATOM 355 O GLU A 44 18.819 7.958 2.295 1.00 96.28 O
413
+ ATOM 356 CG GLU A 44 18.122 10.147 4.651 1.00 96.28 C
414
+ ATOM 357 CD GLU A 44 18.090 10.974 5.940 1.00 96.28 C
415
+ ATOM 358 OE1 GLU A 44 19.077 11.717 6.141 1.00 96.28 O
416
+ ATOM 359 OE2 GLU A 44 17.092 10.868 6.687 1.00 96.28 O
417
+ ATOM 360 N ALA A 45 17.179 6.727 3.206 1.00 94.35 N
418
+ ATOM 361 CA ALA A 45 17.915 5.460 3.198 1.00 94.35 C
419
+ ATOM 362 C ALA A 45 18.390 5.019 1.795 1.00 94.35 C
420
+ ATOM 363 CB ALA A 45 17.008 4.395 3.826 1.00 94.35 C
421
+ ATOM 364 O ALA A 45 19.291 4.193 1.674 1.00 94.35 O
422
+ ATOM 365 N LEU A 46 17.799 5.562 0.727 1.00 94.78 N
423
+ ATOM 366 CA LEU A 46 18.123 5.256 -0.667 1.00 94.78 C
424
+ ATOM 367 C LEU A 46 18.995 6.335 -1.325 1.00 94.78 C
425
+ ATOM 368 CB LEU A 46 16.821 5.025 -1.460 1.00 94.78 C
426
+ ATOM 369 O LEU A 46 19.298 6.213 -2.509 1.00 94.78 O
427
+ ATOM 370 CG LEU A 46 15.841 4.009 -0.846 1.00 94.78 C
428
+ ATOM 371 CD1 LEU A 46 14.628 3.860 -1.766 1.00 94.78 C
429
+ ATOM 372 CD2 LEU A 46 16.449 2.620 -0.642 1.00 94.78 C
430
+ ATOM 373 N LYS A 47 19.407 7.382 -0.596 1.00 95.48 N
431
+ ATOM 374 CA LYS A 47 20.086 8.569 -1.149 1.00 95.48 C
432
+ ATOM 375 C LYS A 47 21.388 8.259 -1.895 1.00 95.48 C
433
+ ATOM 376 CB LYS A 47 20.307 9.563 0.001 1.00 95.48 C
434
+ ATOM 377 O LYS A 47 21.747 8.987 -2.816 1.00 95.48 O
435
+ ATOM 378 CG LYS A 47 20.865 10.921 -0.449 1.00 95.48 C
436
+ ATOM 379 CD LYS A 47 20.980 11.852 0.763 1.00 95.48 C
437
+ ATOM 380 CE LYS A 47 21.497 13.233 0.352 1.00 95.48 C
438
+ ATOM 381 NZ LYS A 47 21.596 14.124 1.536 1.00 95.48 N
439
+ ATOM 382 N SER A 48 22.088 7.188 -1.521 1.00 96.25 N
440
+ ATOM 383 CA SER A 48 23.331 6.747 -2.171 1.00 96.25 C
441
+ ATOM 384 C SER A 48 23.110 5.939 -3.457 1.00 96.25 C
442
+ ATOM 385 CB SER A 48 24.163 5.930 -1.178 1.00 96.25 C
443
+ ATOM 386 O SER A 48 24.069 5.671 -4.180 1.00 96.25 O
444
+ ATOM 387 OG SER A 48 23.413 4.818 -0.725 1.00 96.25 O
445
+ ATOM 388 N LEU A 49 21.872 5.538 -3.765 1.00 96.06 N
446
+ ATOM 389 CA LEU A 49 21.566 4.711 -4.930 1.00 96.06 C
447
+ ATOM 390 C LEU A 49 21.432 5.567 -6.193 1.00 96.06 C
448
+ ATOM 391 CB LEU A 49 20.312 3.855 -4.668 1.00 96.06 C
449
+ ATOM 392 O LEU A 49 20.797 6.620 -6.196 1.00 96.06 O
450
+ ATOM 393 CG LEU A 49 20.417 2.948 -3.425 1.00 96.06 C
451
+ ATOM 394 CD1 LEU A 49 19.131 2.150 -3.238 1.00 96.06 C
452
+ ATOM 395 CD2 LEU A 49 21.563 1.939 -3.537 1.00 96.06 C
453
+ ATOM 396 N SER A 50 21.966 5.073 -7.312 1.00 96.40 N
454
+ ATOM 397 CA SER A 50 22.013 5.803 -8.592 1.00 96.40 C
455
+ ATOM 398 C SER A 50 20.639 6.202 -9.146 1.00 96.40 C
456
+ ATOM 399 CB SER A 50 22.749 4.951 -9.630 1.00 96.40 C
457
+ ATOM 400 O SER A 50 20.527 7.160 -9.910 1.00 96.40 O
458
+ ATOM 401 OG SER A 50 22.095 3.704 -9.805 1.00 96.40 O
459
+ ATOM 402 N PHE A 51 19.574 5.495 -8.759 1.00 94.42 N
460
+ ATOM 403 CA PHE A 51 18.205 5.800 -9.171 1.00 94.42 C
461
+ ATOM 404 C PHE A 51 17.486 6.806 -8.259 1.00 94.42 C
462
+ ATOM 405 CB PHE A 51 17.416 4.494 -9.323 1.00 94.42 C
463
+ ATOM 406 O PHE A 51 16.370 7.206 -8.587 1.00 94.42 O
464
+ ATOM 407 CG PHE A 51 17.294 3.676 -8.053 1.00 94.42 C
465
+ ATOM 408 CD1 PHE A 51 18.103 2.538 -7.867 1.00 94.42 C
466
+ ATOM 409 CD2 PHE A 51 16.363 4.042 -7.061 1.00 94.42 C
467
+ ATOM 410 CE1 PHE A 51 17.957 1.754 -6.711 1.00 94.42 C
468
+ ATOM 411 CE2 PHE A 51 16.229 3.266 -5.898 1.00 94.42 C
469
+ ATOM 412 CZ PHE A 51 17.010 2.111 -5.735 1.00 94.42 C
470
+ ATOM 413 N PHE A 52 18.086 7.235 -7.144 1.00 96.82 N
471
+ ATOM 414 CA PHE A 52 17.443 8.126 -6.175 1.00 96.82 C
472
+ ATOM 415 C PHE A 52 16.937 9.448 -6.780 1.00 96.82 C
473
+ ATOM 416 CB PHE A 52 18.379 8.356 -4.986 1.00 96.82 C
474
+ ATOM 417 O PHE A 52 15.767 9.768 -6.559 1.00 96.82 O
475
+ ATOM 418 CG PHE A 52 17.825 9.302 -3.945 1.00 96.82 C
476
+ ATOM 419 CD1 PHE A 52 18.251 10.644 -3.904 1.00 96.82 C
477
+ ATOM 420 CD2 PHE A 52 16.897 8.833 -3.000 1.00 96.82 C
478
+ ATOM 421 CE1 PHE A 52 17.767 11.508 -2.907 1.00 96.82 C
479
+ ATOM 422 CE2 PHE A 52 16.424 9.699 -2.002 1.00 96.82 C
480
+ ATOM 423 CZ PHE A 52 16.865 11.028 -1.945 1.00 96.82 C
481
+ ATOM 424 N PRO A 53 17.703 10.174 -7.625 1.00 97.37 N
482
+ ATOM 425 CA PRO A 53 17.192 11.393 -8.255 1.00 97.37 C
483
+ ATOM 426 C PRO A 53 15.956 11.129 -9.123 1.00 97.37 C
484
+ ATOM 427 CB PRO A 53 18.354 11.950 -9.087 1.00 97.37 C
485
+ ATOM 428 O PRO A 53 14.989 11.886 -9.088 1.00 97.37 O
486
+ ATOM 429 CG PRO A 53 19.593 11.366 -8.412 1.00 97.37 C
487
+ ATOM 430 CD PRO A 53 19.111 9.992 -7.962 1.00 97.37 C
488
+ ATOM 431 N LYS A 54 15.946 10.006 -9.856 1.00 97.30 N
489
+ ATOM 432 CA LYS A 54 14.801 9.597 -10.682 1.00 97.30 C
490
+ ATOM 433 C LYS A 54 13.595 9.206 -9.830 1.00 97.30 C
491
+ ATOM 434 CB LYS A 54 15.181 8.444 -11.623 1.00 97.30 C
492
+ ATOM 435 O LYS A 54 12.471 9.469 -10.240 1.00 97.30 O
493
+ ATOM 436 CG LYS A 54 16.218 8.849 -12.681 1.00 97.30 C
494
+ ATOM 437 CD LYS A 54 16.484 7.686 -13.648 1.00 97.30 C
495
+ ATOM 438 CE LYS A 54 17.513 8.104 -14.706 1.00 97.30 C
496
+ ATOM 439 NZ LYS A 54 17.801 7.009 -15.668 1.00 97.30 N
497
+ ATOM 440 N LEU A 55 13.812 8.591 -8.665 1.00 97.30 N
498
+ ATOM 441 CA LEU A 55 12.749 8.249 -7.718 1.00 97.30 C
499
+ ATOM 442 C LEU A 55 12.082 9.508 -7.156 1.00 97.30 C
500
+ ATOM 443 CB LEU A 55 13.324 7.362 -6.598 1.00 97.30 C
501
+ ATOM 444 O LEU A 55 10.855 9.581 -7.160 1.00 97.30 O
502
+ ATOM 445 CG LEU A 55 12.335 7.044 -5.459 1.00 97.30 C
503
+ ATOM 446 CD1 LEU A 55 11.116 6.257 -5.949 1.00 97.30 C
504
+ ATOM 447 CD2 LEU A 55 13.051 6.214 -4.396 1.00 97.30 C
505
+ ATOM 448 N VAL A 56 12.874 10.490 -6.713 1.00 98.14 N
506
+ ATOM 449 CA VAL A 56 12.349 11.763 -6.196 1.00 98.14 C
507
+ ATOM 450 C VAL A 56 11.582 12.500 -7.292 1.00 98.14 C
508
+ ATOM 451 CB VAL A 56 13.463 12.637 -5.587 1.00 98.14 C
509
+ ATOM 452 O VAL A 56 10.415 12.804 -7.078 1.00 98.14 O
510
+ ATOM 453 CG1 VAL A 56 12.940 14.012 -5.150 1.00 98.14 C
511
+ ATOM 454 CG2 VAL A 56 14.064 11.956 -4.349 1.00 98.14 C
512
+ ATOM 455 N ALA A 57 12.175 12.671 -8.480 1.00 98.01 N
513
+ ATOM 456 CA ALA A 57 11.528 13.335 -9.616 1.00 98.01 C
514
+ ATOM 457 C ALA A 57 10.234 12.631 -10.062 1.00 98.01 C
515
+ ATOM 458 CB ALA A 57 12.535 13.390 -10.771 1.00 98.01 C
516
+ ATOM 459 O ALA A 57 9.245 13.266 -10.414 1.00 98.01 O
517
+ ATOM 460 N TYR A 58 10.216 11.296 -10.040 1.00 98.32 N
518
+ ATOM 461 CA TYR A 58 9.013 10.526 -10.328 1.00 98.32 C
519
+ ATOM 462 C TYR A 58 7.919 10.777 -9.286 1.00 98.32 C
520
+ ATOM 463 CB TYR A 58 9.369 9.038 -10.387 1.00 98.32 C
521
+ ATOM 464 O TYR A 58 6.773 11.041 -9.640 1.00 98.32 O
522
+ ATOM 465 CG TYR A 58 8.156 8.143 -10.327 1.00 98.32 C
523
+ ATOM 466 CD1 TYR A 58 7.957 7.298 -9.216 1.00 98.32 C
524
+ ATOM 467 CD2 TYR A 58 7.183 8.241 -11.336 1.00 98.32 C
525
+ ATOM 468 CE1 TYR A 58 6.784 6.529 -9.125 1.00 98.32 C
526
+ ATOM 469 CE2 TYR A 58 6.017 7.472 -11.250 1.00 98.32 C
527
+ ATOM 470 OH TYR A 58 4.693 5.898 -10.139 1.00 98.32 O
528
+ ATOM 471 CZ TYR A 58 5.825 6.619 -10.152 1.00 98.32 C
529
+ ATOM 472 N MET A 59 8.261 10.705 -8.003 1.00 98.50 N
530
+ ATOM 473 CA MET A 59 7.293 10.863 -6.921 1.00 98.50 C
531
+ ATOM 474 C MET A 59 6.785 12.304 -6.789 1.00 98.50 C
532
+ ATOM 475 CB MET A 59 7.924 10.361 -5.622 1.00 98.50 C
533
+ ATOM 476 O MET A 59 5.641 12.489 -6.384 1.00 98.50 O
534
+ ATOM 477 CG MET A 59 8.056 8.831 -5.610 1.00 98.50 C
535
+ ATOM 478 SD MET A 59 6.512 7.871 -5.554 1.00 98.50 S
536
+ ATOM 479 CE MET A 59 5.785 8.512 -4.021 1.00 98.50 C
537
+ ATOM 480 N SER A 60 7.589 13.302 -7.172 1.00 98.09 N
538
+ ATOM 481 CA SER A 60 7.193 14.714 -7.243 1.00 98.09 C
539
+ ATOM 482 C SER A 60 6.512 15.089 -8.564 1.00 98.09 C
540
+ ATOM 483 CB SER A 60 8.396 15.638 -6.995 1.00 98.09 C
541
+ ATOM 484 O SER A 60 6.116 16.238 -8.729 1.00 98.09 O
542
+ ATOM 485 OG SER A 60 9.364 15.504 -8.018 1.00 98.09 O
543
+ ATOM 486 N SER A 61 6.335 14.159 -9.511 1.00 98.35 N
544
+ ATOM 487 CA SER A 61 5.731 14.467 -10.821 1.00 98.35 C
545
+ ATOM 488 C SER A 61 4.209 14.650 -10.795 1.00 98.35 C
546
+ ATOM 489 CB SER A 61 6.106 13.413 -11.867 1.00 98.35 C
547
+ ATOM 490 O SER A 61 3.630 15.110 -11.777 1.00 98.35 O
548
+ ATOM 491 OG SER A 61 5.484 12.163 -11.617 1.00 98.35 O
549
+ ATOM 492 N GLY A 62 3.539 14.297 -9.697 1.00 98.47 N
550
+ ATOM 493 CA GLY A 62 2.088 14.388 -9.599 1.00 98.47 C
551
+ ATOM 494 C GLY A 62 1.555 14.125 -8.191 1.00 98.47 C
552
+ ATOM 495 O GLY A 62 2.334 13.942 -7.252 1.00 98.47 O
553
+ ATOM 496 N PRO A 63 0.221 14.105 -8.032 1.00 98.80 N
554
+ ATOM 497 CA PRO A 63 -0.407 13.896 -6.739 1.00 98.80 C
555
+ ATOM 498 C PRO A 63 -0.174 12.476 -6.220 1.00 98.80 C
556
+ ATOM 499 CB PRO A 63 -1.894 14.192 -6.947 1.00 98.80 C
557
+ ATOM 500 O PRO A 63 -0.158 11.501 -6.970 1.00 98.80 O
558
+ ATOM 501 CG PRO A 63 -2.105 13.840 -8.419 1.00 98.80 C
559
+ ATOM 502 CD PRO A 63 -0.791 14.266 -9.069 1.00 98.80 C
560
+ ATOM 503 N VAL A 64 -0.066 12.366 -4.902 1.00 98.88 N
561
+ ATOM 504 CA VAL A 64 -0.026 11.110 -4.155 1.00 98.88 C
562
+ ATOM 505 C VAL A 64 -1.173 11.062 -3.155 1.00 98.88 C
563
+ ATOM 506 CB VAL A 64 1.329 10.907 -3.451 1.00 98.88 C
564
+ ATOM 507 O VAL A 64 -1.696 12.098 -2.738 1.00 98.88 O
565
+ ATOM 508 CG1 VAL A 64 2.441 10.691 -4.479 1.00 98.88 C
566
+ ATOM 509 CG2 VAL A 64 1.738 12.067 -2.534 1.00 98.88 C
567
+ ATOM 510 N VAL A 65 -1.542 9.849 -2.742 1.00 98.92 N
568
+ ATOM 511 CA VAL A 65 -2.523 9.620 -1.671 1.00 98.92 C
569
+ ATOM 512 C VAL A 65 -1.841 8.869 -0.525 1.00 98.92 C
570
+ ATOM 513 CB VAL A 65 -3.807 8.935 -2.180 1.00 98.92 C
571
+ ATOM 514 O VAL A 65 -1.855 7.628 -0.492 1.00 98.92 O
572
+ ATOM 515 CG1 VAL A 65 -4.889 8.893 -1.098 1.00 98.92 C
573
+ ATOM 516 CG2 VAL A 65 -4.396 9.677 -3.387 1.00 98.92 C
574
+ ATOM 517 N PRO A 66 -1.135 9.571 0.381 1.00 98.83 N
575
+ ATOM 518 CA PRO A 66 -0.705 8.991 1.643 1.00 98.83 C
576
+ ATOM 519 C PRO A 66 -1.928 8.611 2.485 1.00 98.83 C
577
+ ATOM 520 CB PRO A 66 0.186 10.044 2.307 1.00 98.83 C
578
+ ATOM 521 O PRO A 66 -2.954 9.290 2.486 1.00 98.83 O
579
+ ATOM 522 CG PRO A 66 -0.382 11.351 1.763 1.00 98.83 C
580
+ ATOM 523 CD PRO A 66 -0.742 10.974 0.331 1.00 98.83 C
581
+ ATOM 524 N MET A 67 -1.808 7.487 3.185 1.00 98.88 N
582
+ ATOM 525 CA MET A 67 -2.851 6.911 4.032 1.00 98.88 C
583
+ ATOM 526 C MET A 67 -2.195 6.206 5.210 1.00 98.88 C
584
+ ATOM 527 CB MET A 67 -3.686 5.876 3.265 1.00 98.88 C
585
+ ATOM 528 O MET A 67 -1.088 5.679 5.057 1.00 98.88 O
586
+ ATOM 529 CG MET A 67 -4.579 6.503 2.200 1.00 98.88 C
587
+ ATOM 530 SD MET A 67 -5.417 5.303 1.143 1.00 98.88 S
588
+ ATOM 531 CE MET A 67 -4.063 4.791 0.070 1.00 98.88 C
589
+ ATOM 532 N VAL A 68 -2.907 6.140 6.329 1.00 98.88 N
590
+ ATOM 533 CA VAL A 68 -2.532 5.350 7.503 1.00 98.88 C
591
+ ATOM 534 C VAL A 68 -3.652 4.356 7.787 1.00 98.88 C
592
+ ATOM 535 CB VAL A 68 -2.235 6.245 8.719 1.00 98.88 C
593
+ ATOM 536 O VAL A 68 -4.807 4.749 7.951 1.00 98.88 O
594
+ ATOM 537 CG1 VAL A 68 -1.684 5.415 9.885 1.00 98.88 C
595
+ ATOM 538 CG2 VAL A 68 -1.198 7.323 8.382 1.00 98.88 C
596
+ ATOM 539 N PHE A 69 -3.300 3.072 7.827 1.00 98.81 N
597
+ ATOM 540 CA PHE A 69 -4.197 1.982 8.201 1.00 98.81 C
598
+ ATOM 541 C PHE A 69 -3.748 1.377 9.530 1.00 98.81 C
599
+ ATOM 542 CB PHE A 69 -4.250 0.912 7.101 1.00 98.81 C
600
+ ATOM 543 O PHE A 69 -2.555 1.165 9.746 1.00 98.81 O
601
+ ATOM 544 CG PHE A 69 -5.010 1.325 5.857 1.00 98.81 C
602
+ ATOM 545 CD1 PHE A 69 -6.415 1.236 5.829 1.00 98.81 C
603
+ ATOM 546 CD2 PHE A 69 -4.317 1.788 4.722 1.00 98.81 C
604
+ ATOM 547 CE1 PHE A 69 -7.122 1.605 4.672 1.00 98.81 C
605
+ ATOM 548 CE2 PHE A 69 -5.025 2.158 3.564 1.00 98.81 C
606
+ ATOM 549 CZ PHE A 69 -6.428 2.065 3.539 1.00 98.81 C
607
+ ATOM 550 N GLU A 70 -4.706 1.067 10.394 1.00 98.56 N
608
+ ATOM 551 CA GLU A 70 -4.506 0.444 11.699 1.00 98.56 C
609
+ ATOM 552 C GLU A 70 -5.092 -0.978 11.702 1.00 98.56 C
610
+ ATOM 553 CB GLU A 70 -5.126 1.347 12.778 1.00 98.56 C
611
+ ATOM 554 O GLU A 70 -6.113 -1.254 11.068 1.00 98.56 O
612
+ ATOM 555 CG GLU A 70 -4.853 0.896 14.222 1.00 98.56 C
613
+ ATOM 556 CD GLU A 70 -5.738 1.662 15.215 1.00 98.56 C
614
+ ATOM 557 OE1 GLU A 70 -6.487 1.014 15.976 1.00 98.56 O
615
+ ATOM 558 OE2 GLU A 70 -5.805 2.906 15.153 1.00 98.56 O
616
+ ATOM 559 N GLY A 71 -4.431 -1.893 12.413 1.00 97.58 N
617
+ ATOM 560 CA GLY A 71 -4.939 -3.235 12.695 1.00 97.58 C
618
+ ATOM 561 C GLY A 71 -3.834 -4.248 12.999 1.00 97.58 C
619
+ ATOM 562 O GLY A 71 -2.636 -3.974 12.861 1.00 97.58 O
620
+ ATOM 563 N ARG A 72 -4.233 -5.460 13.403 1.00 96.75 N
621
+ ATOM 564 CA ARG A 72 -3.297 -6.541 13.759 1.00 96.75 C
622
+ ATOM 565 C ARG A 72 -2.372 -6.869 12.581 1.00 96.75 C
623
+ ATOM 566 CB ARG A 72 -4.080 -7.772 14.252 1.00 96.75 C
624
+ ATOM 567 O ARG A 72 -2.832 -7.195 11.484 1.00 96.75 O
625
+ ATOM 568 CG ARG A 72 -3.176 -8.881 14.825 1.00 96.75 C
626
+ ATOM 569 CD ARG A 72 -4.024 -10.042 15.372 1.00 96.75 C
627
+ ATOM 570 NE ARG A 72 -3.206 -11.121 15.971 1.00 96.75 N
628
+ ATOM 571 NH1 ARG A 72 -4.932 -12.602 16.416 1.00 96.75 N
629
+ ATOM 572 NH2 ARG A 72 -2.860 -13.136 16.998 1.00 96.75 N
630
+ ATOM 573 CZ ARG A 72 -3.669 -12.273 16.450 1.00 96.75 C
631
+ ATOM 574 N LYS A 73 -1.056 -6.799 12.828 1.00 97.45 N
632
+ ATOM 575 CA LYS A 73 0.005 -7.067 11.838 1.00 97.45 C
633
+ ATOM 576 C LYS A 73 -0.148 -6.273 10.526 1.00 97.45 C
634
+ ATOM 577 CB LYS A 73 0.118 -8.582 11.575 1.00 97.45 C
635
+ ATOM 578 O LYS A 73 0.269 -6.749 9.474 1.00 97.45 O
636
+ ATOM 579 CG LYS A 73 0.577 -9.417 12.777 1.00 97.45 C
637
+ ATOM 580 CD LYS A 73 0.674 -10.886 12.335 1.00 97.45 C
638
+ ATOM 581 CE LYS A 73 1.676 -11.683 13.176 1.00 97.45 C
639
+ ATOM 582 NZ LYS A 73 2.590 -12.436 12.280 1.00 97.45 N
640
+ ATOM 583 N VAL A 74 -0.742 -5.075 10.560 1.00 98.54 N
641
+ ATOM 584 CA VAL A 74 -1.056 -4.299 9.343 1.00 98.54 C
642
+ ATOM 585 C VAL A 74 0.172 -3.983 8.482 1.00 98.54 C
643
+ ATOM 586 CB VAL A 74 -1.846 -3.024 9.698 1.00 98.54 C
644
+ ATOM 587 O VAL A 74 0.063 -3.975 7.261 1.00 98.54 O
645
+ ATOM 588 CG1 VAL A 74 -1.028 -2.022 10.524 1.00 98.54 C
646
+ ATOM 589 CG2 VAL A 74 -2.391 -2.320 8.451 1.00 98.54 C
647
+ ATOM 590 N VAL A 75 1.355 -3.804 9.080 1.00 98.61 N
648
+ ATOM 591 CA VAL A 75 2.601 -3.582 8.325 1.00 98.61 C
649
+ ATOM 592 C VAL A 75 2.995 -4.830 7.525 1.00 98.61 C
650
+ ATOM 593 CB VAL A 75 3.744 -3.131 9.253 1.00 98.61 C
651
+ ATOM 594 O VAL A 75 3.136 -4.760 6.306 1.00 98.61 O
652
+ ATOM 595 CG1 VAL A 75 5.029 -2.893 8.459 1.00 98.61 C
653
+ ATOM 596 CG2 VAL A 75 3.393 -1.831 9.990 1.00 98.61 C
654
+ ATOM 597 N GLU A 76 3.111 -5.980 8.197 1.00 98.31 N
655
+ ATOM 598 CA GLU A 76 3.481 -7.269 7.588 1.00 98.31 C
656
+ ATOM 599 C GLU A 76 2.452 -7.696 6.528 1.00 98.31 C
657
+ ATOM 600 CB GLU A 76 3.598 -8.323 8.712 1.00 98.31 C
658
+ ATOM 601 O GLU A 76 2.778 -7.913 5.362 1.00 98.31 O
659
+ ATOM 602 CG GLU A 76 4.072 -9.709 8.239 1.00 98.31 C
660
+ ATOM 603 CD GLU A 76 4.005 -10.798 9.336 1.00 98.31 C
661
+ ATOM 604 OE1 GLU A 76 4.273 -11.974 9.020 1.00 98.31 O
662
+ ATOM 605 OE2 GLU A 76 3.617 -10.521 10.503 1.00 98.31 O
663
+ ATOM 606 N ASN A 77 1.176 -7.741 6.905 1.00 98.23 N
664
+ ATOM 607 CA ASN A 77 0.106 -8.178 6.013 1.00 98.23 C
665
+ ATOM 608 C ASN A 77 -0.172 -7.166 4.897 1.00 98.23 C
666
+ ATOM 609 CB ASN A 77 -1.150 -8.392 6.852 1.00 98.23 C
667
+ ATOM 610 O ASN A 77 -0.473 -7.566 3.774 1.00 98.23 O
668
+ ATOM 611 CG ASN A 77 -1.119 -9.628 7.726 1.00 98.23 C
669
+ ATOM 612 ND2 ASN A 77 -2.050 -9.737 8.642 1.00 98.23 N
670
+ ATOM 613 OD1 ASN A 77 -0.321 -10.532 7.579 1.00 98.23 O
671
+ ATOM 614 N GLY A 78 -0.038 -5.867 5.173 1.00 98.37 N
672
+ ATOM 615 CA GLY A 78 -0.117 -4.826 4.156 1.00 98.37 C
673
+ ATOM 616 C GLY A 78 0.964 -5.032 3.100 1.00 98.37 C
674
+ ATOM 617 O GLY A 78 0.663 -5.071 1.910 1.00 98.37 O
675
+ ATOM 618 N ARG A 79 2.214 -5.277 3.513 1.00 98.35 N
676
+ ATOM 619 CA ARG A 79 3.321 -5.614 2.604 1.00 98.35 C
677
+ ATOM 620 C ARG A 79 3.009 -6.823 1.721 1.00 98.35 C
678
+ ATOM 621 CB ARG A 79 4.601 -5.827 3.436 1.00 98.35 C
679
+ ATOM 622 O ARG A 79 3.305 -6.757 0.525 1.00 98.35 O
680
+ ATOM 623 CG ARG A 79 5.267 -4.488 3.811 1.00 98.35 C
681
+ ATOM 624 CD ARG A 79 6.356 -4.033 2.826 1.00 98.35 C
682
+ ATOM 625 NE ARG A 79 6.055 -4.430 1.436 1.00 98.35 N
683
+ ATOM 626 NH1 ARG A 79 6.102 -2.374 0.422 1.00 98.35 N
684
+ ATOM 627 NH2 ARG A 79 5.437 -4.203 -0.709 1.00 98.35 N
685
+ ATOM 628 CZ ARG A 79 5.881 -3.660 0.390 1.00 98.35 C
686
+ ATOM 629 N THR A 80 2.368 -7.855 2.270 1.00 98.32 N
687
+ ATOM 630 CA THR A 80 1.891 -9.018 1.504 1.00 98.32 C
688
+ ATOM 631 C THR A 80 0.787 -8.645 0.509 1.00 98.32 C
689
+ ATOM 632 CB THR A 80 1.444 -10.147 2.447 1.00 98.32 C
690
+ ATOM 633 O THR A 80 0.877 -9.018 -0.658 1.00 98.32 O
691
+ ATOM 634 CG2 THR A 80 0.922 -11.379 1.710 1.00 98.32 C
692
+ ATOM 635 OG1 THR A 80 2.561 -10.575 3.183 1.00 98.32 O
693
+ ATOM 636 N MET A 81 -0.210 -7.845 0.906 1.00 98.59 N
694
+ ATOM 637 CA MET A 81 -1.279 -7.374 0.005 1.00 98.59 C
695
+ ATOM 638 C MET A 81 -0.752 -6.514 -1.148 1.00 98.59 C
696
+ ATOM 639 CB MET A 81 -2.297 -6.540 0.790 1.00 98.59 C
697
+ ATOM 640 O MET A 81 -1.277 -6.559 -2.261 1.00 98.59 O
698
+ ATOM 641 CG MET A 81 -3.141 -7.380 1.747 1.00 98.59 C
699
+ ATOM 642 SD MET A 81 -4.180 -6.388 2.846 1.00 98.59 S
700
+ ATOM 643 CE MET A 81 -5.247 -5.558 1.641 1.00 98.59 C
701
+ ATOM 644 N LEU A 82 0.294 -5.725 -0.897 1.00 98.63 N
702
+ ATOM 645 CA LEU A 82 0.945 -4.914 -1.921 1.00 98.63 C
703
+ ATOM 646 C LEU A 82 1.639 -5.777 -2.985 1.00 98.63 C
704
+ ATOM 647 CB LEU A 82 1.958 -3.962 -1.257 1.00 98.63 C
705
+ ATOM 648 O LEU A 82 1.680 -5.376 -4.147 1.00 98.63 O
706
+ ATOM 649 CG LEU A 82 1.376 -2.603 -0.840 1.00 98.63 C
707
+ ATOM 650 CD1 LEU A 82 0.385 -2.612 0.311 1.00 98.63 C
708
+ ATOM 651 CD2 LEU A 82 2.513 -1.661 -0.438 1.00 98.63 C
709
+ ATOM 652 N GLY A 83 2.194 -6.927 -2.596 1.00 98.45 N
710
+ ATOM 653 CA GLY A 83 3.070 -7.744 -3.435 1.00 98.45 C
711
+ ATOM 654 C GLY A 83 4.537 -7.299 -3.386 1.00 98.45 C
712
+ ATOM 655 O GLY A 83 4.900 -6.304 -2.739 1.00 98.45 O
713
+ ATOM 656 N ALA A 84 5.401 -8.046 -4.075 1.00 97.59 N
714
+ ATOM 657 CA ALA A 84 6.837 -7.781 -4.154 1.00 97.59 C
715
+ ATOM 658 C ALA A 84 7.137 -6.394 -4.754 1.00 97.59 C
716
+ ATOM 659 CB ALA A 84 7.499 -8.902 -4.965 1.00 97.59 C
717
+ ATOM 660 O ALA A 84 6.314 -5.795 -5.441 1.00 97.59 O
718
+ ATOM 661 N THR A 85 8.313 -5.818 -4.480 1.00 94.66 N
719
+ ATOM 662 CA THR A 85 8.682 -4.493 -5.031 1.00 94.66 C
720
+ ATOM 663 C THR A 85 8.776 -4.501 -6.553 1.00 94.66 C
721
+ ATOM 664 CB THR A 85 9.962 -3.960 -4.381 1.00 94.66 C
722
+ ATOM 665 O THR A 85 8.341 -3.534 -7.184 1.00 94.66 O
723
+ ATOM 666 CG2 THR A 85 10.404 -2.602 -4.926 1.00 94.66 C
724
+ ATOM 667 OG1 THR A 85 9.680 -3.775 -3.013 1.00 94.66 O
725
+ ATOM 668 N LYS A 86 9.291 -5.596 -7.120 1.00 95.47 N
726
+ ATOM 669 CA LYS A 86 9.256 -5.879 -8.553 1.00 95.47 C
727
+ ATOM 670 C LYS A 86 7.914 -6.537 -8.907 1.00 95.47 C
728
+ ATOM 671 CB LYS A 86 10.413 -6.803 -8.940 1.00 95.47 C
729
+ ATOM 672 O LYS A 86 7.621 -7.596 -8.344 1.00 95.47 O
730
+ ATOM 673 CG LYS A 86 11.791 -6.153 -8.748 1.00 95.47 C
731
+ ATOM 674 CD LYS A 86 12.847 -7.130 -9.264 1.00 95.47 C
732
+ ATOM 675 CE LYS A 86 14.257 -6.545 -9.284 1.00 95.47 C
733
+ ATOM 676 NZ LYS A 86 15.132 -7.444 -10.075 1.00 95.47 N
734
+ ATOM 677 N PRO A 87 7.105 -5.943 -9.798 1.00 94.78 N
735
+ ATOM 678 CA PRO A 87 5.823 -6.521 -10.191 1.00 94.78 C
736
+ ATOM 679 C PRO A 87 5.938 -7.935 -10.763 1.00 94.78 C
737
+ ATOM 680 CB PRO A 87 5.234 -5.554 -11.212 1.00 94.78 C
738
+ ATOM 681 O PRO A 87 5.119 -8.775 -10.419 1.00 94.78 O
739
+ ATOM 682 CG PRO A 87 5.836 -4.214 -10.798 1.00 94.78 C
740
+ ATOM 683 CD PRO A 87 7.245 -4.613 -10.375 1.00 94.78 C
741
+ ATOM 684 N GLU A 88 7.007 -8.240 -11.507 1.00 95.24 N
742
+ ATOM 685 CA GLU A 88 7.259 -9.577 -12.072 1.00 95.24 C
743
+ ATOM 686 C GLU A 88 7.347 -10.707 -11.025 1.00 95.24 C
744
+ ATOM 687 CB GLU A 88 8.549 -9.517 -12.917 1.00 95.24 C
745
+ ATOM 688 O GLU A 88 7.113 -11.868 -11.341 1.00 95.24 O
746
+ ATOM 689 CG GLU A 88 9.842 -9.335 -12.085 1.00 95.24 C
747
+ ATOM 690 CD GLU A 88 10.767 -8.202 -12.552 1.00 95.24 C
748
+ ATOM 691 OE1 GLU A 88 12.009 -8.365 -12.438 1.00 95.24 O
749
+ ATOM 692 OE2 GLU A 88 10.241 -7.096 -12.801 1.00 95.24 O
750
+ ATOM 693 N ALA A 89 7.672 -10.370 -9.773 1.00 97.30 N
751
+ ATOM 694 CA ALA A 89 7.756 -11.302 -8.650 1.00 97.30 C
752
+ ATOM 695 C ALA A 89 6.526 -11.223 -7.726 1.00 97.30 C
753
+ ATOM 696 CB ALA A 89 9.069 -11.028 -7.907 1.00 97.30 C
754
+ ATOM 697 O ALA A 89 6.519 -11.808 -6.643 1.00 97.30 O
755
+ ATOM 698 N SER A 90 5.508 -10.444 -8.099 1.00 97.65 N
756
+ ATOM 699 CA SER A 90 4.282 -10.288 -7.321 1.00 97.65 C
757
+ ATOM 700 C SER A 90 3.260 -11.344 -7.727 1.00 97.65 C
758
+ ATOM 701 CB SER A 90 3.705 -8.885 -7.500 1.00 97.65 C
759
+ ATOM 702 O SER A 90 2.945 -11.492 -8.902 1.00 97.65 O
760
+ ATOM 703 OG SER A 90 4.636 -7.929 -7.031 1.00 97.65 O
761
+ ATOM 704 N CYS A 91 2.709 -12.062 -6.749 1.00 97.61 N
762
+ ATOM 705 CA CYS A 91 1.668 -13.051 -7.015 1.00 97.61 C
763
+ ATOM 706 C CYS A 91 0.375 -12.384 -7.527 1.00 97.61 C
764
+ ATOM 707 CB CYS A 91 1.381 -13.858 -5.742 1.00 97.61 C
765
+ ATOM 708 O CYS A 91 0.038 -11.287 -7.052 1.00 97.61 O
766
+ ATOM 709 SG CYS A 91 2.876 -14.732 -5.195 1.00 97.61 S
767
+ ATOM 710 N PRO A 92 -0.392 -13.055 -8.411 1.00 98.06 N
768
+ ATOM 711 CA PRO A 92 -1.765 -12.661 -8.724 1.00 98.06 C
769
+ ATOM 712 C PRO A 92 -2.602 -12.478 -7.449 1.00 98.06 C
770
+ ATOM 713 CB PRO A 92 -2.322 -13.778 -9.615 1.00 98.06 C
771
+ ATOM 714 O PRO A 92 -2.443 -13.226 -6.483 1.00 98.06 O
772
+ ATOM 715 CG PRO A 92 -1.073 -14.373 -10.263 1.00 98.06 C
773
+ ATOM 716 CD PRO A 92 -0.029 -14.255 -9.156 1.00 98.06 C
774
+ ATOM 717 N GLY A 93 -3.461 -11.460 -7.427 1.00 97.89 N
775
+ ATOM 718 CA GLY A 93 -4.252 -11.044 -6.265 1.00 97.89 C
776
+ ATOM 719 C GLY A 93 -3.575 -9.992 -5.378 1.00 97.89 C
777
+ ATOM 720 O GLY A 93 -4.246 -9.326 -4.584 1.00 97.89 O
778
+ ATOM 721 N SER A 94 -2.260 -9.786 -5.508 1.00 98.62 N
779
+ ATOM 722 CA SER A 94 -1.591 -8.632 -4.896 1.00 98.62 C
780
+ ATOM 723 C SER A 94 -1.785 -7.376 -5.745 1.00 98.62 C
781
+ ATOM 724 CB SER A 94 -0.109 -8.905 -4.647 1.00 98.62 C
782
+ ATOM 725 O SER A 94 -1.936 -7.456 -6.961 1.00 98.62 O
783
+ ATOM 726 OG SER A 94 0.619 -8.949 -5.856 1.00 98.62 O
784
+ ATOM 727 N ILE A 95 -1.731 -6.192 -5.129 1.00 98.76 N
785
+ ATOM 728 CA ILE A 95 -1.938 -4.919 -5.846 1.00 98.76 C
786
+ ATOM 729 C ILE A 95 -0.956 -4.778 -7.015 1.00 98.76 C
787
+ ATOM 730 CB ILE A 95 -1.818 -3.729 -4.866 1.00 98.76 C
788
+ ATOM 731 O ILE A 95 -1.337 -4.365 -8.107 1.00 98.76 O
789
+ ATOM 732 CG1 ILE A 95 -2.991 -3.761 -3.863 1.00 98.76 C
790
+ ATOM 733 CG2 ILE A 95 -1.796 -2.371 -5.597 1.00 98.76 C
791
+ ATOM 734 CD1 ILE A 95 -2.834 -2.814 -2.666 1.00 98.76 C
792
+ ATOM 735 N ARG A 96 0.320 -5.118 -6.808 1.00 98.71 N
793
+ ATOM 736 CA ARG A 96 1.321 -5.027 -7.876 1.00 98.71 C
794
+ ATOM 737 C ARG A 96 1.205 -6.148 -8.895 1.00 98.71 C
795
+ ATOM 738 CB ARG A 96 2.723 -4.966 -7.288 1.00 98.71 C
796
+ ATOM 739 O ARG A 96 1.418 -5.868 -10.066 1.00 98.71 O
797
+ ATOM 740 CG ARG A 96 2.945 -3.621 -6.589 1.00 98.71 C
798
+ ATOM 741 CD ARG A 96 4.249 -3.724 -5.823 1.00 98.71 C
799
+ ATOM 742 NE ARG A 96 4.516 -2.529 -5.009 1.00 98.71 N
800
+ ATOM 743 NH1 ARG A 96 5.865 -3.617 -3.514 1.00 98.71 N
801
+ ATOM 744 NH2 ARG A 96 5.614 -1.417 -3.351 1.00 98.71 N
802
+ ATOM 745 CZ ARG A 96 5.322 -2.524 -3.968 1.00 98.71 C
803
+ ATOM 746 N GLY A 97 0.845 -7.359 -8.471 1.00 98.30 N
804
+ ATOM 747 CA GLY A 97 0.607 -8.471 -9.392 1.00 98.30 C
805
+ ATOM 748 C GLY A 97 -0.561 -8.196 -10.340 1.00 98.30 C
806
+ ATOM 749 O GLY A 97 -0.468 -8.498 -11.523 1.00 98.30 O
807
+ ATOM 750 N ASP A 98 -1.620 -7.552 -9.847 1.00 98.55 N
808
+ ATOM 751 CA ASP A 98 -2.813 -7.275 -10.652 1.00 98.55 C
809
+ ATOM 752 C ASP A 98 -2.676 -6.012 -11.512 1.00 98.55 C
810
+ ATOM 753 CB ASP A 98 -4.033 -7.155 -9.730 1.00 98.55 C
811
+ ATOM 754 O ASP A 98 -3.278 -5.919 -12.582 1.00 98.55 O
812
+ ATOM 755 CG ASP A 98 -4.351 -8.438 -8.959 1.00 98.55 C
813
+ ATOM 756 OD1 ASP A 98 -3.948 -9.543 -9.383 1.00 98.55 O
814
+ ATOM 757 OD2 ASP A 98 -5.009 -8.304 -7.899 1.00 98.55 O
815
+ ATOM 758 N TYR A 99 -1.903 -5.018 -11.054 1.00 98.44 N
816
+ ATOM 759 CA TYR A 99 -1.921 -3.685 -11.661 1.00 98.44 C
817
+ ATOM 760 C TYR A 99 -0.576 -3.163 -12.151 1.00 98.44 C
818
+ ATOM 761 CB TYR A 99 -2.582 -2.680 -10.702 1.00 98.44 C
819
+ ATOM 762 O TYR A 99 -0.556 -2.069 -12.710 1.00 98.44 O
820
+ ATOM 763 CG TYR A 99 -3.974 -3.071 -10.248 1.00 98.44 C
821
+ ATOM 764 CD1 TYR A 99 -4.956 -3.384 -11.205 1.00 98.44 C
822
+ ATOM 765 CD2 TYR A 99 -4.285 -3.145 -8.877 1.00 98.44 C
823
+ ATOM 766 CE1 TYR A 99 -6.229 -3.809 -10.795 1.00 98.44 C
824
+ ATOM 767 CE2 TYR A 99 -5.575 -3.536 -8.462 1.00 98.44 C
825
+ ATOM 768 OH TYR A 99 -7.787 -4.332 -9.092 1.00 98.44 O
826
+ ATOM 769 CZ TYR A 99 -6.544 -3.893 -9.428 1.00 98.44 C
827
+ ATOM 770 N CYS A 100 0.551 -3.842 -11.961 1.00 98.28 N
828
+ ATOM 771 CA CYS A 100 1.861 -3.287 -12.316 1.00 98.28 C
829
+ ATOM 772 C CYS A 100 2.623 -4.199 -13.279 1.00 98.28 C
830
+ ATOM 773 CB CYS A 100 2.656 -2.986 -11.040 1.00 98.28 C
831
+ ATOM 774 O CYS A 100 2.511 -5.415 -13.215 1.00 98.28 O
832
+ ATOM 775 SG CYS A 100 1.696 -1.940 -9.913 1.00 98.28 S
833
+ ATOM 776 N GLN A 101 3.431 -3.591 -14.149 1.00 96.85 N
834
+ ATOM 777 CA GLN A 101 4.346 -4.302 -15.052 1.00 96.85 C
835
+ ATOM 778 C GLN A 101 5.799 -3.938 -14.723 1.00 96.85 C
836
+ ATOM 779 CB GLN A 101 3.987 -4.002 -16.519 1.00 96.85 C
837
+ ATOM 780 O GLN A 101 6.625 -4.820 -14.519 1.00 96.85 O
838
+ ATOM 781 CG GLN A 101 2.578 -4.500 -16.891 1.00 96.85 C
839
+ ATOM 782 CD GLN A 101 2.203 -4.227 -18.346 1.00 96.85 C
840
+ ATOM 783 NE2 GLN A 101 0.928 -4.164 -18.664 1.00 96.85 N
841
+ ATOM 784 OE1 GLN A 101 3.023 -4.070 -19.231 1.00 96.85 O
842
+ ATOM 785 N ASP A 102 6.087 -2.645 -14.538 1.00 96.59 N
843
+ ATOM 786 CA ASP A 102 7.443 -2.145 -14.291 1.00 96.59 C
844
+ ATOM 787 C ASP A 102 7.704 -1.752 -12.831 1.00 96.59 C
845
+ ATOM 788 CB ASP A 102 7.711 -0.930 -15.181 1.00 96.59 C
846
+ ATOM 789 O ASP A 102 6.889 -1.101 -12.173 1.00 96.59 O
847
+ ATOM 790 CG ASP A 102 7.659 -1.290 -16.660 1.00 96.59 C
848
+ ATOM 791 OD1 ASP A 102 8.534 -2.079 -17.069 1.00 96.59 O
849
+ ATOM 792 OD2 ASP A 102 6.773 -0.727 -17.336 1.00 96.59 O
850
+ ATOM 793 N VAL A 103 8.917 -2.026 -12.337 1.00 95.22 N
851
+ ATOM 794 CA VAL A 103 9.349 -1.637 -10.977 1.00 95.22 C
852
+ ATOM 795 C VAL A 103 9.292 -0.122 -10.731 1.00 95.22 C
853
+ ATOM 796 CB VAL A 103 10.748 -2.213 -10.677 1.00 95.22 C
854
+ ATOM 797 O VAL A 103 8.979 0.318 -9.623 1.00 95.22 O
855
+ ATOM 798 CG1 VAL A 103 11.861 -1.607 -11.544 1.00 95.22 C
856
+ ATOM 799 CG2 VAL A 103 11.129 -2.048 -9.201 1.00 95.22 C
857
+ ATOM 800 N GLY A 104 9.542 0.689 -11.764 1.00 95.14 N
858
+ ATOM 801 CA GLY A 104 9.465 2.152 -11.696 1.00 95.14 C
859
+ ATOM 802 C GLY A 104 8.037 2.716 -11.714 1.00 95.14 C
860
+ ATOM 803 O GLY A 104 7.866 3.921 -11.523 1.00 95.14 O
861
+ ATOM 804 N ARG A 105 7.024 1.874 -11.957 1.00 97.47 N
862
+ ATOM 805 CA ARG A 105 5.590 2.209 -12.032 1.00 97.47 C
863
+ ATOM 806 C ARG A 105 4.776 1.164 -11.265 1.00 97.47 C
864
+ ATOM 807 CB ARG A 105 5.135 2.311 -13.500 1.00 97.47 C
865
+ ATOM 808 O ARG A 105 3.951 0.440 -11.820 1.00 97.47 O
866
+ ATOM 809 CG ARG A 105 5.806 3.431 -14.305 1.00 97.47 C
867
+ ATOM 810 CD ARG A 105 5.448 4.822 -13.767 1.00 97.47 C
868
+ ATOM 811 NE ARG A 105 6.067 5.881 -14.584 1.00 97.47 N
869
+ ATOM 812 NH1 ARG A 105 8.095 5.983 -13.498 1.00 97.47 N
870
+ ATOM 813 NH2 ARG A 105 7.702 7.308 -15.251 1.00 97.47 N
871
+ ATOM 814 CZ ARG A 105 7.281 6.379 -14.438 1.00 97.47 C
872
+ ATOM 815 N ASN A 106 5.093 1.040 -9.976 1.00 97.81 N
873
+ ATOM 816 CA ASN A 106 4.556 -0.005 -9.107 1.00 97.81 C
874
+ ATOM 817 C ASN A 106 3.438 0.460 -8.152 1.00 97.81 C
875
+ ATOM 818 CB ASN A 106 5.704 -0.771 -8.432 1.00 97.81 C
876
+ ATOM 819 O ASN A 106 3.247 -0.188 -7.119 1.00 97.81 O
877
+ ATOM 820 CG ASN A 106 6.431 -0.012 -7.340 1.00 97.81 C
878
+ ATOM 821 ND2 ASN A 106 7.305 -0.695 -6.636 1.00 97.81 N
879
+ ATOM 822 OD1 ASN A 106 6.253 1.175 -7.093 1.00 97.81 O
880
+ ATOM 823 N VAL A 107 2.734 1.557 -8.485 1.00 98.15 N
881
+ ATOM 824 CA VAL A 107 1.443 2.054 -7.942 1.00 98.15 C
882
+ ATOM 825 C VAL A 107 1.389 2.416 -6.453 1.00 98.15 C
883
+ ATOM 826 CB VAL A 107 0.301 1.097 -8.355 1.00 98.15 C
884
+ ATOM 827 O VAL A 107 0.765 3.406 -6.081 1.00 98.15 O
885
+ ATOM 828 CG1 VAL A 107 -1.091 1.468 -7.825 1.00 98.15 C
886
+ ATOM 829 CG2 VAL A 107 0.151 1.069 -9.883 1.00 98.15 C
887
+ ATOM 830 N VAL A 108 2.023 1.653 -5.568 1.00 98.56 N
888
+ ATOM 831 CA VAL A 108 1.879 1.754 -4.113 1.00 98.56 C
889
+ ATOM 832 C VAL A 108 3.219 1.685 -3.385 1.00 98.56 C
890
+ ATOM 833 CB VAL A 108 0.908 0.693 -3.547 1.00 98.56 C
891
+ ATOM 834 O VAL A 108 4.133 0.936 -3.744 1.00 98.56 O
892
+ ATOM 835 CG1 VAL A 108 -0.553 1.021 -3.857 1.00 98.56 C
893
+ ATOM 836 CG2 VAL A 108 1.208 -0.726 -4.055 1.00 98.56 C
894
+ ATOM 837 N HIS A 109 3.320 2.462 -2.310 1.00 98.47 N
895
+ ATOM 838 CA HIS A 109 4.326 2.351 -1.250 1.00 98.47 C
896
+ ATOM 839 C HIS A 109 3.662 1.886 0.051 1.00 98.47 C
897
+ ATOM 840 CB HIS A 109 5.043 3.694 -1.047 1.00 98.47 C
898
+ ATOM 841 O HIS A 109 2.533 2.289 0.345 1.00 98.47 O
899
+ ATOM 842 CG HIS A 109 5.957 3.680 0.152 1.00 98.47 C
900
+ ATOM 843 CD2 HIS A 109 5.651 4.124 1.407 1.00 98.47 C
901
+ ATOM 844 ND1 HIS A 109 7.204 3.062 0.175 1.00 98.47 N
902
+ ATOM 845 CE1 HIS A 109 7.643 3.187 1.444 1.00 98.47 C
903
+ ATOM 846 NE2 HIS A 109 6.720 3.793 2.208 1.00 98.47 N
904
+ ATOM 847 N GLY A 110 4.384 1.090 0.835 1.00 98.24 N
905
+ ATOM 848 CA GLY A 110 3.994 0.669 2.179 1.00 98.24 C
906
+ ATOM 849 C GLY A 110 5.244 0.354 2.991 1.00 98.24 C
907
+ ATOM 850 O GLY A 110 6.169 -0.259 2.444 1.00 98.24 O
908
+ ATOM 851 N SER A 111 5.257 0.797 4.247 1.00 98.33 N
909
+ ATOM 852 CA SER A 111 6.378 0.667 5.179 1.00 98.33 C
910
+ ATOM 853 C SER A 111 6.792 -0.798 5.350 1.00 98.33 C
911
+ ATOM 854 CB SER A 111 5.996 1.281 6.531 1.00 98.33 C
912
+ ATOM 855 O SER A 111 5.955 -1.698 5.284 1.00 98.33 O
913
+ ATOM 856 OG SER A 111 5.264 2.483 6.344 1.00 98.33 O
914
+ ATOM 857 N ASP A 112 8.087 -1.048 5.524 1.00 96.76 N
915
+ ATOM 858 CA ASP A 112 8.671 -2.398 5.583 1.00 96.76 C
916
+ ATOM 859 C ASP A 112 8.773 -2.978 7.000 1.00 96.76 C
917
+ ATOM 860 CB ASP A 112 10.039 -2.377 4.887 1.00 96.76 C
918
+ ATOM 861 O ASP A 112 8.945 -4.182 7.170 1.00 96.76 O
919
+ ATOM 862 CG ASP A 112 11.047 -1.379 5.476 1.00 96.76 C
920
+ ATOM 863 OD1 ASP A 112 10.802 -0.828 6.580 1.00 96.76 O
921
+ ATOM 864 OD2 ASP A 112 12.031 -1.109 4.763 1.00 96.76 O
922
+ ATOM 865 N SER A 113 8.628 -2.130 8.012 1.00 97.76 N
923
+ ATOM 866 CA SER A 113 8.737 -2.463 9.426 1.00 97.76 C
924
+ ATOM 867 C SER A 113 7.872 -1.518 10.257 1.00 97.76 C
925
+ ATOM 868 CB SER A 113 10.201 -2.366 9.863 1.00 97.76 C
926
+ ATOM 869 O SER A 113 7.460 -0.452 9.790 1.00 97.76 O
927
+ ATOM 870 OG SER A 113 10.659 -1.041 9.704 1.00 97.76 O
928
+ ATOM 871 N THR A 114 7.572 -1.903 11.496 1.00 97.93 N
929
+ ATOM 872 CA THR A 114 6.872 -1.032 12.452 1.00 97.93 C
930
+ ATOM 873 C THR A 114 7.669 0.236 12.756 1.00 97.93 C
931
+ ATOM 874 CB THR A 114 6.595 -1.782 13.763 1.00 97.93 C
932
+ ATOM 875 O THR A 114 7.081 1.303 12.910 1.00 97.93 O
933
+ ATOM 876 CG2 THR A 114 5.397 -2.719 13.624 1.00 97.93 C
934
+ ATOM 877 OG1 THR A 114 7.706 -2.585 14.103 1.00 97.93 O
935
+ ATOM 878 N GLU A 115 9.000 0.147 12.775 1.00 97.99 N
936
+ ATOM 879 CA GLU A 115 9.892 1.295 12.954 1.00 97.99 C
937
+ ATOM 880 C GLU A 115 9.787 2.279 11.779 1.00 97.99 C
938
+ ATOM 881 CB GLU A 115 11.316 0.767 13.152 1.00 97.99 C
939
+ ATOM 882 O GLU A 115 9.492 3.461 11.985 1.00 97.99 O
940
+ ATOM 883 CG GLU A 115 12.292 1.886 13.537 1.00 97.99 C
941
+ ATOM 884 CD GLU A 115 13.683 1.364 13.927 1.00 97.99 C
942
+ ATOM 885 OE1 GLU A 115 14.504 2.215 14.331 1.00 97.99 O
943
+ ATOM 886 OE2 GLU A 115 13.908 0.136 13.832 1.00 97.99 O
944
+ ATOM 887 N SER A 116 9.905 1.783 10.540 1.00 97.79 N
945
+ ATOM 888 CA SER A 116 9.667 2.582 9.332 1.00 97.79 C
946
+ ATOM 889 C SER A 116 8.258 3.168 9.308 1.00 97.79 C
947
+ ATOM 890 CB SER A 116 9.859 1.747 8.064 1.00 97.79 C
948
+ ATOM 891 O SER A 116 8.101 4.331 8.949 1.00 97.79 O
949
+ ATOM 892 OG SER A 116 11.219 1.437 7.867 1.00 97.79 O
950
+ ATOM 893 N ALA A 117 7.238 2.409 9.720 1.00 98.50 N
951
+ ATOM 894 CA ALA A 117 5.860 2.890 9.764 1.00 98.50 C
952
+ ATOM 895 C ALA A 117 5.705 4.087 10.711 1.00 98.50 C
953
+ ATOM 896 CB ALA A 117 4.925 1.736 10.138 1.00 98.50 C
954
+ ATOM 897 O ALA A 117 5.166 5.109 10.300 1.00 98.50 O
955
+ ATOM 898 N ASN A 118 6.235 4.013 11.936 1.00 98.13 N
956
+ ATOM 899 CA ASN A 118 6.182 5.126 12.890 1.00 98.13 C
957
+ ATOM 900 C ASN A 118 6.890 6.378 12.351 1.00 98.13 C
958
+ ATOM 901 CB ASN A 118 6.810 4.673 14.217 1.00 98.13 C
959
+ ATOM 902 O ASN A 118 6.342 7.480 12.414 1.00 98.13 O
960
+ ATOM 903 CG ASN A 118 5.933 3.704 14.987 1.00 98.13 C
961
+ ATOM 904 ND2 ASN A 118 6.523 2.803 15.735 1.00 98.13 N
962
+ ATOM 905 OD1 ASN A 118 4.717 3.757 14.959 1.00 98.13 O
963
+ ATOM 906 N ARG A 119 8.090 6.214 11.779 1.00 98.21 N
964
+ ATOM 907 CA ARG A 119 8.846 7.316 11.163 1.00 98.21 C
965
+ ATOM 908 C ARG A 119 8.077 7.947 10.003 1.00 98.21 C
966
+ ATOM 909 CB ARG A 119 10.209 6.778 10.702 1.00 98.21 C
967
+ ATOM 910 O ARG A 119 7.982 9.167 9.915 1.00 98.21 O
968
+ ATOM 911 CG ARG A 119 11.118 7.872 10.109 1.00 98.21 C
969
+ ATOM 912 CD ARG A 119 12.412 7.276 9.537 1.00 98.21 C
970
+ ATOM 913 NE ARG A 119 12.142 6.383 8.387 1.00 98.21 N
971
+ ATOM 914 NH1 ARG A 119 12.373 7.906 6.667 1.00 98.21 N
972
+ ATOM 915 NH2 ARG A 119 11.738 5.829 6.211 1.00 98.21 N
973
+ ATOM 916 CZ ARG A 119 12.088 6.710 7.105 1.00 98.21 C
974
+ ATOM 917 N GLU A 120 7.537 7.122 9.111 1.00 98.63 N
975
+ ATOM 918 CA GLU A 120 6.803 7.578 7.933 1.00 98.63 C
976
+ ATOM 919 C GLU A 120 5.483 8.255 8.316 1.00 98.63 C
977
+ ATOM 920 CB GLU A 120 6.570 6.406 6.963 1.00 98.63 C
978
+ ATOM 921 O GLU A 120 5.193 9.324 7.792 1.00 98.63 O
979
+ ATOM 922 CG GLU A 120 7.867 5.959 6.264 1.00 98.63 C
980
+ ATOM 923 CD GLU A 120 7.706 4.669 5.440 1.00 98.63 C
981
+ ATOM 924 OE1 GLU A 120 8.734 4.043 5.116 1.00 98.63 O
982
+ ATOM 925 OE2 GLU A 120 6.576 4.331 5.008 1.00 98.63 O
983
+ ATOM 926 N ILE A 121 4.709 7.711 9.258 1.00 98.69 N
984
+ ATOM 927 CA ILE A 121 3.463 8.340 9.724 1.00 98.69 C
985
+ ATOM 928 C ILE A 121 3.744 9.754 10.239 1.00 98.69 C
986
+ ATOM 929 CB ILE A 121 2.767 7.454 10.782 1.00 98.69 C
987
+ ATOM 930 O ILE A 121 3.123 10.697 9.757 1.00 98.69 O
988
+ ATOM 931 CG1 ILE A 121 2.201 6.183 10.108 1.00 98.69 C
989
+ ATOM 932 CG2 ILE A 121 1.630 8.216 11.490 1.00 98.69 C
990
+ ATOM 933 CD1 ILE A 121 1.862 5.065 11.102 1.00 98.69 C
991
+ ATOM 934 N ASN A 122 4.736 9.909 11.121 1.00 98.32 N
992
+ ATOM 935 CA ASN A 122 5.114 11.207 11.688 1.00 98.32 C
993
+ ATOM 936 C ASN A 122 5.680 12.189 10.649 1.00 98.32 C
994
+ ATOM 937 CB ASN A 122 6.148 10.962 12.798 1.00 98.32 C
995
+ ATOM 938 O ASN A 122 5.614 13.399 10.844 1.00 98.32 O
996
+ ATOM 939 CG ASN A 122 5.560 10.302 14.032 1.00 98.32 C
997
+ ATOM 940 ND2 ASN A 122 6.362 9.592 14.788 1.00 98.32 N
998
+ ATOM 941 OD1 ASN A 122 4.395 10.432 14.360 1.00 98.32 O
999
+ ATOM 942 N LEU A 123 6.256 11.683 9.555 1.00 98.53 N
1000
+ ATOM 943 CA LEU A 123 6.753 12.514 8.459 1.00 98.53 C
1001
+ ATOM 944 C LEU A 123 5.616 13.020 7.557 1.00 98.53 C
1002
+ ATOM 945 CB LEU A 123 7.792 11.698 7.669 1.00 98.53 C
1003
+ ATOM 946 O LEU A 123 5.680 14.139 7.052 1.00 98.53 O
1004
+ ATOM 947 CG LEU A 123 8.426 12.461 6.494 1.00 98.53 C
1005
+ ATOM 948 CD1 LEU A 123 9.223 13.684 6.953 1.00 98.53 C
1006
+ ATOM 949 CD2 LEU A 123 9.363 11.538 5.718 1.00 98.53 C
1007
+ ATOM 950 N TRP A 124 4.599 12.190 7.316 1.00 98.65 N
1008
+ ATOM 951 CA TRP A 124 3.526 12.485 6.362 1.00 98.65 C
1009
+ ATOM 952 C TRP A 124 2.294 13.131 6.999 1.00 98.65 C
1010
+ ATOM 953 CB TRP A 124 3.154 11.204 5.604 1.00 98.65 C
1011
+ ATOM 954 O TRP A 124 1.557 13.821 6.292 1.00 98.65 O
1012
+ ATOM 955 CG TRP A 124 4.130 10.796 4.540 1.00 98.65 C
1013
+ ATOM 956 CD1 TRP A 124 5.178 9.954 4.687 1.00 98.65 C
1014
+ ATOM 957 CD2 TRP A 124 4.164 11.206 3.140 1.00 98.65 C
1015
+ ATOM 958 CE2 TRP A 124 5.273 10.569 2.508 1.00 98.65 C
1016
+ ATOM 959 CE3 TRP A 124 3.369 12.047 2.334 1.00 98.65 C
1017
+ ATOM 960 NE1 TRP A 124 5.855 9.822 3.496 1.00 98.65 N
1018
+ ATOM 961 CH2 TRP A 124 4.769 11.583 0.381 1.00 98.65 C
1019
+ ATOM 962 CZ2 TRP A 124 5.586 10.745 1.157 1.00 98.65 C
1020
+ ATOM 963 CZ3 TRP A 124 3.664 12.227 0.969 1.00 98.65 C
1021
+ ATOM 964 N PHE A 125 2.056 12.925 8.293 1.00 98.68 N
1022
+ ATOM 965 CA PHE A 125 0.862 13.379 8.999 1.00 98.68 C
1023
+ ATOM 966 C PHE A 125 1.216 14.008 10.344 1.00 98.68 C
1024
+ ATOM 967 CB PHE A 125 -0.101 12.203 9.200 1.00 98.68 C
1025
+ ATOM 968 O PHE A 125 2.029 13.492 11.107 1.00 98.68 O
1026
+ ATOM 969 CG PHE A 125 -0.648 11.630 7.909 1.00 98.68 C
1027
+ ATOM 970 CD1 PHE A 125 -1.785 12.207 7.316 1.00 98.68 C
1028
+ ATOM 971 CD2 PHE A 125 -0.024 10.526 7.301 1.00 98.68 C
1029
+ ATOM 972 CE1 PHE A 125 -2.323 11.660 6.141 1.00 98.68 C
1030
+ ATOM 973 CE2 PHE A 125 -0.549 9.988 6.113 1.00 98.68 C
1031
+ ATOM 974 CZ PHE A 125 -1.707 10.550 5.544 1.00 98.68 C
1032
+ ATOM 975 N SER A 126 0.548 15.113 10.649 1.00 98.31 N
1033
+ ATOM 976 CA SER A 126 0.478 15.662 11.995 1.00 98.31 C
1034
+ ATOM 977 C SER A 126 -0.423 14.794 12.886 1.00 98.31 C
1035
+ ATOM 978 CB SER A 126 -0.032 17.105 11.956 1.00 98.31 C
1036
+ ATOM 979 O SER A 126 -1.332 14.126 12.382 1.00 98.31 O
1037
+ ATOM 980 OG SER A 126 -1.350 17.158 11.457 1.00 98.31 O
1038
+ ATOM 981 N PRO A 127 -0.245 14.827 14.218 1.00 97.88 N
1039
+ ATOM 982 CA PRO A 127 -1.102 14.077 15.136 1.00 97.88 C
1040
+ ATOM 983 C PRO A 127 -2.598 14.394 14.992 1.00 97.88 C
1041
+ ATOM 984 CB PRO A 127 -0.592 14.434 16.535 1.00 97.88 C
1042
+ ATOM 985 O PRO A 127 -3.429 13.522 15.217 1.00 97.88 O
1043
+ ATOM 986 CG PRO A 127 0.878 14.779 16.301 1.00 97.88 C
1044
+ ATOM 987 CD PRO A 127 0.852 15.461 14.937 1.00 97.88 C
1045
+ ATOM 988 N GLN A 128 -2.953 15.621 14.597 1.00 98.05 N
1046
+ ATOM 989 CA GLN A 128 -4.343 16.054 14.415 1.00 98.05 C
1047
+ ATOM 990 C GLN A 128 -4.990 15.483 13.145 1.00 98.05 C
1048
+ ATOM 991 CB GLN A 128 -4.419 17.591 14.381 1.00 98.05 C
1049
+ ATOM 992 O GLN A 128 -6.213 15.443 13.051 1.00 98.05 O
1050
+ ATOM 993 CG GLN A 128 -3.970 18.268 15.687 1.00 98.05 C
1051
+ ATOM 994 CD GLN A 128 -2.458 18.276 15.911 1.00 98.05 C
1052
+ ATOM 995 NE2 GLN A 128 -2.007 18.360 17.141 1.00 98.05 N
1053
+ ATOM 996 OE1 GLN A 128 -1.645 18.190 15.006 1.00 98.05 O
1054
+ ATOM 997 N GLU A 129 -4.189 15.035 12.176 1.00 98.59 N
1055
+ ATOM 998 CA GLU A 129 -4.682 14.367 10.966 1.00 98.59 C
1056
+ ATOM 999 C GLU A 129 -5.002 12.880 11.203 1.00 98.59 C
1057
+ ATOM 1000 CB GLU A 129 -3.662 14.530 9.826 1.00 98.59 C
1058
+ ATOM 1001 O GLU A 129 -5.566 12.236 10.319 1.00 98.59 O
1059
+ ATOM 1002 CG GLU A 129 -3.554 15.965 9.289 1.00 98.59 C
1060
+ ATOM 1003 CD GLU A 129 -2.342 16.109 8.357 1.00 98.59 C
1061
+ ATOM 1004 OE1 GLU A 129 -2.489 16.040 7.115 1.00 98.59 O
1062
+ ATOM 1005 OE2 GLU A 129 -1.209 16.248 8.866 1.00 98.59 O
1063
+ ATOM 1006 N LEU A 130 -4.656 12.326 12.374 1.00 98.65 N
1064
+ ATOM 1007 CA LEU A 130 -4.946 10.942 12.747 1.00 98.65 C
1065
+ ATOM 1008 C LEU A 130 -6.284 10.862 13.497 1.00 98.65 C
1066
+ ATOM 1009 CB LEU A 130 -3.786 10.358 13.577 1.00 98.65 C
1067
+ ATOM 1010 O LEU A 130 -6.406 11.280 14.649 1.00 98.65 O
1068
+ ATOM 1011 CG LEU A 130 -2.385 10.445 12.942 1.00 98.65 C
1069
+ ATOM 1012 CD1 LEU A 130 -1.366 9.771 13.861 1.00 98.65 C
1070
+ ATOM 1013 CD2 LEU A 130 -2.303 9.774 11.569 1.00 98.65 C
1071
+ ATOM 1014 N CYS A 131 -7.304 10.297 12.855 1.00 98.47 N
1072
+ ATOM 1015 CA CYS A 131 -8.637 10.152 13.427 1.00 98.47 C
1073
+ ATOM 1016 C CYS A 131 -8.710 8.993 14.431 1.00 98.47 C
1074
+ ATOM 1017 CB CYS A 131 -9.668 9.957 12.307 1.00 98.47 C
1075
+ ATOM 1018 O CYS A 131 -8.394 7.847 14.110 1.00 98.47 O
1076
+ ATOM 1019 SG CYS A 131 -9.729 11.408 11.215 1.00 98.47 S
1077
+ ATOM 1020 N GLN A 132 -9.233 9.274 15.627 1.00 97.77 N
1078
+ ATOM 1021 CA GLN A 132 -9.548 8.261 16.634 1.00 97.77 C
1079
+ ATOM 1022 C GLN A 132 -11.034 7.899 16.578 1.00 97.77 C
1080
+ ATOM 1023 CB GLN A 132 -9.162 8.757 18.033 1.00 97.77 C
1081
+ ATOM 1024 O GLN A 132 -11.896 8.743 16.813 1.00 97.77 O
1082
+ ATOM 1025 CG GLN A 132 -7.649 8.977 18.175 1.00 97.77 C
1083
+ ATOM 1026 CD GLN A 132 -7.239 9.345 19.597 1.00 97.77 C
1084
+ ATOM 1027 NE2 GLN A 132 -5.968 9.579 19.833 1.00 97.77 N
1085
+ ATOM 1028 OE1 GLN A 132 -8.031 9.424 20.521 1.00 97.77 O
1086
+ ATOM 1029 N TYR A 133 -11.340 6.640 16.271 1.00 98.01 N
1087
+ ATOM 1030 CA TYR A 133 -12.708 6.124 16.256 1.00 98.01 C
1088
+ ATOM 1031 C TYR A 133 -12.744 4.610 16.490 1.00 98.01 C
1089
+ ATOM 1032 CB TYR A 133 -13.414 6.491 14.940 1.00 98.01 C
1090
+ ATOM 1033 O TYR A 133 -11.742 3.908 16.308 1.00 98.01 O
1091
+ ATOM 1034 CG TYR A 133 -12.918 5.751 13.713 1.00 98.01 C
1092
+ ATOM 1035 CD1 TYR A 133 -11.670 6.084 13.155 1.00 98.01 C
1093
+ ATOM 1036 CD2 TYR A 133 -13.715 4.755 13.112 1.00 98.01 C
1094
+ ATOM 1037 CE1 TYR A 133 -11.234 5.449 11.983 1.00 98.01 C
1095
+ ATOM 1038 CE2 TYR A 133 -13.277 4.115 11.936 1.00 98.01 C
1096
+ ATOM 1039 OH TYR A 133 -11.656 3.940 10.173 1.00 98.01 O
1097
+ ATOM 1040 CZ TYR A 133 -12.047 4.484 11.354 1.00 98.01 C
1098
+ ATOM 1041 N LYS A 134 -13.924 4.124 16.891 1.00 96.78 N
1099
+ ATOM 1042 CA LYS A 134 -14.242 2.698 17.012 1.00 96.78 C
1100
+ ATOM 1043 C LYS A 134 -14.803 2.191 15.687 1.00 96.78 C
1101
+ ATOM 1044 CB LYS A 134 -15.230 2.491 18.171 1.00 96.78 C
1102
+ ATOM 1045 O LYS A 134 -15.750 2.785 15.167 1.00 96.78 O
1103
+ ATOM 1046 CG LYS A 134 -15.509 1.000 18.411 1.00 96.78 C
1104
+ ATOM 1047 CD LYS A 134 -16.436 0.788 19.613 1.00 96.78 C
1105
+ ATOM 1048 CE LYS A 134 -16.638 -0.718 19.810 1.00 96.78 C
1106
+ ATOM 1049 NZ LYS A 134 -17.508 -1.016 20.975 1.00 96.78 N
1107
+ ATOM 1050 N GLN A 135 -14.272 1.091 15.163 1.00 95.74 N
1108
+ ATOM 1051 CA GLN A 135 -14.828 0.483 13.963 1.00 95.74 C
1109
+ ATOM 1052 C GLN A 135 -15.991 -0.423 14.384 1.00 95.74 C
1110
+ ATOM 1053 CB GLN A 135 -13.717 -0.237 13.188 1.00 95.74 C
1111
+ ATOM 1054 O GLN A 135 -15.839 -1.329 15.197 1.00 95.74 O
1112
+ ATOM 1055 CG GLN A 135 -14.009 -0.362 11.690 1.00 95.74 C
1113
+ ATOM 1056 CD GLN A 135 -12.837 -0.952 10.902 1.00 95.74 C
1114
+ ATOM 1057 NE2 GLN A 135 -12.968 -1.123 9.606 1.00 95.74 N
1115
+ ATOM 1058 OE1 GLN A 135 -11.762 -1.210 11.399 1.00 95.74 O
1116
+ ATOM 1059 N ALA A 136 -17.193 -0.176 13.857 1.00 96.94 N
1117
+ ATOM 1060 CA ALA A 136 -18.390 -0.920 14.269 1.00 96.94 C
1118
+ ATOM 1061 C ALA A 136 -18.271 -2.439 14.033 1.00 96.94 C
1119
+ ATOM 1062 CB ALA A 136 -19.595 -0.343 13.519 1.00 96.94 C
1120
+ ATOM 1063 O ALA A 136 -18.921 -3.228 14.716 1.00 96.94 O
1121
+ ATOM 1064 N VAL A 137 -17.425 -2.833 13.079 1.00 96.00 N
1122
+ ATOM 1065 CA VAL A 137 -17.173 -4.226 12.709 1.00 96.00 C
1123
+ ATOM 1066 C VAL A 137 -16.037 -4.890 13.496 1.00 96.00 C
1124
+ ATOM 1067 CB VAL A 137 -16.982 -4.382 11.187 1.00 96.00 C
1125
+ ATOM 1068 O VAL A 137 -15.826 -6.081 13.297 1.00 96.00 O
1126
+ ATOM 1069 CG1 VAL A 137 -18.285 -4.062 10.441 1.00 96.00 C
1127
+ ATOM 1070 CG2 VAL A 137 -15.885 -3.468 10.633 1.00 96.00 C
1128
+ ATOM 1071 N ASP A 138 -15.354 -4.183 14.411 1.00 95.73 N
1129
+ ATOM 1072 CA ASP A 138 -14.252 -4.741 15.221 1.00 95.73 C
1130
+ ATOM 1073 C ASP A 138 -14.603 -6.100 15.868 1.00 95.73 C
1131
+ ATOM 1074 CB ASP A 138 -13.774 -3.743 16.308 1.00 95.73 C
1132
+ ATOM 1075 O ASP A 138 -13.807 -7.030 15.721 1.00 95.73 O
1133
+ ATOM 1076 CG ASP A 138 -12.899 -2.592 15.799 1.00 95.73 C
1134
+ ATOM 1077 OD1 ASP A 138 -12.216 -2.793 14.778 1.00 95.73 O
1135
+ ATOM 1078 OD2 ASP A 138 -12.918 -1.506 16.437 1.00 95.73 O
1136
+ ATOM 1079 N PRO A 139 -15.795 -6.297 16.483 1.00 96.81 N
1137
+ ATOM 1080 CA PRO A 139 -16.163 -7.580 17.100 1.00 96.81 C
1138
+ ATOM 1081 C PRO A 139 -16.336 -8.754 16.122 1.00 96.81 C
1139
+ ATOM 1082 CB PRO A 139 -17.483 -7.322 17.839 1.00 96.81 C
1140
+ ATOM 1083 O PRO A 139 -16.470 -9.891 16.555 1.00 96.81 O
1141
+ ATOM 1084 CG PRO A 139 -17.509 -5.811 18.044 1.00 96.81 C
1142
+ ATOM 1085 CD PRO A 139 -16.831 -5.312 16.776 1.00 96.81 C
1143
+ ATOM 1086 N TRP A 140 -16.395 -8.489 14.814 1.00 97.41 N
1144
+ ATOM 1087 CA TRP A 140 -16.508 -9.510 13.766 1.00 97.41 C
1145
+ ATOM 1088 C TRP A 140 -15.171 -9.774 13.056 1.00 97.41 C
1146
+ ATOM 1089 CB TRP A 140 -17.587 -9.089 12.760 1.00 97.41 C
1147
+ ATOM 1090 O TRP A 140 -15.081 -10.694 12.245 1.00 97.41 O
1148
+ ATOM 1091 CG TRP A 140 -18.928 -8.758 13.342 1.00 97.41 C
1149
+ ATOM 1092 CD1 TRP A 140 -19.415 -7.512 13.525 1.00 97.41 C
1150
+ ATOM 1093 CD2 TRP A 140 -19.960 -9.661 13.843 1.00 97.41 C
1151
+ ATOM 1094 CE2 TRP A 140 -21.061 -8.877 14.304 1.00 97.41 C
1152
+ ATOM 1095 CE3 TRP A 140 -20.076 -11.062 13.962 1.00 97.41 C
1153
+ ATOM 1096 NE1 TRP A 140 -20.673 -7.572 14.089 1.00 97.41 N
1154
+ ATOM 1097 CH2 TRP A 140 -22.306 -10.849 14.942 1.00 97.41 C
1155
+ ATOM 1098 CZ2 TRP A 140 -22.222 -9.450 14.843 1.00 97.41 C
1156
+ ATOM 1099 CZ3 TRP A 140 -21.235 -11.649 14.505 1.00 97.41 C
1157
+ ATOM 1100 N ILE A 141 -14.145 -8.956 13.326 1.00 96.13 N
1158
+ ATOM 1101 CA ILE A 141 -12.797 -9.059 12.741 1.00 96.13 C
1159
+ ATOM 1102 C ILE A 141 -11.799 -9.609 13.770 1.00 96.13 C
1160
+ ATOM 1103 CB ILE A 141 -12.349 -7.674 12.206 1.00 96.13 C
1161
+ ATOM 1104 O ILE A 141 -10.845 -10.303 13.406 1.00 96.13 O
1162
+ ATOM 1105 CG1 ILE A 141 -13.303 -7.164 11.100 1.00 96.13 C
1163
+ ATOM 1106 CG2 ILE A 141 -10.915 -7.723 11.635 1.00 96.13 C
1164
+ ATOM 1107 CD1 ILE A 141 -13.096 -5.686 10.752 1.00 96.13 C
1165
+ ATOM 1108 N HIS A 142 -11.995 -9.286 15.048 1.00 93.03 N
1166
+ ATOM 1109 CA HIS A 142 -11.108 -9.645 16.148 1.00 93.03 C
1167
+ ATOM 1110 C HIS A 142 -11.830 -10.543 17.159 1.00 93.03 C
1168
+ ATOM 1111 CB HIS A 142 -10.554 -8.354 16.772 1.00 93.03 C
1169
+ ATOM 1112 O HIS A 142 -12.997 -10.306 17.464 1.00 93.03 O
1170
+ ATOM 1113 CG HIS A 142 -9.795 -7.494 15.784 1.00 93.03 C
1171
+ ATOM 1114 CD2 HIS A 142 -9.924 -6.144 15.580 1.00 93.03 C
1172
+ ATOM 1115 ND1 HIS A 142 -8.895 -7.957 14.852 1.00 93.03 N
1173
+ ATOM 1116 CE1 HIS A 142 -8.480 -6.912 14.118 1.00 93.03 C
1174
+ ATOM 1117 NE2 HIS A 142 -9.056 -5.783 14.540 1.00 93.03 N
1175
+ ATOM 1118 N GLU A 143 -11.121 -11.573 17.634 1.00 90.01 N
1176
+ ATOM 1119 CA GLU A 143 -11.546 -12.472 18.721 1.00 90.01 C
1177
+ ATOM 1120 C GLU A 143 -11.356 -11.840 20.106 1.00 90.01 C
1178
+ ATOM 1121 CB GLU A 143 -10.831 -13.838 18.593 1.00 90.01 C
1179
+ ATOM 1122 O GLU A 143 -10.395 -11.045 20.264 1.00 90.01 O
1180
+ ATOM 1123 CG GLU A 143 -9.312 -13.779 18.871 1.00 90.01 C
1181
+ ATOM 1124 CD GLU A 143 -8.523 -15.059 18.519 1.00 90.01 C
1182
+ ATOM 1125 OE1 GLU A 143 -7.279 -14.926 18.332 1.00 90.01 O
1183
+ ATOM 1126 OE2 GLU A 143 -9.123 -16.150 18.382 1.00 90.01 O
1184
+ ATOM 1127 OXT GLU A 143 -12.189 -12.169 20.976 1.00 90.01 O
1185
+ TER 1128 GLU A 143
1186
+ ENDMDL
1187
+ END
case_study/A0A516RTC5.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A5B8NBE6.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A5B8NBN0.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/A0A7J6F8C5.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/B1KN79.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/C1DMX5.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/C4R826.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/G4VQX9.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/J9PY59.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/J9VGQ7.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/J9VVW8.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/M9PF61.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/O53504.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q0RWC9.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q1NEJ0.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q39VG1.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q6F4N4.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q72K04.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q93UV7.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q9AGK2.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q9AI62.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q9KJF3.pdb ADDED
The diff for this file is too large to render. See raw diff
 
case_study/Q9XZ48.pdb ADDED
The diff for this file is too large to render. See raw diff
 
constants.py ADDED
@@ -0,0 +1,39 @@
1
+ reps1 = [
2
+ {
3
+ "model": 0,
4
+ "chain": "",
5
+ "resname": "",
6
+ "style": "cartoon", # line, stick, sphere, cartoon, surface
7
+ "color": "whiteCarbon", # blue, red, green, yellow, whiteCarbon
8
+ "residue_range": "", # 3-15
9
+ "around": 0, # surrounding range around the selection, default 0
10
+ "byres": False,
11
+ "visible": False
12
+ },
13
+ ]
14
+
15
+ style_list = ["Cartoon", "Sphere", "Stick", "Line", "Surface"]
16
+ color_list = ["White", "Blue", "Red", "Green", "Yellow", "Magenta", "Cyan", "Orange", "Purple", "Gray"]
17
+ default_reps = [
18
+ {
19
+ "model": 0,
20
+ "chain": "",
21
+ "resname": "",
22
+ "style": style_list[0][0].lower() + style_list[0][1:],
23
+ "color": color_list[0][0].lower() + color_list[0][1:] + "Carbon", # whiteCarbon
24
+ "residue_range": "", # 3-15
25
+ "around": 0, # surrounding range around the selection, default 0
26
+ "byres": False,
27
+ "visible": False
28
+ },
29
+ ]
30
+ model_list = ['M3Site-ESM3-abs', 'M3Site-ESM3-full', 'M3Site-ESM2-abs', 'M3Site-ESM2-full', 'M3Site-ESM1b-abs', 'M3Site-ESM1b-full']
31
+ no_cat_dict = {
32
+ 'b': 'background',
33
+ '0': 'CRI',
34
+ '1': 'SCI',
35
+ '2': 'PI',
36
+ '3': 'PTCR',
37
+ '4': 'IA',
38
+ '5': 'SSA'
39
+ }
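
The dictionaries above follow the `reps` schema of the `gradio_molecule3d` viewer used in `app.py`. As a hedged sketch (the residue range, style, and color below are illustrative values, not ones the app necessarily uses), a highlight entry for a predicted site could be appended to a copy of `default_reps`:

```python
import copy

from constants import default_reps, color_list

# Build a representation list: keep the base cartoon and add one highlight.
reps = copy.deepcopy(default_reps)
reps.append({
    "model": 0,
    "chain": "",
    "resname": "",
    "style": "stick",                           # draw the site residues as sticks
    "color": color_list[2].lower() + "Carbon",  # "redCarbon"
    "residue_range": "41-45",                   # hypothetical active-site span
    "around": 0,
    "byres": True,
    "visible": True,
})
```
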
esm/__init__.py ADDED
File without changes
esm/layers/attention.py ADDED
@@ -0,0 +1,70 @@
1
+ import functools
2
+
3
+ import einops
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch import nn
7
+
8
+ from esm.layers.rotary import RotaryEmbedding
9
+
10
+
11
+ class MultiHeadAttention(nn.Module):
12
+ def __init__(
13
+ self,
14
+ d_model: int,
15
+ n_heads: int,
16
+ bias: bool = False,
17
+ qk_layernorm: bool = True,
18
+ ):
19
+ super().__init__()
20
+
21
+ self.d_model = d_model
22
+ self.n_heads = n_heads
23
+
24
+ self.d_head = self.d_model // self.n_heads
25
+ self.layernorm_qkv = nn.Sequential(
26
+ nn.LayerNorm(d_model), nn.Linear(d_model, d_model * 3, bias=bias)
27
+ )
28
+ self.out_proj = nn.Linear(d_model, d_model, bias=bias)
29
+
30
+ if qk_layernorm:
31
+ self.q_ln = nn.LayerNorm(d_model, bias=bias)
32
+ self.k_ln = nn.LayerNorm(d_model, bias=bias)
33
+ else:
34
+ self.q_ln = nn.Identity()
35
+ self.k_ln = nn.Identity()
36
+
37
+ self.rotary = RotaryEmbedding(d_model // n_heads)
38
+
39
+ def _apply_rotary(self, q: torch.Tensor, k: torch.Tensor):
40
+ q = q.unflatten(-1, (self.n_heads, self.d_head))
41
+ k = k.unflatten(-1, (self.n_heads, self.d_head))
42
+ q, k = self.rotary(q, k)
43
+ q = q.flatten(-2, -1)
44
+ k = k.flatten(-2, -1)
45
+ return q, k
46
+
47
+ def forward(self, x, seq_id):
48
+ qkv_BLD3 = self.layernorm_qkv(x)
49
+ query_BLD, key_BLD, value_BLD = torch.chunk(qkv_BLD3, 3, dim=-1)
50
+ query_BLD, key_BLD = self.q_ln(query_BLD), self.k_ln(key_BLD)
51
+ query_BLD, key_BLD = self._apply_rotary(query_BLD, key_BLD)
52
+
53
+ n_heads = self.n_heads
54
+ reshaper = functools.partial(
55
+ einops.rearrange, pattern="b s (h d) -> b h s d", h=n_heads
56
+ )
57
+
58
+ query_BHLD, key_BHLD, value_BHLD = map(
59
+ reshaper, (query_BLD, key_BLD, value_BLD)
60
+ )
61
+
62
+ # Where True, enable participation in attention.
63
+ mask_BLL = seq_id.unsqueeze(-1) == seq_id.unsqueeze(-2)
64
+ mask_BHLL = mask_BLL.unsqueeze(1)
65
+
66
+ context_BHLD = F.scaled_dot_product_attention(
67
+ query_BHLD, key_BHLD, value_BHLD, mask_BHLL
68
+ )
69
+ context_BLD = einops.rearrange(context_BHLD, "b h s d -> b s (h d)")
70
+ return self.out_proj(context_BLD)
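
As a quick, hedged sanity check (tensor sizes are illustrative), `MultiHeadAttention` can be exercised on its own; the `seq_id` tensor doubles as the attention mask, so tokens from different sequences in a packed batch never attend to each other:

```python
import torch

from esm.layers.attention import MultiHeadAttention

attn = MultiHeadAttention(d_model=64, n_heads=4)

# One packed batch of 8 tokens: the first five belong to sequence 0,
# the last three to sequence 1, so attention is blocked across the split.
x = torch.randn(1, 8, 64)
seq_id = torch.tensor([[0, 0, 0, 0, 0, 1, 1, 1]])

out = attn(x, seq_id)
print(out.shape)  # torch.Size([1, 8, 64])
```
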
esm/layers/blocks.py ADDED
@@ -0,0 +1,153 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from esm.layers.attention import MultiHeadAttention
6
+ from esm.layers.geom_attention import (
7
+ GeometricReasoningOriginalImpl,
8
+ )
9
+ from esm.utils.structure.affine3d import Affine3D
10
+
11
+
12
+ def swiglu_correction_fn(expansion_ratio: float, d_model: int) -> int:
13
+     # set hidden dimension to the nearest multiple of 256 after the expansion ratio
14
+ return int(((expansion_ratio * d_model) + 255) // 256 * 256)
15
+
16
+
17
+ class SwiGLU(nn.Module):
18
+ """
19
+ SwiGLU activation function as an nn.Module, allowing it to be used within nn.Sequential.
20
+ This module splits the input tensor along the last dimension and applies the SiLU (Swish)
21
+ activation function to the first half, then multiplies it by the second half.
22
+ """
23
+
24
+ def __init__(self):
25
+ super(SwiGLU, self).__init__()
26
+
27
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
28
+ x1, x2 = x.chunk(2, dim=-1)
29
+ return F.silu(x1) * x2
30
+
31
+
32
+ def swiglu_ln_ffn(d_model: int, expansion_ratio: float, bias: bool):
33
+ return nn.Sequential(
34
+ nn.LayerNorm(d_model),
35
+ nn.Linear(
36
+ d_model, swiglu_correction_fn(expansion_ratio, d_model) * 2, bias=bias
37
+ ),
38
+ SwiGLU(),
39
+ nn.Linear(swiglu_correction_fn(expansion_ratio, d_model), d_model, bias=bias),
40
+ )
41
+
42
+
43
+ def gelu_ln_ffn(d_model: int, expansion_ratio: float, bias: bool):
44
+ hidden_dim = int(expansion_ratio * d_model)
45
+ return nn.Sequential(
46
+ nn.LayerNorm(d_model),
47
+ nn.Linear(d_model, hidden_dim, bias=bias),
48
+ nn.GELU(),
49
+ nn.Linear(hidden_dim, d_model, bias=bias),
50
+ )
51
+
52
+
53
+ class UnifiedTransformerBlock(nn.Module):
54
+ """
55
+ A unified transformer block that can optionally incorporate geometric attention.
56
+
57
+ This class defines a transformer block that can be configured to use geometric attention
58
+ alongside the standard multi-head attention mechanism. It is designed to be a flexible
59
+ component of transformer-based models, allowing for the integration of geometric reasoning.
60
+
61
+ Parameters
62
+ ----------
63
+ d_model : int
64
+ The dimensionality of the input and output features of the transformer block.
65
+ n_heads : int
66
+ The number of attention heads in the multi-head attention mechanism.
67
+ n_layers : int
68
+ The number of layers in the transformer block.
69
+ use_geom_attn : bool, optional
70
+ Whether to use geometric attention in addition to the standard multi-head attention. Defaults to False.
71
+ v_heads : int, optional
72
+ The number of heads to use for the geometric attention mechanism, if enabled. Must be specified if `use_geom_attn` is True.
73
+ """
74
+
75
+ def __init__(
76
+ self,
77
+ d_model: int,
78
+ n_heads: int,
79
+ use_geom_attn: bool = False,
80
+ use_plain_attn: bool = True,
81
+ v_heads: int | None = None,
82
+ bias: bool = False,
83
+ expansion_ratio: float = 4.0,
84
+ residue_scaling_factor: float = 1,
85
+ mask_and_zero_frameless: bool = False,
86
+ qk_layernorm: bool = True,
87
+ ffn_type: str = "swiglu", # swiglu | gelu
88
+ ):
89
+ super().__init__()
90
+ self.use_plain_attn = use_plain_attn
91
+ if self.use_plain_attn:
92
+ self.attn = MultiHeadAttention(
93
+ d_model, n_heads, bias, qk_layernorm=qk_layernorm
94
+ )
95
+ self.use_geom_attn = use_geom_attn
96
+ if self.use_geom_attn:
97
+ if v_heads is None:
98
+ raise ValueError("v_heads must be specified when use_geom_attn is True")
99
+ self.geom_attn = GeometricReasoningOriginalImpl(
100
+ c_s=d_model,
101
+ v_heads=v_heads,
102
+ bias=bias,
103
+ mask_and_zero_frameless=mask_and_zero_frameless,
104
+ )
105
+ if ffn_type == "swiglu":
106
+ self.ffn = swiglu_ln_ffn(d_model, expansion_ratio, bias)
107
+ elif ffn_type == "gelu":
108
+ self.ffn = gelu_ln_ffn(d_model, expansion_ratio, bias)
109
+ else:
110
+ raise ValueError(f"Unknown ffn_type: {ffn_type}")
111
+ self.scaling_factor = residue_scaling_factor
112
+
113
+ def forward(
114
+ self,
115
+ x: torch.Tensor,
116
+ sequence_id: torch.Tensor,
117
+ frames: Affine3D,
118
+ frames_mask: torch.Tensor,
119
+ chain_id: torch.Tensor,
120
+ ) -> torch.Tensor:
121
+ """
122
+ Forward pass for the UnifiedTransformerBlock.
123
+
124
+ Parameters
125
+ ----------
126
+ x : torch.Tensor[float]
127
+ Input tensor to the transformer block, typically the output from the previous layer.
128
+ sequence_id : torch.Tensor[int]
129
+ Tensor containing sequence IDs for each element in the batch, used for attention masking.
130
+ frames : Affine3D
131
+ Affine3D containing geometric frame information for geometric attention.
132
+ frames_mask : torch.Tensor[bool]
133
+ Boolean mask tensor indicating valid frames for geometric attention.
134
+ chain_id : torch.Tensor[int]
135
+ Tensor containing chain IDs for each element, used for attention masking in geometric attention.
136
+
137
+ Returns
138
+ -------
139
+ torch.Tensor[float]
140
+ The output tensor after applying the transformer block operations.
141
+ """
142
+ if self.use_plain_attn:
143
+ r1 = self.attn(x, sequence_id)
144
+ x = x + r1 / self.scaling_factor
145
+
146
+ if self.use_geom_attn:
147
+ r2 = self.geom_attn(x, frames, frames_mask, sequence_id, chain_id)
148
+ x = x + r2 / self.scaling_factor
149
+
150
+ r3 = self.ffn(x) / self.scaling_factor
151
+ x = x + r3
152
+
153
+ return x
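
A small, hedged illustration of the hidden-size rounding (the `d_model` and expansion ratio here are arbitrary example values): `swiglu_correction_fn` rounds the expanded width up to a multiple of 256, and `swiglu_ln_ffn` maps a `(batch, length, d_model)` tensor back to the same shape:

```python
import torch

from esm.layers.blocks import swiglu_correction_fn, swiglu_ln_ffn

# 512 * 8/3 ≈ 1365.3, which rounds up to the next multiple of 256 -> 1536.
print(swiglu_correction_fn(8 / 3, 512))  # 1536

ffn = swiglu_ln_ffn(d_model=512, expansion_ratio=8 / 3, bias=False)
y = ffn(torch.randn(2, 10, 512))
print(y.shape)  # torch.Size([2, 10, 512])
```
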
esm/layers/codebook.py ADDED
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.distributed as dist
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+
7
+
8
+ class EMACodebook(nn.Module):
9
+ def __init__(
10
+ self,
11
+ n_codes,
12
+ embedding_dim,
13
+ no_random_restart=True,
14
+ restart_thres=1.0,
15
+ ema_decay=0.99,
16
+ ):
17
+ super().__init__()
18
+ self.register_buffer("embeddings", torch.randn(n_codes, embedding_dim))
19
+ self.register_buffer("N", torch.zeros(n_codes))
20
+ self.register_buffer("z_avg", self.embeddings.data.clone())
21
+
22
+ self.n_codes = n_codes
23
+ self.embedding_dim = embedding_dim
24
+ self._need_init = True
25
+ self.no_random_restart = no_random_restart
26
+ self.restart_thres = restart_thres
27
+ self.freeze_codebook = False
28
+ self.ema_decay = ema_decay
29
+
30
+ def reset_parameters(self):
31
+ # For meta init
32
+ pass
33
+
34
+ def _tile(self, x):
35
+ d, ew = x.shape
36
+ if d < self.n_codes:
37
+ n_repeats = (self.n_codes + d - 1) // d
38
+ std = 0.01 / np.sqrt(ew)
39
+ x = x.repeat(n_repeats, 1)
40
+ x = x + torch.randn_like(x) * std
41
+ return x
42
+
43
+ def _init_embeddings(self, z):
44
+ # z: [b, t, c]
45
+ self._need_init = False
46
+ flat_inputs = z.view(-1, self.embedding_dim)
47
+ y = self._tile(flat_inputs)
48
+
49
+ y.shape[0]
50
+ _k_rand = y[torch.randperm(y.shape[0])][: self.n_codes]
51
+ if dist.is_initialized():
52
+ dist.broadcast(_k_rand, 0)
53
+ self.embeddings.data.copy_(_k_rand)
54
+ self.z_avg.data.copy_(_k_rand)
55
+ self.N.data.copy_(torch.ones(self.n_codes))
56
+
57
+ def forward(self, z):
58
+ # z: [b, t, c]
59
+ if self._need_init and self.training and not self.freeze_codebook:
60
+ self._init_embeddings(z)
61
+ # z is of shape [batch_size, sequence length, channels]
62
+ flat_inputs = z.view(-1, self.embedding_dim)
63
+ distances = (
64
+ (flat_inputs**2).sum(dim=1, keepdim=True)
65
+ - 2 * flat_inputs @ self.embeddings.t()
66
+ + (self.embeddings.t() ** 2).sum(dim=0, keepdim=True)
67
+ ) # [bt, c]
68
+
69
+ encoding_indices = torch.argmin(distances, dim=1)
70
+ encoding_indices = encoding_indices.view(*z.shape[:2]) # [b, t, ncode]
71
+
72
+ embeddings = F.embedding(encoding_indices, self.embeddings) # [b, t, c]
73
+
74
+ commitment_loss = 0.25 * F.mse_loss(z, embeddings.detach())
75
+
76
+ # EMA codebook update
77
+ if self.training and not self.freeze_codebook:
78
+ assert False, "Not implemented"
79
+ embeddings_st = (embeddings - z).detach() + z
80
+
81
+ return embeddings_st, encoding_indices, commitment_loss
82
+
83
+ def dictionary_lookup(self, encodings):
84
+ embeddings = F.embedding(encodings, self.embeddings)
85
+ return embeddings
86
+
87
+ def soft_codebook_lookup(self, weights: torch.Tensor) -> torch.Tensor:
88
+ return weights @ self.embeddings
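
In eval mode (the inference path; the training-time EMA update is not implemented above) the forward pass is effectively a nearest-neighbour lookup against the stored code vectors. A minimal sketch with illustrative sizes:

```python
import torch

from esm.layers.codebook import EMACodebook

codebook = EMACodebook(n_codes=64, embedding_dim=16)
codebook.eval()  # skip the lazy init / EMA update branches

z = torch.randn(2, 5, 16)                      # (batch, tokens, channels)
quantized, indices, commit_loss = codebook(z)  # straight-through quantization
print(quantized.shape, indices.shape)          # (2, 5, 16) (2, 5)
```
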
esm/layers/ffn.py ADDED
@@ -0,0 +1,29 @@
1
+ import torch.nn as nn
2
+ import torch.nn.functional as F
3
+ from torch import Tensor
4
+
5
+ # NOT CURRENTLY USED
6
+
7
+
8
+ class SwiGLU(nn.Module):
9
+ def __init__(self) -> None:
10
+ super().__init__()
11
+
12
+ def forward(self, x: Tensor) -> Tensor:
13
+ x1, x2 = x.chunk(2, dim=-1)
14
+ hidden = F.silu(x1) * x2
15
+ return hidden
16
+
17
+
18
+ class FFN(nn.Module):
19
+ def __init__(self, in_proj, activation, out_proj) -> None:
20
+ super().__init__()
21
+ self.in_proj = in_proj
22
+ self.activation = activation
23
+ self.out_proj = out_proj
24
+
25
+ def forward(self, x: Tensor) -> Tensor:
26
+ x = self.in_proj(x)
27
+ x = self.activation(x)
28
+ x = self.out_proj(x)
29
+ return x
esm/layers/geom_attention.py ADDED
@@ -0,0 +1,151 @@
1
+ from math import sqrt
2
+
3
+ import torch
4
+ from einops import rearrange
5
+ from torch import nn
6
+ from torch.nn import functional as F
7
+
8
+
9
+ class GeometricReasoningOriginalImpl(nn.Module):
10
+ def __init__(
11
+ self,
12
+ c_s: int,
13
+ v_heads: int,
14
+ num_vector_messages: int = 1,
15
+ mask_and_zero_frameless: bool = True,
16
+ divide_residual_by_depth: bool = False,
17
+ bias: bool = False,
18
+ ):
19
+ """Approximate implementation:
20
+
21
+ ATTN(A, v) := (softmax_j A_ij) v_j
22
+ make_rot_vectors(x) := R(i->g) Linear(x).reshape(..., 3)
23
+ make_vectors(x) := T(i->g) Linear(x).reshape(..., 3)
24
+
25
+ v <- make_rot_vectors(x)
26
+ q_dir, k_dir <- make_rot_vectors(x)
27
+ q_dist, k_dist <- make_vectors(x)
28
+
29
+ A_ij <- dot(q_dir_i, k_dir_j) -||q_dist_i - k_dist_j||^2
30
+ x <- x + Linear(T(g->i) ATTN(A, v))
31
+ """
32
+ super().__init__()
33
+ self.c_s = c_s
34
+ self.v_heads = v_heads
35
+ self.num_vector_messages = num_vector_messages
36
+ self.mask_and_zero_frameless = mask_and_zero_frameless
37
+
38
+ self.s_norm = nn.LayerNorm(c_s, bias=bias)
39
+ dim_proj = (
40
+ 4 * self.v_heads * 3 + self.v_heads * 3 * self.num_vector_messages
41
+ ) # 2 x (q, k) * number of heads * (x, y, z) + number of heads * number of vector messages * (x, y, z)
42
+ self.proj = nn.Linear(c_s, dim_proj, bias=bias)
43
+ channels_out = self.v_heads * 3 * self.num_vector_messages
44
+ self.out_proj = nn.Linear(channels_out, c_s, bias=bias)
45
+
46
+ # The basic idea is for some attention heads to pay more or less attention to rotation versus distance,
47
+ # as well as to control the sharpness of the softmax (i.e., should this head only attend to those residues
48
+ # very nearby or should there be shallower dropoff in attention weight?)
49
+ self.distance_scale_per_head = nn.Parameter(torch.zeros((self.v_heads)))
50
+ self.rotation_scale_per_head = nn.Parameter(torch.zeros((self.v_heads)))
51
+
52
+ def forward(self, s, affine, affine_mask, sequence_id, chain_id):
53
+ attn_bias = sequence_id.unsqueeze(-1) == sequence_id.unsqueeze(-2)
54
+ attn_bias = attn_bias.unsqueeze(1).float()
55
+ attn_bias = attn_bias.masked_fill(
56
+ ~affine_mask[:, None, None, :], torch.finfo(attn_bias.dtype).min
57
+ )
58
+ chain_id_mask = chain_id.unsqueeze(1) != chain_id.unsqueeze(2)
59
+ attn_bias = attn_bias.masked_fill(
60
+ chain_id_mask.unsqueeze(1), torch.finfo(s.dtype).min
61
+ )
62
+
63
+ ns = self.s_norm(s)
64
+ vec_rot, vec_dist = self.proj(ns).split(
65
+ [
66
+ self.v_heads * 2 * 3 + self.v_heads * 3 * self.num_vector_messages,
67
+ self.v_heads * 2 * 3,
68
+ ],
69
+ dim=-1,
70
+ )
71
+
72
+ # Rotate the queries and keys for the rotation term. We also rotate the values.
73
+ # NOTE(zeming, thayes): Values are only rotated, not translated. We may wish to change
74
+ # this in the future.
75
+ query_rot, key_rot, value = (
76
+ affine.rot[..., None]
77
+ .apply(rearrange(vec_rot, "... (h c) -> ... h c", c=3))
78
+ .split(
79
+ [
80
+ self.v_heads,
81
+ self.v_heads,
82
+ self.v_heads * self.num_vector_messages,
83
+ ],
84
+ dim=-2,
85
+ )
86
+ )
87
+
88
+ # Rotate and translate the queries and keys for the distance term
89
+ # NOTE(thayes): a simple speedup would be to apply all rotations together, then
90
+ # separately apply the translations.
91
+ query_dist, key_dist = (
92
+ affine[..., None]
93
+ .apply(rearrange(vec_dist, "... (h c) -> ... h c", c=3))
94
+ .chunk(2, dim=-2)
95
+ )
96
+
97
+ query_dist = rearrange(query_dist, "b s h d -> b h s 1 d")
98
+ key_dist = rearrange(key_dist, "b s h d -> b h 1 s d")
99
+ query_rot = rearrange(query_rot, "b s h d -> b h s d")
100
+ key_rot = rearrange(key_rot, "b s h d -> b h d s")
101
+ value = rearrange(
102
+ value, "b s (h m) d -> b h s (m d)", m=self.num_vector_messages
103
+ )
104
+
105
+ distance_term = (query_dist - key_dist).norm(dim=-1) / sqrt(3)
106
+ rotation_term = query_rot.matmul(key_rot) / sqrt(3)
107
+ distance_term_weight = rearrange(
108
+ F.softplus(self.distance_scale_per_head), "h -> h 1 1"
109
+ )
110
+ rotation_term_weight = rearrange(
111
+ F.softplus(self.rotation_scale_per_head), "h -> h 1 1"
112
+ )
113
+
114
+ attn_weight = (
115
+ rotation_term * rotation_term_weight - distance_term * distance_term_weight
116
+ )
117
+
118
+ if attn_bias is not None:
119
+ # we can re-use the attention bias from the transformer layers
120
+ # NOTE(thayes): This attention bias is expected to handle two things:
121
+ # 1. Masking attention on padding tokens
122
+ # 2. Masking cross sequence attention in the case of bin packing
123
+ s_q = attn_weight.size(2)
124
+ s_k = attn_weight.size(3)
125
+ _s_q = max(0, attn_bias.size(2) - s_q)
126
+ _s_k = max(0, attn_bias.size(3) - s_k)
127
+ attn_bias = attn_bias[:, :, _s_q:, _s_k:]
128
+ attn_weight = attn_weight + attn_bias
129
+
130
+ attn_weight = torch.softmax(attn_weight, dim=-1)
131
+
132
+ attn_out = attn_weight.matmul(value)
133
+
134
+ attn_out = (
135
+ affine.rot[..., None]
136
+ .invert()
137
+ .apply(
138
+ rearrange(
139
+ attn_out, "b h s (m d) -> b s (h m) d", m=self.num_vector_messages
140
+ )
141
+ )
142
+ )
143
+
144
+ attn_out = rearrange(
145
+ attn_out, "b s (h m) d -> b s (h m d)", m=self.num_vector_messages
146
+ )
147
+ if self.mask_and_zero_frameless:
148
+ attn_out = attn_out.masked_fill(~affine_mask[..., None], 0.0)
149
+ s = self.out_proj(attn_out)
150
+
151
+ return s
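
For intuition, the pre-softmax logit mixes a direction-agreement term and a distance penalty, each scaled per head through a softplus; the toy numbers below are purely illustrative and assume the frame transforms have already been applied:

```python
import math

import torch
import torch.nn.functional as F

q_dir = torch.tensor([1.0, 0.0, 0.0])    # rotated query direction vector
k_dir = torch.tensor([0.8, 0.6, 0.0])    # rotated key direction vector
q_pos = torch.tensor([0.0, 0.0, 0.0])    # query point in the global frame
k_pos = torch.tensor([3.0, 4.0, 0.0])    # key point in the global frame

rotation_term = torch.dot(q_dir, k_dir) / math.sqrt(3)
distance_term = (q_pos - k_pos).norm() / math.sqrt(3)

w_rot = F.softplus(torch.tensor(0.0))    # per-head scales are learned, init 0
w_dist = F.softplus(torch.tensor(0.0))   # softplus(0) ~= 0.693

logit = rotation_term * w_rot - distance_term * w_dist
print(logit)  # nearby, well-aligned residue pairs get the larger logits
```
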
esm/layers/regression_head.py ADDED
@@ -0,0 +1,24 @@
1
+ import torch.nn as nn
2
+
3
+
4
+ def RegressionHead(
5
+ d_model: int,
6
+ output_dim: int,
7
+ hidden_dim: int | None = None,
8
+ ) -> nn.Module:
9
+ """Single-hidden layer MLP for supervised output.
10
+
11
+ Args:
12
+ d_model: input dimension
13
+ output_dim: dimensionality of the output.
14
+ hidden_dim: optional dimension of hidden layer, defaults to d_model.
15
+ Returns:
16
+ output MLP module.
17
+ """
18
+ hidden_dim = hidden_dim if hidden_dim is not None else d_model
19
+ return nn.Sequential(
20
+ nn.Linear(d_model, hidden_dim),
21
+ nn.GELU(),
22
+ nn.LayerNorm(hidden_dim),
23
+ nn.Linear(hidden_dim, output_dim),
24
+ )
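
A brief usage sketch of `RegressionHead` as a per-residue classifier; the embedding width and class count below are assumed for illustration rather than taken from the M3Site configuration:

```python
import torch

from esm.layers.regression_head import RegressionHead

head = RegressionHead(d_model=1536, output_dim=7)  # hidden_dim defaults to d_model

embeddings = torch.randn(2, 128, 1536)  # (batch, residues, d_model)
logits = head(embeddings)
print(logits.shape)  # torch.Size([2, 128, 7])
```
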
esm/layers/rotary.py ADDED
@@ -0,0 +1,221 @@
1
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
2
+ #
3
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
4
+ # and OPT implementations in this library. It has been modified from its
5
+ # original forms to accommodate minor architectural differences compared
6
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ # See the License for the specific language governing permissions and
18
+ # limitations under the License.
19
+ # NOTE: this implementation is from LLaMA 2:
20
+ # https://huggingface.co/togethercomputer/LLaMA-2-7B-32K/blob/08639a72e17836184096ae6a7e2766f2a34c3e36/modeling_flash_llama.py#L114
21
+ # Flash attention rotary implementation can be installed like so: `pip install git+https://github.com/HazyResearch/flash-attention.git#subdirectory=csrc/rotary`
22
+
23
+ from typing import Tuple
24
+
25
+ import torch
26
+ from einops import rearrange, repeat
27
+
28
+
29
+ def rotate_half(x, interleaved=False):
30
+ if not interleaved:
31
+ x1, x2 = x.chunk(2, dim=-1)
32
+ return torch.cat((-x2, x1), dim=-1)
33
+ else:
34
+ x1, x2 = x[..., ::2], x[..., 1::2]
35
+ return rearrange(
36
+ torch.stack((-x2, x1), dim=-1), "... d two -> ... (d two)", two=2
37
+ )
38
+
39
+
40
+ def apply_rotary_emb_torch(x, cos, sin, interleaved=False, _inplace=False):
41
+ """
42
+ x: (batch_size, seqlen, nheads, headdim)
43
+ cos, sin: (seqlen, rotary_dim / 2)
44
+ """
45
+ ro_dim = cos.shape[-1] * 2
46
+ assert ro_dim <= x.shape[-1]
47
+ seqlen = x.size(1)
48
+ cos = cos[:seqlen]
49
+ sin = sin[:seqlen]
50
+ cos = repeat(cos, "s d -> s 1 (2 d)")
51
+ sin = repeat(sin, "s d -> s 1 (2 d)")
52
+ return torch.cat(
53
+ [
54
+ x[..., :ro_dim] * cos + rotate_half(x[..., :ro_dim], interleaved) * sin,
55
+ x[..., ro_dim:],
56
+ ],
57
+ dim=-1,
58
+ )
59
+
60
+
61
+ class RotaryEmbedding(torch.nn.Module):
62
+ """
63
+ The rotary position embeddings from RoFormer_ (Su et. al).
64
+ A crucial insight from the method is that the query and keys are
65
+ transformed by rotation matrices which depend on the relative positions.
66
+ Other implementations are available in the Rotary Transformer repo_ and in
67
+ GPT-NeoX_, GPT-NeoX was an inspiration
68
+ .. _RoFormer: https://arxiv.org/abs/2104.09864
69
+ .. _repo: https://github.com/ZhuiyiTechnology/roformer
70
+ .. _GPT-NeoX: https://github.com/EleutherAI/gpt-neox
71
+ If scale_base is not None, this implements XPos (Sun et al., https://arxiv.org/abs/2212.10554).
72
+ A recommended value for scale_base is 512: https://github.com/HazyResearch/flash-attention/issues/96
73
+ Reference: https://github.com/sunyt32/torchscale/blob/main/torchscale/component/xpos_relative_position.py
74
+ """
75
+
76
+ def __init__(
77
+ self,
78
+ dim: int,
79
+ base=10000.0,
80
+ interleaved=False,
81
+ scale_base=None,
82
+ scaling_factor=1.0,
83
+ pos_idx_in_fp32=True,
84
+ device=None,
85
+ ):
86
+ """
87
+ interleaved: if True, rotate pairs of even and odd dimensions (GPT-J style) instead
88
+ of 1st half and 2nd half (GPT-NeoX style).
89
+ pos_idx_in_fp32: if True, the position indices [0.0, ..., seqlen - 1] are in fp32,
90
+ otherwise they might be in lower precision.
91
+ This option was added because previously (before 2023-07-02), when we construct
92
+ the position indices, we use the dtype of self.inv_freq. In most cases this would
93
+ be fp32, but if the model is trained in pure bf16 (not mixed precision), then
94
+ self.inv_freq would be bf16, and the position indices are also in bf16.
95
+ Because of the limited precision of bf16 (e.g. 1995.0 is rounded to 2000.0), the
96
+ embeddings for some positions will coincide.
97
+ To maintain compatibility with models previously trained in pure bf16,
98
+ we add this option.
99
+ scaling_factor: RotaryEmbedding extended with linear scaling.
100
+ """
101
+ super().__init__()
102
+ self.dim = dim
103
+ self.base = float(base)
104
+ self.pos_idx_in_fp32 = pos_idx_in_fp32
105
+ # Generate and save the inverse frequency buffer (non trainable)
106
+ self.interleaved = interleaved
107
+ self.scale_base = scale_base
108
+ self.scaling_factor = scaling_factor
109
+ self.device = device
110
+
111
+ self._seq_len_cached = 0
112
+ self._cos_cached = None
113
+ self._sin_cached = None
114
+ self._cos_k_cached = None
115
+ self._sin_k_cached = None
116
+ self.reset_parameters()
117
+
118
+ def reset_parameters(self):
119
+ inv_freq = self._compute_inv_freq(self.device)
120
+ self.register_buffer("inv_freq", inv_freq, persistent=False)
121
+ arange = torch.arange(0, self.dim, 2, device=self.device, dtype=torch.float32)
122
+ scale = (
123
+ (arange + 0.4 * self.dim) / (1.4 * self.dim)
124
+ if self.scale_base is not None
125
+ else None
126
+ )
127
+ self.register_buffer("scale", scale)
128
+
129
+ def _compute_inv_freq(self, device=None):
130
+ return 1 / (
131
+ self.base
132
+ ** (
133
+ torch.arange(0, self.dim, 2, device=device, dtype=torch.float32)
134
+ / self.dim
135
+ )
136
+ )
137
+
138
+ def _update_cos_sin_cache(self, seqlen, device=None, dtype=None):
139
+ # Reset the tables if the sequence length has changed,
140
+ # if we're on a new device (possibly due to tracing for instance),
141
+ # or if we're switching from inference mode to training
142
+ if (
143
+ seqlen > self._seq_len_cached
144
+ or self._cos_cached is None
145
+ or self._cos_cached.device != device
146
+ or self._cos_cached.dtype != dtype
147
+ or (self.training and self._cos_cached.is_inference())
148
+ ):
149
+ self._seq_len_cached = seqlen
150
+ # We want fp32 here, not self.inv_freq.dtype, since the model could be loaded in bf16
151
+ # And the output of arange can be quite large, so bf16 would lose a lot of precision.
152
+ # However, for compatibility reason, we add an option to use the dtype of self.inv_freq.
153
+ if self.pos_idx_in_fp32:
154
+ t = torch.arange(seqlen, device=device, dtype=torch.float32)
155
+ t /= self.scaling_factor
156
+ # We want fp32 here as well since inv_freq will be multiplied with t, and the output
157
+ # will be large. Having it in bf16 will lose a lot of precision and cause the
158
+ # cos & sin output to change significantly.
159
+ # We want to recompute self.inv_freq if it was not loaded in fp32
160
+ if self.inv_freq.dtype != torch.float32:
161
+ inv_freq = self.inv_freq.to(torch.float32)
162
+ else:
163
+ inv_freq = self.inv_freq
164
+ else:
165
+ t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype)
166
+ t /= self.scaling_factor
167
+ inv_freq = self.inv_freq
168
+ # Don't do einsum, it converts fp32 to fp16 under AMP
169
+ # freqs = torch.einsum("i,j->ij", t, self.inv_freq)
170
+ freqs = torch.outer(t, inv_freq)
171
+
172
+ if self.scale is None:
173
+ self._cos_cached = torch.cos(freqs).to(dtype)
174
+ self._sin_cached = torch.sin(freqs).to(dtype)
175
+ else:
176
+ power = (
177
+ torch.arange(
178
+ seqlen, dtype=self.scale.dtype, device=self.scale.device
179
+ )
180
+ - seqlen // 2
181
+ ) / self.scale_base
182
+ scale = self.scale.to(device=power.device) ** power.unsqueeze(-1)
183
+ # We want the multiplication by scale to happen in fp32
184
+ self._cos_cached = (torch.cos(freqs) * scale).to(dtype)
185
+ self._sin_cached = (torch.sin(freqs) * scale).to(dtype)
186
+ self._cos_k_cached = (torch.cos(freqs) / scale).to(dtype)
187
+ self._sin_k_cached = (torch.sin(freqs) / scale).to(dtype)
188
+
189
+ def forward(
190
+ self, q: torch.Tensor, k: torch.Tensor, seqlen_offset: int = 0
191
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
192
+ """
193
+ q: (batch, seqlen, nheads, headdim)
194
+ k: (batch, seqlen, nheads, headdim)
195
+ seqlen_offset: can be used in generation where the qkv being passed in is only the last
196
+ token in the batch.
197
+ """
198
+ self._update_cos_sin_cache(
199
+ q.shape[1] + seqlen_offset, device=q.device, dtype=q.dtype
200
+ )
201
+ assert self._cos_cached is not None
202
+ assert self._sin_cached is not None
203
+ if self.scale is None:
204
+ return (
205
+ apply_rotary_emb_torch(
206
+ q,
207
+ self._cos_cached[seqlen_offset:],
208
+ self._sin_cached[seqlen_offset:],
209
+ self.interleaved,
210
+ True, # inplace=True
211
+ ),
212
+ apply_rotary_emb_torch(
213
+ k,
214
+ self._cos_cached[seqlen_offset:],
215
+ self._sin_cached[seqlen_offset:],
216
+ self.interleaved,
217
+ True, # inplace=True
218
+ ),
219
+ ) # type: ignore
220
+ else:
221
+ assert False
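A quick sanity check on the rotary implementation above — a minimal sketch, assuming this file is importable as `esm.layers.rotary` once the upload is installed. Rotary embeddings apply a pure rotation to each paired feature dimension, so the per-position norms of the rotated queries and keys should match the originals:

```python
# Illustrative only: verify that RotaryEmbedding preserves per-position norms.
# Assumes the module above is importable as esm.layers.rotary.
import torch

from esm.layers.rotary import RotaryEmbedding

batch, seqlen, nheads, headdim = 2, 16, 4, 64
q = torch.randn(batch, seqlen, nheads, headdim)
k = torch.randn(batch, seqlen, nheads, headdim)

rope = RotaryEmbedding(dim=headdim)  # defaults: non-interleaved, no XPos scaling
q_rot, k_rot = rope(q, k)

# Rotations are norm-preserving, so these should agree up to float error.
assert torch.allclose(q.norm(dim=-1), q_rot.norm(dim=-1), atol=1e-4)
assert torch.allclose(k.norm(dim=-1), k_rot.norm(dim=-1), atol=1e-4)
```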
esm/layers/structure_proj.py ADDED
@@ -0,0 +1,68 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from esm.utils.constants.physics import (
5
+ BB_COORDINATES,
6
+ )
7
+ from esm.utils.structure.affine3d import (
8
+ Affine3D,
9
+ RotationMatrix,
10
+ )
11
+
12
+
13
+ class Dim6RotStructureHead(nn.Module):
14
+ # Normally, AF2 uses quaternions to specify rotations. There's some evidence that
15
+ # other representations are better behaved - the best one according to
16
+ # https://openaccess.thecvf.com/content_CVPR_2019/papers/Zhou_On_the_Continuity_of_Rotation_Representations_in_Neural_Networks_CVPR_2019_paper.pdf
17
+ # is using Gram-Schmidt orthogonalization on 2 vectors, which is implemented here.
18
+ def __init__(
19
+ self,
20
+ input_dim: int,
21
+ trans_scale_factor: float = 10,
22
+ norm_type: str = "layernorm",
23
+ activation_fn: str = "esm_gelu",
24
+ predict_torsion_angles: bool = True,
25
+ ):
26
+ super().__init__()
27
+ self.ffn1 = nn.Linear(input_dim, input_dim)
28
+ self.activation_fn = nn.GELU()
29
+ self.norm = nn.LayerNorm(input_dim)
30
+ self.proj = nn.Linear(input_dim, 9 + 7 * 2)
31
+ self.trans_scale_factor = trans_scale_factor
32
+ self.predict_torsion_angles = predict_torsion_angles
33
+ self.bb_local_coords = torch.tensor(BB_COORDINATES).float()
34
+
35
+ def forward(self, x, affine, affine_mask, **kwargs):
36
+ if affine is None:
37
+ rigids = Affine3D.identity(
38
+ x.shape[:-1],
39
+ dtype=x.dtype,
40
+ device=x.device,
41
+ requires_grad=self.training,
42
+ rotation_type=RotationMatrix,
43
+ )
44
+ else:
45
+ rigids = affine
46
+
47
+ # [*, N]
48
+ x = self.ffn1(x)
49
+ x = self.activation_fn(x)
50
+ x = self.norm(x)
51
+ trans, x, y, angles = self.proj(x).split([3, 3, 3, 7 * 2], dim=-1)
52
+ trans = trans * self.trans_scale_factor
53
+ x = x / (x.norm(dim=-1, keepdim=True) + 1e-5)
54
+ y = y / (y.norm(dim=-1, keepdim=True) + 1e-5)
55
+ update = Affine3D.from_graham_schmidt(x + trans, trans, y + trans)
56
+ rigids = rigids.compose(update.mask(affine_mask))
57
+ affine = rigids.tensor
58
+
59
+ # We approximate the positions of the backbone atoms in the global frame by applying the rigid
60
+ # transformation to the mean of the backbone atoms in the local frame.
61
+ all_bb_coords_local = (
62
+ self.bb_local_coords[None, None, :, :]
63
+ .expand(*x.shape[:-1], 3, 3)
64
+ .to(x.device)
65
+ )
66
+ pred_xyz = rigids[..., None].apply(all_bb_coords_local)
67
+
68
+ return affine, pred_xyz
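The comment at the top of `Dim6RotStructureHead` refers to the continuous 6D rotation parameterization: two predicted 3-vectors are orthonormalized with Gram-Schmidt to give a rotation frame. A self-contained sketch (plain PyTorch, independent of this repo; the helper name is illustrative, not part of the codebase):

```python
# Illustrative sketch of the Gram-Schmidt construction behind the 6D rotation
# representation (Zhou et al., CVPR 2019) used by Dim6RotStructureHead.
import torch

def rotation_from_two_vectors(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    """Builds a proper 3x3 rotation matrix from two unnormalized 3-vectors."""
    e1 = a / a.norm()
    u2 = b - (e1 @ b) * e1              # remove the component of b along e1
    e2 = u2 / u2.norm()
    e3 = torch.linalg.cross(e1, e2)     # completes a right-handed frame
    return torch.stack([e1, e2, e3], dim=-1)

R = rotation_from_two_vectors(torch.tensor([1.0, 2.0, 0.5]), torch.tensor([0.0, 1.0, 1.0]))
assert torch.allclose(R.T @ R, torch.eye(3), atol=1e-5)                   # orthonormal
assert torch.isclose(torch.linalg.det(R), torch.tensor(1.0), atol=1e-5)   # det = +1
```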
esm/layers/transformer_stack.py ADDED
@@ -0,0 +1,94 @@
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+
6
+ from esm.layers.blocks import UnifiedTransformerBlock
7
+ from esm.utils.structure.affine3d import Affine3D
8
+
9
+
10
+ class TransformerStack(nn.Module):
11
+ """
12
+ A stack of transformer blocks used in the ESM-3 model. Each block is a UnifiedTransformerBlock,
13
+ which can either be geometric attention or standard multi-head attention.
14
+
15
+ Args:
16
+ d_model (int): The dimensionality of the input and output feature vectors.
17
+ n_heads (int): The number of attention heads.
18
+ v_heads (int): The number of voting heads.
19
+ n_layers (int): The number of transformer blocks in the stack.
20
+ n_layers_geom (int, optional): The number of transformer blocks that use geometric attention.
21
+ scale_residue (bool, optional): Whether to scale the residue connections in each transformer block.
22
+ mask_and_zero_frameless (bool, optional): Whether to mask and zero frameless positions in the input.
23
+ Only applies in the geometric attention blocks, which is conditioned on the structure
24
+ """
25
+
26
+ def __init__(
27
+ self,
28
+ d_model: int,
29
+ n_heads: int,
30
+ v_heads: int | None,
31
+ n_layers: int,
32
+ n_layers_geom: int = 1,
33
+ scale_residue: bool = True,
34
+ mask_and_zero_frameless: bool = False,
35
+ bias: bool = False,
36
+ qk_layernorm: bool = True,
37
+ ffn_type: str = "swiglu", # swiglu | gelu
38
+ expansion_ratio: float = 8 / 3,
39
+ ):
40
+ super().__init__()
41
+ self.blocks = nn.ModuleList(
42
+ [
43
+ UnifiedTransformerBlock(
44
+ d_model,
45
+ n_heads,
46
+ v_heads=v_heads,
47
+ use_geom_attn=i < n_layers_geom,
48
+ residue_scaling_factor=(
49
+ math.sqrt(n_layers / 36) if scale_residue else 1.0
50
+ ),
51
+ expansion_ratio=expansion_ratio,
52
+ mask_and_zero_frameless=mask_and_zero_frameless,
53
+ bias=bias,
54
+ qk_layernorm=qk_layernorm,
55
+ ffn_type=ffn_type,
56
+ )
57
+ for i in range(n_layers)
58
+ ]
59
+ )
60
+ self.norm = nn.LayerNorm(d_model, bias=False)
61
+
62
+ def forward(
63
+ self,
64
+ x: torch.Tensor,
65
+ sequence_id: torch.Tensor | None = None,
66
+ affine: Affine3D | None = None,
67
+ affine_mask: torch.Tensor | None = None,
68
+ chain_id: torch.Tensor | None = None,
69
+ ) -> tuple[torch.Tensor, torch.Tensor]:
70
+ """
71
+ Forward pass of the TransformerStack.
72
+
73
+ Args:
74
+ x (torch.Tensor): The input tensor of shape (batch_size, sequence_length, d_model).
75
+ sequence_id (torch.Tensor): The sequence ID tensor of shape (batch_size, sequence_length).
76
+ affine (Affine3D | None): The affine transformation tensor or None.
77
+ affine_mask (torch.Tensor | None): The affine mask tensor or None.
78
+ chain_id (torch.Tensor): The protein chain tensor of shape (batch_size, sequence_length).
79
+ Only used in geometric attention.
80
+
81
+ Returns:
82
+ post_norm: The output tensor of shape (batch_size, sequence_length, d_model).
83
+ pre_norm: The embedding of shape (batch_size, sequence_length, d_model).
84
+ """
85
+ *batch_dims, _ = x.shape
86
+ if sequence_id is None:
87
+ sequence_id = torch.ones(
88
+ size=batch_dims, dtype=torch.int64, device=x.device
89
+ )
90
+ if chain_id is None:
91
+ chain_id = torch.ones(size=batch_dims, dtype=torch.int64, device=x.device)
92
+ for block in self.blocks:
93
+ x = block(x, sequence_id, affine, affine_mask, chain_id)
94
+ return self.norm(x), x
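A minimal usage sketch for `TransformerStack`, assuming the `esm` package from this upload is on the path; the configuration mirrors the one `FunctionTokenDecoder` (further down in this diff) uses, with `n_layers_geom=0` so only standard attention blocks run and no structure inputs are needed:

```python
# Illustrative only: run a tiny TransformerStack on random features.
import torch

from esm.layers.transformer_stack import TransformerStack

stack = TransformerStack(
    d_model=64,
    n_heads=4,
    v_heads=None,
    n_layers=2,
    n_layers_geom=0,     # no geometric attention -> affine/affine_mask can stay None
    scale_residue=False,
    bias=True,
    qk_layernorm=False,
    ffn_type="gelu",
    expansion_ratio=4,
)

x = torch.randn(1, 10, 64)          # (batch, seq_len, d_model)
post_norm, pre_norm = stack(x)      # both (1, 10, 64)
print(post_norm.shape, pre_norm.shape)
```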
esm/models/esm3.py ADDED
@@ -0,0 +1,798 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ from functools import partial
5
+
6
+ import attr
7
+ import einops
8
+ import torch
9
+ import torch.nn as nn
10
+ from attr import dataclass
11
+
12
+ from esm.layers.regression_head import RegressionHead
13
+ from esm.layers.transformer_stack import TransformerStack
14
+ from esm.models.function_decoder import FunctionTokenDecoder
15
+ from esm.models.vqvae import (
16
+ StructureTokenDecoder,
17
+ StructureTokenEncoder,
18
+ )
19
+ from esm.sdk.api import (
20
+ ESM3InferenceClient,
21
+ ESMProtein,
22
+ ESMProteinTensor,
23
+ ForwardAndSampleOutput,
24
+ ForwardConfig,
25
+ ForwardOutput,
26
+ ForwardTrackData,
27
+ GenerationConfig,
28
+ ProteinType,
29
+ ReturnLogitsConfig,
30
+ SamplingConfig,
31
+ SamplingTrackConfig,
32
+ )
33
+ from esm.tokenization import get_model_tokenizers
34
+ from esm.utils import encoding
35
+ from esm.utils.constants import esm3 as C
36
+ from esm.utils.constants.models import ESM3_OPEN_SMALL
37
+ from esm.utils.decoding import decode_protein_tensor
38
+ from esm.utils.generation import (
39
+ iterative_sampling_raw,
40
+ iterative_sampling_tokens,
41
+ )
42
+ from esm.utils.misc import rbf
43
+ from esm.utils.sampling import (
44
+ get_default_sampling_config,
45
+ sample_function_logits,
46
+ sample_logits,
47
+ sample_residue_annotation_logits,
48
+ )
49
+ from esm.utils.structure.affine3d import (
50
+ build_affine3d_from_coordinates,
51
+ )
52
+
53
+
54
+ @dataclass
55
+ class ESMOutput:
56
+ sequence_logits: torch.Tensor
57
+ structure_logits: torch.Tensor
58
+ secondary_structure_logits: torch.Tensor
59
+ sasa_logits: torch.Tensor
60
+ function_logits: torch.Tensor
61
+ residue_logits: torch.Tensor
62
+ embeddings: torch.Tensor
63
+
64
+
65
+ class EncodeInputs(nn.Module):
66
+ """
67
+ Module for encoding input features in the ESM-3 model.
68
+
69
+ Args:
70
+ d_model (int): The dimensionality of the model's hidden states.
71
+ """
72
+
73
+ def __init__(self, d_model: int):
74
+ super().__init__()
75
+
76
+ # Sequence
77
+ self.sequence_embed = nn.Embedding(64, d_model)
78
+ # Mandatory information
79
+ self.plddt_projection = nn.Linear(16, d_model)
80
+ self.structure_per_res_plddt_projection = nn.Linear(16, d_model)
81
+
82
+ # Structure
83
+ self.structure_tokens_embed = nn.Embedding(4096 + 5, d_model)
84
+
85
+ # "Structural" features
86
+ self.ss8_embed = nn.Embedding(8 + 3, d_model)
87
+ self.sasa_embed = nn.Embedding(16 + 3, d_model)
88
+
89
+ # "Functional" features
90
+ self.function_embed = nn.ModuleList(
91
+ [nn.Embedding(260, d_model // 8, padding_idx=0) for _ in range(8)]
92
+ )
93
+
94
+ self.residue_embed = nn.EmbeddingBag(1478, d_model, mode="sum", padding_idx=0)
95
+
96
+ def forward(
97
+ self,
98
+ sequence_tokens: torch.Tensor,
99
+ structure_tokens: torch.Tensor,
100
+ average_plddt: torch.Tensor,
101
+ per_res_plddt: torch.Tensor,
102
+ ss8_tokens: torch.Tensor,
103
+ sasa_tokens: torch.Tensor,
104
+ function_tokens: torch.Tensor,
105
+ residue_annotation_tokens: torch.Tensor,
106
+ ) -> torch.Tensor:
107
+ sequence_embed = self.sequence_embed(sequence_tokens)
108
+
109
+ rbf_16_fn = partial(rbf, v_min=0.0, v_max=1.0, n_bins=16)
110
+ # the `masked_fill(padding_mask.unsqueeze(2), 0)` for the two below is unnecessary
111
+ # as pad tokens never even interact with the "real" tokens (due to sequence_id)
112
+ plddt_embed = self.plddt_projection(rbf_16_fn(average_plddt))
113
+ structure_per_res_plddt = self.structure_per_res_plddt_projection(
114
+ rbf_16_fn(per_res_plddt)
115
+ )
116
+
117
+ # Structure + "structural features" embeds
118
+ structure_embed = self.structure_tokens_embed(structure_tokens)
119
+ ss8_embed = self.ss8_embed(ss8_tokens)
120
+ sasa_embed = self.sasa_embed(sasa_tokens)
121
+
122
+ # "Functional" features embeds
123
+ function_embed = torch.cat(
124
+ [
125
+ embed_fn(funcs)
126
+ for embed_fn, funcs in zip(
127
+ self.function_embed, function_tokens.unbind(-1)
128
+ )
129
+ ],
130
+ -1,
131
+ )
132
+
133
+ # Residue embeds
134
+ B, L, N = residue_annotation_tokens.shape
135
+ residue_embed = self.residue_embed(
136
+ einops.rearrange(
137
+ residue_annotation_tokens, "B L N -> (B L) N", B=B, L=L, N=N
138
+ )
139
+ )
140
+ residue_embed = einops.rearrange(residue_embed, "(B L) D -> B L D", B=B, L=L)
141
+
142
+ return (
143
+ sequence_embed
144
+ + plddt_embed
145
+ + structure_per_res_plddt
146
+ + structure_embed
147
+ + ss8_embed
148
+ + sasa_embed
149
+ + function_embed
150
+ + residue_embed
151
+ )
152
+
153
+
154
+ class OutputHeads(nn.Module):
155
+ def __init__(self, d_model: int):
156
+ super().__init__()
157
+ self.sequence_head = RegressionHead(d_model, 64)
158
+ self.structure_head = RegressionHead(d_model, 4096)
159
+ self.ss8_head = RegressionHead(d_model, 8 + 3)
160
+ self.sasa_head = RegressionHead(d_model, 16 + 3)
161
+ self.function_head = RegressionHead(d_model, 260 * 8)
162
+ self.residue_head = RegressionHead(d_model, 1478)
163
+
164
+ def forward(self, x: torch.Tensor, embed: torch.Tensor) -> ESMOutput:
165
+ sequence_logits = self.sequence_head(x)
166
+ structure_logits = self.structure_head(x)
167
+ secondary_structure_logits = self.ss8_head(x)
168
+ sasa_logits = self.sasa_head(x)
169
+ function_logits = self.function_head(x)
170
+ function_logits = einops.rearrange(
171
+ function_logits,
172
+ "... (k v) -> ... k v",
173
+ k=8,
174
+ )
175
+
176
+ residue_logits = self.residue_head(x)
177
+
178
+ return ESMOutput(
179
+ sequence_logits=sequence_logits,
180
+ structure_logits=structure_logits,
181
+ secondary_structure_logits=secondary_structure_logits,
182
+ sasa_logits=sasa_logits,
183
+ function_logits=function_logits,
184
+ residue_logits=residue_logits,
185
+ embeddings=embed,
186
+ )
187
+
188
+
189
+ class ESM3(nn.Module, ESM3InferenceClient):
190
+ """
191
+ ESM3 model implementation.
192
+
193
+ Args:
194
+ d_model (int): The dimensionality of the input and output feature vectors.
195
+ n_heads (int): The number of attention heads in the transformer layers.
196
+ v_heads (int): The number of attention heads in the variational transformer layers.
197
+ n_layers (int): The number of transformer layers.
198
+ """
199
+
200
+ def __init__(
201
+ self,
202
+ d_model: int,
203
+ n_heads: int,
204
+ v_heads: int,
205
+ n_layers: int,
206
+ structure_encoder_name: str,
207
+ structure_decoder_name: str,
208
+ function_decoder_name: str,
209
+ ):
210
+ super().__init__()
211
+ self.encoder = EncodeInputs(d_model)
212
+ self.transformer = TransformerStack(
213
+ d_model,
214
+ n_heads,
215
+ v_heads,
216
+ n_layers,
217
+ mask_and_zero_frameless=True,
218
+ )
219
+ self.output_heads = OutputHeads(d_model)
220
+
221
+ self.structure_encoder_name = structure_encoder_name
222
+ self.structure_decoder_name = structure_decoder_name
223
+ self.function_decoder_name = function_decoder_name
224
+
225
+ self.structure_encoder: StructureTokenEncoder | None = None # type: ignore
226
+ self.structure_decoder: StructureTokenDecoder | None = None # type: ignore
227
+ self.function_decoder: FunctionTokenDecoder | None = None # type: ignore
228
+
229
+ self.tokenizers = get_model_tokenizers(ESM3_OPEN_SMALL)
230
+
231
+ @classmethod
232
+ def from_pretrained(
233
+ cls,
234
+ model_name: str = ESM3_OPEN_SMALL,
235
+ device: torch.device | str = "cpu",
236
+ ) -> ESM3:
237
+ from esm.pretrained import load_local_model
238
+ if model_name not in [ESM3_OPEN_SMALL]:
239
+ raise ValueError(f"Model name {model_name} is not a valid ESM3 model name.")
240
+ model: ESM3 = load_local_model(model_name, device=device) # type: ignore
241
+ return model
242
+
243
+ def get_structure_token_encoder(self) -> StructureTokenEncoder:
244
+ if self.structure_encoder is None:
245
+ self.structure_encoder = self.load_model(self.structure_encoder_name) # type: ignore
246
+ return self.structure_encoder # type: ignore
247
+
248
+ def get_structure_token_decoder(self) -> StructureTokenDecoder:
249
+ if self.structure_decoder is None:
250
+ self.structure_decoder = self.load_model(self.structure_decoder_name) # type: ignore
251
+ return self.structure_decoder # type: ignore
252
+
253
+ def get_function_token_decoder(self) -> FunctionTokenDecoder:
254
+ if self.function_decoder is None:
255
+ self.function_decoder = self.load_model(self.function_decoder_name) # type: ignore
256
+ return self.function_decoder # type: ignore
257
+
258
+ def load_model(self, model_name: str):
259
+ # Lazy import from pretrained
260
+ from esm.pretrained import load_local_model
261
+
262
+ return load_local_model(model_name, device=next(self.parameters()).device)
263
+
264
+ def forward(
265
+ self,
266
+ *,
267
+ sequence_tokens: torch.Tensor | None = None,
268
+ structure_tokens: torch.Tensor | None = None,
269
+ ss8_tokens: torch.Tensor | None = None,
270
+ sasa_tokens: torch.Tensor | None = None,
271
+ function_tokens: torch.Tensor | None = None,
272
+ residue_annotation_tokens: torch.Tensor | None = None,
273
+ average_plddt: torch.Tensor | None = None,
274
+ per_res_plddt: torch.Tensor | None = None,
275
+ structure_coords: torch.Tensor | None = None,
276
+ chain_id: torch.Tensor | None = None,
277
+ sequence_id: torch.Tensor | None = None,
278
+ ) -> ESMOutput:
279
+ """
280
+ Performs forward pass through the ESM3 model. Check utils to see how to tokenize inputs from raw data.
281
+
282
+ Args:
283
+ sequence_tokens (torch.Tensor, optional): The amino acid tokens.
284
+ structure_tokens (torch.Tensor, optional): The structure tokens.
285
+ ss8_tokens (torch.Tensor, optional): The secondary structure tokens.
286
+ sasa_tokens (torch.Tensor, optional): The solvent accessible surface area tokens.
287
+ function_tokens (torch.Tensor, optional): The function tokens.
288
+ residue_annotation_tokens (torch.Tensor, optional): The residue annotation tokens.
289
+ average_plddt (torch.Tensor, optional): The average plddt across the entire sequence.
290
+ per_res_plddt (torch.Tensor, optional): The per residue plddt, if you want to specify exact plddts, use this,
291
+ otherwise, use average_plddt.
292
+ structure_coords (torch.Tensor, optional): The structure coordinates, in the form of (B, L, 3, 3).
293
+ chain_id (torch.Tensor, optional): The chain ID
294
+ sequence_id (torch.Tensor, optional): The sequence ID.
295
+
296
+ Returns:
297
+ ESMOutput: The output of the ESM3 model.
298
+
299
+ Raises:
300
+ ValueError: If all of the inputs are None (at least one must be provided).
301
+
302
+ """
303
+ # Reasonable defaults:
304
+ try:
305
+ L, device = next(
306
+ (x.shape[1], x.device)
307
+ for x in [
308
+ sequence_tokens,
309
+ structure_tokens,
310
+ ss8_tokens,
311
+ sasa_tokens,
312
+ structure_coords,
313
+ function_tokens,
314
+ residue_annotation_tokens,
315
+ ]
316
+ if x is not None
317
+ )
318
+ except StopIteration:
319
+ raise ValueError("At least one of the inputs must be non-None")
320
+
321
+ t = self.tokenizers
322
+ defaults = lambda x, tok: (
323
+ torch.full((1, L), tok, dtype=torch.long, device=device) if x is None else x
324
+ )
325
+ sequence_tokens = defaults(sequence_tokens, t.sequence.mask_token_id)
326
+ ss8_tokens = defaults(ss8_tokens, C.SS8_UNK_TOKEN)
327
+ sasa_tokens = defaults(sasa_tokens, C.SASA_UNK_TOKEN)
328
+ average_plddt = defaults(average_plddt, 1).float()
329
+ per_res_plddt = defaults(per_res_plddt, 0).float()
330
+ chain_id = defaults(chain_id, 0)
331
+ sequence_id = defaults(sequence_id, 0)
332
+
333
+ if residue_annotation_tokens is None:
334
+ residue_annotation_tokens = torch.full(
335
+ (1, L, 16), C.RESIDUE_PAD_TOKEN, dtype=torch.long, device=device
336
+ )
337
+
338
+ if function_tokens is None:
339
+ function_tokens = torch.full(
340
+ (1, L, 8), C.INTERPRO_PAD_TOKEN, dtype=torch.long, device=device
341
+ )
342
+
343
+ if structure_coords is None:
344
+ structure_coords = torch.full(
345
+ (1, L, 3, 3), float("nan"), dtype=torch.float, device=device
346
+ )
347
+
348
+ structure_coords = structure_coords[
349
+ ..., :3, :
350
+ ] # In case we pass in an atom14 or atom37 repr
351
+ affine, affine_mask = build_affine3d_from_coordinates(structure_coords)
352
+
353
+ if structure_tokens is None:
354
+ _, structure_tokens = self.get_structure_token_encoder().encode(
355
+ structure_coords
356
+ )
357
+ assert structure_tokens is not None
358
+ structure_tokens = (
359
+ structure_tokens.masked_fill(
360
+ (structure_tokens == -1) | ~affine_mask, C.STRUCTURE_MASK_TOKEN
361
+ )
362
+ .masked_fill(sequence_tokens == C.SEQUENCE_BOS_TOKEN, C.STRUCTURE_BOS_TOKEN)
363
+ .masked_fill(sequence_tokens == C.SEQUENCE_PAD_TOKEN, C.STRUCTURE_PAD_TOKEN)
364
+ .masked_fill(sequence_tokens == C.SEQUENCE_EOS_TOKEN, C.STRUCTURE_EOS_TOKEN)
365
+ .masked_fill(
366
+ sequence_tokens == C.SEQUENCE_CHAINBREAK_TOKEN,
367
+ C.STRUCTURE_CHAINBREAK_TOKEN,
368
+ )
369
+ )
370
+
371
+ x = self.encoder(
372
+ sequence_tokens,
373
+ structure_tokens,
374
+ average_plddt,
375
+ per_res_plddt,
376
+ ss8_tokens,
377
+ sasa_tokens,
378
+ function_tokens,
379
+ residue_annotation_tokens,
380
+ )
381
+ x, embedding = self.transformer(x, sequence_id, affine, affine_mask, chain_id)
382
+ return self.output_heads(x, embedding)
383
+
384
+ # The following methods are for the ESM3InferenceClient interface
385
+ def generate(self, input: ProteinType, config: GenerationConfig) -> ProteinType:
386
+ if isinstance(input, ESMProtein):
387
+ return iterative_sampling_raw(self, input, config)
388
+ elif isinstance(input, ESMProteinTensor):
389
+ return iterative_sampling_tokens(self, input, config, self.tokenizers)
390
+ else:
391
+ raise ValueError("Input must be an ESMProtein or ESMProteinTensor")
392
+
393
+ def encode(self, input: ESMProtein) -> ESMProteinTensor:
394
+ input = attr.evolve(input) # Make a copy
395
+
396
+ sequence_tokens = None
397
+ structure_tokens = None
398
+ secondary_structure_tokens = None
399
+ sasa_tokens = None
400
+ function_tokens = None
401
+ residue_annotation_tokens = None
402
+
403
+ coordinates = None
404
+
405
+ if input.sequence is not None:
406
+ sequence_tokens = encoding.tokenize_sequence(
407
+ input.sequence, self.tokenizers.sequence, add_special_tokens=True
408
+ )
409
+ if input.secondary_structure is not None:
410
+ secondary_structure_tokens = encoding.tokenize_secondary_structure(
411
+ input.secondary_structure,
412
+ self.tokenizers.secondary_structure,
413
+ add_special_tokens=True,
414
+ )
415
+ if input.sasa is not None:
416
+ sasa_tokens = encoding.tokenize_sasa(
417
+ input.sasa, self.tokenizers.sasa, add_special_tokens=True
418
+ )
419
+
420
+ # Infer input length
421
+ sequence_length = -1
422
+ if sequence_tokens is not None:
423
+ sequence_length = len(sequence_tokens)
424
+ elif secondary_structure_tokens is not None:
425
+ sequence_length = len(secondary_structure_tokens)
426
+ elif sasa_tokens is not None:
427
+ sequence_length = len(sasa_tokens)
428
+
429
+ # Try to infer input length from structure data
430
+ if input.coordinates is not None:
431
+ coordinates, _, structure_tokens = encoding.tokenize_structure(
432
+ input.coordinates,
433
+ self.get_structure_token_encoder(),
434
+ structure_tokenizer=self.tokenizers.structure,
435
+ reference_sequence=input.sequence or "",
436
+ add_special_tokens=True,
437
+ )
438
+ if sequence_length == -1:
439
+ sequence_length = len(structure_tokens)
440
+
441
+ if sequence_length == -1:
442
+ raise ValueError(
443
+ "Cannot infer input length from input data. Please provide one of: sequence, structure, secondary_structure, sasa.\n"
444
+ "To condition on sequence length only, use ESM3LocalInferenceClient.get_default_sequence(sequence_length) to generate a default sequence input."
445
+ )
446
+
447
+ # Function and Residue annotations
448
+ if input.function_annotations is not None:
449
+ if input.sequence is None:
450
+ reference_sequence = encoding.get_default_sequence(sequence_length - 2)
451
+ else:
452
+ reference_sequence = input.sequence
453
+ (
454
+ function_tokens,
455
+ residue_annotation_tokens,
456
+ ) = encoding.tokenize_function_annotations(
457
+ input.function_annotations,
458
+ reference_sequence=reference_sequence,
459
+ function_tokenizer=self.tokenizers.function,
460
+ residue_annotation_tokenizer=self.tokenizers.residue_annotations,
461
+ add_special_tokens=True,
462
+ )
463
+
464
+ return ESMProteinTensor(
465
+ sequence=sequence_tokens,
466
+ structure=structure_tokens,
467
+ secondary_structure=secondary_structure_tokens,
468
+ sasa=sasa_tokens,
469
+ function=function_tokens,
470
+ residue_annotations=residue_annotation_tokens,
471
+ coordinates=coordinates,
472
+ ).to(next(self.parameters()).device)
473
+
474
+ def decode(
475
+ self,
476
+ input: ESMProteinTensor,
477
+ ) -> ESMProtein:
478
+ return decode_protein_tensor(
479
+ input=input,
480
+ tokenizers=self.tokenizers,
481
+ structure_token_decoder=self.get_structure_token_decoder(),
482
+ function_token_decoder=self.get_function_token_decoder(),
483
+ )
484
+
485
+ def _forward(
486
+ self, input: ESMProteinTensor, config: ForwardConfig = ForwardConfig()
487
+ ) -> ForwardOutput:
488
+ # Default plddt conditioning for inference. 1s where coordinates are provided.
489
+ if input.coordinates is None:
490
+ per_res_plddt = None
491
+ else:
492
+ # 1.0 if all coordinates at specific indices have valid non-nan values.
493
+ per_res_plddt = input.coordinates.isfinite().all(dim=-1).any(dim=-1).float()
494
+
495
+ with torch.no_grad() if not self.training else contextlib.nullcontext():  # disable grad when not training
496
+ output = self.forward(
497
+ sequence_tokens=input.sequence,
498
+ structure_tokens=input.structure,
499
+ ss8_tokens=input.secondary_structure,
500
+ sasa_tokens=input.sasa,
501
+ function_tokens=input.function,
502
+ residue_annotation_tokens=input.residue_annotations,
503
+ average_plddt=torch.tensor(1.0, device=input.device),
504
+ per_res_plddt=per_res_plddt,
505
+ structure_coords=input.coordinates,
506
+ chain_id=None,
507
+ sequence_id=None,
508
+ )
509
+
510
+ if config.return_logits:
511
+ logits = ForwardTrackData(
512
+ sequence=output.sequence_logits,
513
+ structure=output.structure_logits,
514
+ secondary_structure=output.secondary_structure_logits,
515
+ sasa=output.sasa_logits,
516
+ function=output.function_logits,
517
+ )
518
+ else:
519
+ logits = None
520
+
521
+ return ForwardOutput(
522
+ logits=logits,
523
+ residue_annotation_logits=output.residue_logits,
524
+ embeddings=output.embeddings if config.return_embeddings else None,
525
+ )
526
+
527
+ def forward_and_sample(
528
+ self, input: ESMProteinTensor, sampling_configuration: SamplingConfig
529
+ ) -> ForwardAndSampleOutput:
530
+ protein_tensor = attr.evolve(input) # Make a copy
531
+
532
+ def maybe_clone(x: torch.Tensor | None) -> torch.Tensor | None:
533
+ return x.clone() if x is not None else None
534
+
535
+ device = next(self.parameters()).device
536
+
537
+ sampling_config = sampling_configuration
538
+ if sampling_config is None:
539
+ sampling_config = get_default_sampling_config(self.tokenizers)
540
+
541
+ # Initialize default values for missing tracks
542
+ default_protein_tensor = ESMProteinTensor.empty(
543
+ len(input) - 2, tokenizers=self.tokenizers, device=input.device
544
+ )
545
+ for track in attr.fields(ESMProteinTensor):
546
+ if getattr(protein_tensor, track.name, None) is None:
547
+ setattr(
548
+ protein_tensor,
549
+ track.name,
550
+ getattr(default_protein_tensor, track.name, None),
551
+ )
552
+
553
+ # Preprocessing
554
+ sequence_length: int = -1
555
+ for track in [
556
+ "sequence",
557
+ "structure",
558
+ "secondary_structure",
559
+ "sasa",
560
+ "function",
561
+ "residue_annotations",
562
+ ]:
563
+ input_tensor: torch.Tensor | None = getattr(protein_tensor, track, None)
564
+ if input_tensor is not None:
565
+ # Add batch dimension if necessary
566
+ if track in ["sequence", "structure", "secondary_structure", "sasa"]:
567
+ if len(input_tensor.size()) == 1:
568
+ input_tensor = input_tensor.unsqueeze(0) # (L,) -> (1, L)
569
+ elif track in ["function", "residue_annotations"]:
570
+ if len(input_tensor.size()) == 2:
571
+ input_tensor = input_tensor.unsqueeze(0) # (L, O) -> (1, L, O)
572
+
573
+ # Check length consistency
574
+ if sequence_length == -1:
575
+ sequence_length = input_tensor.size(1)
576
+ else:
577
+ if input_tensor.size(1) != sequence_length:
578
+ raise ValueError(
579
+ f"Length mismatch for track {track}. Expected {sequence_length}, got {input_tensor.size(1)}"
580
+ )
581
+
582
+ # Move input tensor to model device
583
+ input_tensor = input_tensor.to(device)
584
+ setattr(protein_tensor, track, input_tensor)
585
+
586
+ if protein_tensor.coordinates is not None:
587
+ coordinates = protein_tensor.coordinates
588
+ if len(coordinates.size()) == 3:
589
+ coordinates = coordinates.unsqueeze(0)
590
+ protein_tensor.coordinates = coordinates.to(device)
591
+ sequence_length = coordinates.size(1)
592
+
593
+ if sequence_length == -1:
594
+ raise ValueError("No input data provided")
595
+
596
+ # Forward pass
597
+ forward_output = self._forward(
598
+ protein_tensor,
599
+ ForwardConfig(
600
+ ReturnLogitsConfig(
601
+ sequence=True,
602
+ structure=True,
603
+ secondary_structure=True,
604
+ sasa=True,
605
+ function=True,
606
+ residue_annotations=True,
607
+ ),
608
+ return_embeddings=True,
609
+ ),
610
+ )
611
+
612
+ # Sampling
613
+ tokens_dir = {}
614
+ track_sampling_metadata_dir: dict[str, dict | None] = {}
615
+ for track in ["sequence", "structure", "secondary_structure", "sasa"]:
616
+ config = getattr(sampling_config, track)
617
+ if config is None:
618
+ tokens_dir[track] = maybe_clone(getattr(input, track))
619
+ continue
620
+ sampling_metadata = self._sample_track(
621
+ logits=getattr(forward_output.logits, track)[0, ...],
622
+ tokens=getattr(protein_tensor, track)[0, ...],
623
+ sampling_track_config=config,
624
+ mask_idx=getattr(self.tokenizers, track).mask_token_id,
625
+ )
626
+ tokens_dir[track] = sampling_metadata.pop("sampled_tokens") # (L,)
627
+ track_sampling_metadata_dir[track] = sampling_metadata
628
+
629
+ # Sample function and residue annotations separately
630
+ config = getattr(sampling_config, "function")
631
+ if config is None:
632
+ tokens_dir["function"] = maybe_clone(getattr(input, "function"))
633
+ tokens_dir["residue_annotations"] = maybe_clone(
634
+ getattr(input, "residue_annotations")
635
+ )
636
+ else:
637
+ sampling_metadata = self._sample_function_track(
638
+ tokens=getattr(protein_tensor, "function")[0, ...],
639
+ logits=getattr(forward_output.logits, "function")[0, ...],
640
+ sampling_track_config=config,
641
+ )
642
+ tokens_dir["function"] = sampling_metadata.pop("sampled_tokens") # (L, D)
643
+ track_sampling_metadata_dir["function"] = sampling_metadata
644
+
645
+ sampled_tokens, _ = sample_residue_annotation_logits(
646
+ logits=forward_output.residue_annotation_logits[0, ...] # type: ignore
647
+ )
648
+ tokens_dir["residue_annotations"] = sampled_tokens # (L, MAX_R)
649
+
650
+ # Format output
651
+ forward_and_sample_output_dir = {}
652
+ forward_and_sample_output_dir["protein_tensor"] = ESMProteinTensor(**tokens_dir)
653
+ for property in [
654
+ "entropy",
655
+ "prob",
656
+ "logprob",
657
+ "top_prob",
658
+ "topk_logprob",
659
+ "topk_tokens",
660
+ ]:
661
+ is_all_none = True
662
+ forward_track_data_dir = {}
663
+ for track in track_sampling_metadata_dir.keys():
664
+ values = track_sampling_metadata_dir[track]
665
+ if values is not None and values.get(property, None) is not None:
666
+ forward_track_data_dir[track] = values.get(property, None)
667
+ is_all_none = False
668
+ if not is_all_none:
669
+ forward_and_sample_output_dir[property] = ForwardTrackData(
670
+ **forward_track_data_dir
671
+ )
672
+ else:
673
+ forward_and_sample_output_dir[property] = None
674
+
675
+ perres_embed = (
676
+ forward_output.embeddings[0] # type: ignore
677
+ if sampling_configuration.return_per_residue_embeddings
678
+ else None
679
+ )
680
+ mean_embedding = (
681
+ forward_output.embeddings[0].mean(1) # type: ignore
682
+ if sampling_configuration.return_per_residue_embeddings
683
+ else None
684
+ )
685
+
686
+ return ForwardAndSampleOutput(
687
+ per_residue_embedding=perres_embed,
688
+ mean_embedding=mean_embedding,
689
+ **forward_and_sample_output_dir,
690
+ )
691
+
692
+ def _sample_track(
693
+ self,
694
+ logits: torch.Tensor,
695
+ tokens: torch.Tensor,
696
+ sampling_track_config: SamplingTrackConfig,
697
+ mask_idx: int,
698
+ ) -> dict[str, torch.Tensor]:
699
+ # Sample in all positions
700
+ temperature = sampling_track_config.temperature
701
+ sampled_tokens = sample_logits(
702
+ logits, temperature=temperature, top_p=sampling_track_config.top_p
703
+ )
704
+ log_probs = logits.log_softmax(-1)
705
+
706
+ # Do not sample at BOS and EOS tokens
707
+ sampling_mask = torch.ones_like(tokens, dtype=torch.bool) # (L, )
708
+ sampling_mask[0] = False
709
+ sampling_mask[-1] = False
710
+
711
+ # Do not sample at special token positions but allow sampling at mask token
712
+ special_minus_mask = list(set(sampling_track_config.invalid_ids) - {mask_idx})
713
+ if len(special_minus_mask) > 0:
714
+ special_tokens = torch.tensor(special_minus_mask, device=tokens.device)
715
+ assert special_tokens.numel() > 0
716
+ sampling_mask = sampling_mask & (
717
+ tokens[..., None] != special_tokens[None, :]
718
+ ).all(-1)
719
+
720
+ # Keep only samples from masked positions (if specified)
721
+ if sampling_track_config.only_sample_masked_tokens:
722
+ masked_tokens = tokens == mask_idx
723
+ sampling_mask = sampling_mask & masked_tokens
724
+ sampled_tokens = torch.where(sampling_mask, sampled_tokens, tokens)
725
+
726
+ return self._compute_track_metadata(
727
+ sampled_tokens,
728
+ log_probs,
729
+ sampling_mask,
730
+ top_k=sampling_track_config.topk_logprobs,
731
+ )
732
+
733
+ def _sample_function_track(
734
+ self,
735
+ tokens: torch.Tensor,
736
+ logits: torch.Tensor,
737
+ sampling_track_config: SamplingTrackConfig,
738
+ ) -> dict[str, torch.Tensor]:
739
+ # Do not sample at BOS and EOS tokens
740
+ sampling_mask = torch.ones_like(tokens, dtype=torch.bool)
741
+ sampling_mask[0] = False
742
+ sampling_mask[-1] = False
743
+
744
+ sampled_tokens, probs = sample_function_logits(
745
+ logits,
746
+ self.tokenizers.function,
747
+ top_p=sampling_track_config.top_p,
748
+ temperature=sampling_track_config.temperature,
749
+ )
750
+
751
+ if sampling_track_config.only_sample_masked_tokens:
752
+ raise ValueError(
753
+ "Sampling only masked tokens is undefined for function tokens."
754
+ )
755
+
756
+ sampled_tokens = torch.where(sampling_mask, sampled_tokens, tokens) # (L, D)
757
+
758
+ return self._compute_track_metadata(
759
+ sampled_tokens,
760
+ probs,
761
+ sampling_mask,
762
+ top_k=sampling_track_config.topk_logprobs,
763
+ )
764
+
765
+ @staticmethod
766
+ def _compute_track_metadata(
767
+ sampled_tokens: torch.Tensor,
768
+ log_probs: torch.Tensor,
769
+ sampling_mask: torch.Tensor,
770
+ top_k: int,
771
+ ) -> dict:
772
+ probs = torch.exp(log_probs) # (B, L)
773
+ entropy = torch.distributions.Categorical(probs=probs).entropy() # (B, L)
774
+
775
+ # Only compute probabilities for sampled tokens
776
+ sampled_logprob = torch.zeros_like(
777
+ sampled_tokens, dtype=torch.float32
778
+ ) # (B, L)
779
+ sampled_tokens_valid = sampled_tokens[sampling_mask]
780
+ sampled_log_probs_valid = log_probs[sampling_mask, sampled_tokens_valid]
781
+ sampled_logprob[sampling_mask] = sampled_log_probs_valid
782
+
783
+ # Calculate extra metadata
784
+ sampled_prob = torch.exp(sampled_logprob)
785
+ top_prob = torch.max(probs, dim=-1).values
786
+ topk_logprobs, topk_tokens = torch.topk(log_probs, top_k, dim=-1)
787
+ topk_logprobs = None if top_k == 0 else topk_logprobs
788
+ topk_tokens = None if top_k == 0 else topk_tokens
789
+
790
+ return {
791
+ "entropy": entropy,
792
+ "sampled_tokens": sampled_tokens,
793
+ "prob": sampled_prob,
794
+ "logprob": sampled_logprob,
795
+ "top_prob": top_prob,
796
+ "topk_logprob": topk_logprobs,
797
+ "topk_tokens": topk_tokens,
798
+ }
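Since `ESM3` also implements the `ESM3InferenceClient` interface, the typical entry point is `from_pretrained` followed by `generate`. A hedged sketch: it assumes the pretrained open-small weights can be fetched by `load_local_model`, and that `GenerationConfig` accepts `track` and `num_steps` as in the upstream ESM3 SDK (its definition is not part of this excerpt):

```python
# Illustrative only: high-level inference with the ESM3 client interface.
from esm.models.esm3 import ESM3
from esm.sdk.api import ESMProtein, GenerationConfig

model = ESM3.from_pretrained()  # defaults to ESM3_OPEN_SMALL on CPU

# Predict structure conditioned on a short example sequence.
protein = ESMProtein(sequence="MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ")
protein = model.generate(protein, GenerationConfig(track="structure", num_steps=8))

if protein.coordinates is not None:
    print(protein.coordinates.shape)  # roughly (L, n_atoms, 3)
```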
esm/models/function_decoder.py ADDED
@@ -0,0 +1,338 @@
1
+ """Function Token Decoder."""
2
+ from collections import defaultdict
3
+ from dataclasses import dataclass, field
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+
11
+ from esm.layers.regression_head import RegressionHead
12
+ from esm.layers.transformer_stack import TransformerStack
13
+ from esm.tokenization.function_tokenizer import (
14
+ InterProQuantizedTokenizer,
15
+ )
16
+ from esm.utils.constants import esm3 as C
17
+ from esm.utils.misc import merge_ranges
18
+ from esm.utils.types import FunctionAnnotation
19
+
20
+
21
+ @dataclass(frozen=True)
22
+ class FunctionTokenDecoderConfig:
23
+ """Configures function token decoder."""
24
+
25
+ # Embedding dimension of decoder.
26
+ d_model: int = 1024
27
+ # Number of attention heads of decoder.
28
+ n_heads: int = 8
29
+ # Number of layers of decoder.
30
+ n_layers: int = 3
31
+ # Number of integer values that function tokens may assume.
32
+ function_token_vocab_size: int = 260
33
+ # Number of function tokens at each position.
34
+ function_token_depth: int = 8
35
+ # Number of InterPro labels that can be decoded.
36
+ num_interpro_classes: int = 29026
37
+ # Number of function keywords that can be decoded.
38
+ keyword_vocabulary_size: int = 58641
39
+ # List of supported InterPro ids.
40
+ interpro_entry_list: str = field(
41
+ default_factory=lambda: str(C.data_root() / C.INTERPRO_ENTRY)
42
+ )
43
+ # Path to keywords vocabulary.
44
+ keyword_vocabulary_path: str = field(
45
+ default_factory=lambda: str(C.data_root() / C.KEYWORDS_VOCABULARY)
46
+ )
47
+ # Whether to unpack LSH bits into single-bit tokens.
48
+ unpack_lsh_bits: bool = True
49
+ # The number of special tokens in the function tokenizer vocabulary which come
50
+ # before the LSH tokens.
51
+ num_special_tokens: int = 4
52
+ # The number of bits per LSH token in the function tokenizer.
53
+ bits_per_token: int = 8
54
+
55
+
56
+ class FunctionTokenDecoder(nn.Module):
57
+ def __init__(self, config: FunctionTokenDecoderConfig | None = None):
58
+ """Constructs function token decoder."""
59
+ super().__init__()
60
+ if config is None:
61
+ config = FunctionTokenDecoderConfig()
62
+ self.config = config
63
+
64
+ # Get the supported set of InterPro ids.
65
+ df = pd.read_csv(config.interpro_entry_list, sep="\t")
66
+ self.interpro_ids = sorted(df.ENTRY_AC)
67
+ self.interpro2index = {
68
+ interpro_id: i for i, interpro_id in enumerate(self.interpro_ids)
69
+ }
70
+ assert len(self.interpro_ids) == config.num_interpro_classes
71
+
72
+ with open(config.keyword_vocabulary_path, "r") as f:
73
+ self.keywords_vocabulary: list[str] = list(f.read().strip().split("\n"))
74
+ assert len(self.keywords_vocabulary) == config.keyword_vocabulary_size
75
+
76
+ if config.unpack_lsh_bits:
77
+ vocab_size = 2 * config.function_token_depth * config.bits_per_token
78
+ else:
79
+ # Function-token id's re-use the same token ids at each position along the depth
80
+ # dimension, despite distinct meanings. The decoder should take this into
81
+ # account so create distinct embeddings for tokens at each position.
82
+ vocab_size = (
83
+ self.config.function_token_depth * self.config.function_token_vocab_size
84
+ )
85
+
86
+ self.embedding = nn.Embedding(
87
+ # Function-token id's re-use the same token ids at each position along the
88
+ # depth dimension, despite distinct meanings. The decoder should take this
89
+ # into account so create distinct embeddings for tokens at each position.
90
+ num_embeddings=(vocab_size),
91
+ embedding_dim=config.d_model,
92
+ )
93
+ self.decoder = TransformerStack(
94
+ d_model=config.d_model,
95
+ n_heads=config.n_heads,
96
+ v_heads=None,
97
+ n_layers=config.n_layers,
98
+ n_layers_geom=0,
99
+ scale_residue=False,
100
+ bias=True,
101
+ qk_layernorm=False,
102
+ ffn_type="gelu",
103
+ expansion_ratio=4,
104
+ )
105
+ self.heads = nn.ModuleDict(
106
+ {
107
+ # Binary classification head predicting which keywords are present.
108
+ "keyword_logits": RegressionHead(
109
+ d_model=config.d_model,
110
+ output_dim=config.keyword_vocabulary_size,
111
+ hidden_dim=4 * config.d_model,
112
+ ),
113
+ # Regresses the TF-IDF value of each present keyword.
114
+ "keyword_tfidf": RegressionHead(
115
+ d_model=config.d_model,
116
+ output_dim=config.keyword_vocabulary_size,
117
+ hidden_dim=4 * config.d_model,
118
+ ),
119
+ # Predicts which InterPro annotations are present.
120
+ "interpro_logits": RegressionHead(
121
+ d_model=config.d_model,
122
+ output_dim=config.num_interpro_classes,
123
+ hidden_dim=4 * config.d_model,
124
+ ),
125
+ }
126
+ )
127
+
128
+ def forward(self, token_ids: torch.Tensor) -> dict[str, torch.Tensor]:
129
+ """Forward pass through function token decoder.
130
+
131
+ Args:
132
+ token_ids: <int>[batch_size, function_token_depth] batch of function tokens
133
+ ids to decode.
134
+ Returns:
135
+ interpro_logits: binary classification logits tensor of shape
136
+ <float>[batch_size, num_interpro_classes]
137
+ """
138
+ assert token_ids.ndim == 2
139
+ assert token_ids.shape[1] == self.config.function_token_depth
140
+ batch_size, depth = token_ids.shape
141
+
142
+ if self.config.unpack_lsh_bits:
143
+ # Shift values into [0, 2^bits/token)
144
+ lsh_bits = token_ids - self.config.num_special_tokens
145
+ # extract each bit. (hob stands for highest-order bit)
146
+ bits = torch.concat(
147
+ [
148
+ torch.bitwise_and(lsh_bits, 1 << hob).gt(0).to(torch.int32)
149
+ for hob in range(self.config.bits_per_token)
150
+ ],
151
+ dim=1,
152
+ )
153
+ assert bits.shape == (batch_size, depth * self.config.bits_per_token)
154
+
155
+ # Shift each bit into individual vocabulary ranges, so they get distinct
156
+ # embeddings.
157
+ vocab_offsets = 2 * torch.arange(
158
+ depth * self.config.bits_per_token, device=token_ids.device
159
+ )
160
+ inputs = vocab_offsets[None, :] + bits
161
+
162
+ # zero-out special tokens, i.e. non LSH tokens.
163
+ where_special = token_ids < self.config.num_special_tokens
164
+ inputs = torch.where(where_special.any(dim=1, keepdim=True), 0, inputs)
165
+ else:
166
+ # Apply depth-position offset to use distinct vocabs. See __init__ for
167
+ # explaination.
168
+ vocab_offsets = self.config.function_token_vocab_size * torch.arange(
169
+ self.config.function_token_depth,
170
+ device=token_ids.device,
171
+ )
172
+ inputs = token_ids + vocab_offsets[None, :]
173
+
174
+ embed = self.embedding(inputs)
175
+ encoding, _ = self.decoder(embed)
176
+ pooled = torch.mean(encoding, dim=1)
177
+
178
+ return {name: head(pooled) for name, head in self.heads.items()}
179
+
180
+ @property
181
+ def device(self) -> torch.device:
182
+ return next(self.parameters()).device
183
+
184
+ def decode(
185
+ self,
186
+ function_token_ids: torch.Tensor,
187
+ tokenizer: InterProQuantizedTokenizer,
188
+ decode_annotations: bool = True,
189
+ annotation_threshold: float = 0.1,
190
+ decode_keywords=True,
191
+ keywords_threshold: float = 0.5,
192
+ annotation_min_length: int | None = 5,
193
+ annotation_gap_merge_max: int | None = 3,
194
+ ):
195
+ """Decodes function tokens into predicted annotations and keywords.
196
+
197
+ Args:
198
+ function_token_ids: <int>[length, depth] function token ids. NOTE:
199
+ without <bos>/<eos> prefix
200
+ tokenizer: function tokenizer.
201
+ decode_annotations: whether to decode InterPro annotations.
202
+ annotation_threshold: threshold for emitting a function annotation.
203
+ decode_keywords: whether to decode function keywords.
204
+ keywords_threshold: threshold for emitting a keyword.
205
+ annotation_min_length: optional minimum length of predicted annotations for
206
+ size filtering.
207
+ annotation_gap_merge_max: optional merge adjacent annotation of the same type
208
+ Returns:
209
+ Decoder outputs:
210
+ - "interpro_logits": <float>[length, num_interpro] predicted interpro logits.
211
+ - "interpro_preds": <bool>[length, num_interpro] predicted intepro labels.
212
+ - "interpro_annotations": list[FunctionAnnotation] predicted InterPro
213
+ annotations
214
+ - "keyword_logits": <float>[length, keyword_vocabulary] binary prediciton
215
+ logits for keywrods.
216
+ - "function_keywords": list[FunctionAnnotation] predicted function keyword
217
+ ranges.
218
+ """
219
+ assert function_token_ids.ndim == 2
220
+ assert function_token_ids.shape[1] == tokenizer.depth
221
+ assert self.config.function_token_depth == tokenizer.depth
222
+
223
+ outputs = {}
224
+
225
+ outputs = self(function_token_ids.to(self.device))
226
+
227
+ # Only decode in positions that have function tokens.
228
+ where_decode = torch.all(
229
+ (function_token_ids != tokenizer.vocab_to_index["<pad>"])
230
+ & (function_token_ids != tokenizer.vocab_to_index["<none>"])
231
+ & (function_token_ids != tokenizer.vocab_to_index["<unk>"]),
232
+ dim=1,
233
+ )
234
+
235
+ # Decode InterPro annotations ranges.
236
+ interpro_preds = F.sigmoid(outputs["interpro_logits"])
237
+ interpro_preds = interpro_preds >= annotation_threshold
238
+ interpro_preds[~where_decode, :] = False
239
+ outputs["interpro_preds"] = interpro_preds
240
+ if decode_annotations:
241
+ annotations: list[FunctionAnnotation] = []
242
+ preds: np.ndarray = interpro_preds.detach().cpu().numpy()
243
+ for position_index, class_index in zip(*preds.nonzero()):
244
+ interpro_id = self.interpro_ids[class_index]
245
+ annotation = FunctionAnnotation(
246
+ label=interpro_id,
247
+ start=position_index + 1, # zero-index -> one-index inclusive
248
+ end=position_index + 1, # zero-index -> one-index inclusive
249
+ )
250
+ annotations.append(annotation)
251
+
252
+ annotations = _merge_annotations(
253
+ annotations,
254
+ merge_gap_max=annotation_gap_merge_max,
255
+ )
256
+
257
+ # Drop very small annotations.
258
+ if annotation_min_length is not None:
259
+ annotations = [
260
+ annotation
261
+ for annotation in annotations
262
+ if annotation.end - annotation.start + 1 >= annotation_min_length
263
+ ]
264
+
265
+ outputs["interpro_annotations"] = annotations
266
+
267
+ # Decode function keyword ranges.
268
+ keyword_logits = outputs["keyword_logits"]
269
+ keyword_logits[~where_decode, :] = -torch.inf
270
+ if decode_keywords:
271
+ keyword_preds = F.sigmoid(keyword_logits) >= keywords_threshold
272
+ outputs["function_keywords"] = self._preds_to_keywords(
273
+ keyword_preds.detach().cpu().numpy()
274
+ )
275
+
276
+ return outputs
277
+
278
+ def _preds_to_keywords(self, keyword_preds: np.ndarray) -> list[FunctionAnnotation]:
279
+ """Converts output log-TFDF to predicted keywords over the sequence.
280
+
281
+ Args:
282
+ keyword_precs: <bool>[length, keyword_vocab] positional predictions of
283
+ function keywords from the keyword prediction head.
284
+ Returns:
285
+ Non-overlapping keyword annotated ranges along the sequence. Note that indices
286
+ will index into the *sequence*, not the function token array which has a
287
+ <pad> prefix.
288
+ """
289
+ assert keyword_preds.ndim == 2
290
+ assert keyword_preds.shape[1] == self.config.keyword_vocabulary_size
291
+
292
+ keyword_positions: dict[str, list[range]] = defaultdict(list)
293
+ for position, keyword_id in zip(*np.nonzero(keyword_preds)):
294
+ keyword = self.keywords_vocabulary[keyword_id]
295
+ keyword_positions[keyword].append(range(position, position + 1))
296
+
297
+ annotations: list[FunctionAnnotation] = []
298
+ for keyword, ranges in keyword_positions.items():
299
+ for range_ in merge_ranges(ranges):
300
+ annotation = FunctionAnnotation(
301
+ label=keyword,
302
+ start=range_.start + 1, # zero-index -> one-index
303
+ end=range_.stop + 1 - 1, # zero-index excl -> one-index incl
304
+ )
305
+ annotations.append(annotation)
306
+
307
+ return annotations
308
+
309
+
310
+ def _merge_annotations(
311
+ annotations: list[FunctionAnnotation],
312
+ merge_gap_max: int | None = None,
313
+ ) -> list[FunctionAnnotation]:
314
+ """Merges annotations into non-overlapping segments.
315
+
316
+ Args:
317
+ annotations: annotations to merge.
318
+ merge_gap_max: optionally merge neighboring ranges that are separated by a gap
319
+ no larger than this size.
320
+ Returns:
321
+ non-overlapping annotations with gaps merged.
322
+ """
323
+ grouped: dict[str, list[range]] = defaultdict(list)
324
+ for a in annotations:
325
+ # Convert one-indexed inclusive-inclusive, to range()
326
+ grouped[a.label].append(range(a.start, a.end + 1))
327
+
328
+ merged = []
329
+ for label, ranges in grouped.items():
330
+ merged_ranges = merge_ranges(ranges, merge_gap_max=merge_gap_max)
331
+ for range_ in merged_ranges:
332
+ annotation = FunctionAnnotation(
333
+ label=label,
334
+ start=range_.start, # ranges here are already one-indexed inclusive
335
+ end=range_.stop - 1, # exclusive stop -> one-indexed inclusive end
336
+ )
337
+ merged.append(annotation)
338
+ return merged
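An illustrative sketch of `_merge_annotations` above, assuming the `esm` package from this upload is importable: per-residue hits with the same label collapse into ranges, and `merge_gap_max` optionally bridges small gaps between them:

```python
# Illustrative only: merging single-residue InterPro hits into ranges.
from esm.models.function_decoder import _merge_annotations
from esm.utils.types import FunctionAnnotation

hits = [
    FunctionAnnotation(label="IPR000001", start=5, end=5),
    FunctionAnnotation(label="IPR000001", start=6, end=6),
    FunctionAnnotation(label="IPR000001", start=10, end=12),  # 3-residue gap before this hit
]

# With merge_gap_max=3 the two segments are expected to merge into one annotation
# covering roughly residues 5-12; with merge_gap_max=None they stay separate.
print(_merge_annotations(hits, merge_gap_max=3))
print(_merge_annotations(hits, merge_gap_max=None))
```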
esm/models/vqvae.py ADDED
@@ -0,0 +1,450 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from esm.layers.blocks import UnifiedTransformerBlock
5
+ from esm.layers.codebook import EMACodebook
6
+ from esm.layers.structure_proj import Dim6RotStructureHead
7
+ from esm.layers.transformer_stack import TransformerStack
8
+ from esm.utils.constants import esm3 as C
9
+ from esm.utils.misc import knn_graph
10
+ from esm.utils.structure.affine3d import (
11
+ Affine3D,
12
+ build_affine3d_from_coordinates,
13
+ )
14
+ from esm.utils.structure.predicted_aligned_error import (
15
+ compute_predicted_aligned_error,
16
+ compute_tm,
17
+ )
18
+
19
+
20
+ class RelativePositionEmbedding(nn.Module):
21
+ """
22
+ Embedding layer for relative position embeddings. `bins` is the number of positions relative
23
+ to the query position that are considered before clipping. For instance, if `bins=10`, then
24
+ the relative position embedding will have 21 positions, [-10, 10].
25
+ """
26
+
27
+ def __init__(self, bins, embedding_dim, init_std=0.02):
28
+ super().__init__()
29
+ self.bins = bins
30
+
31
+ self.embedding = torch.nn.Embedding(2 * bins + 2, embedding_dim)
32
+ self.embedding.weight.data.normal_(0, init_std)
33
+
34
+ def forward(self, query_residue_index, key_residue_index):
35
+ """
36
+ Input:
37
+ query_residue_index: (B, ) tensor of source indices (dtype=torch.long)
38
+ key_residue_index: (B, L) tensor of target indices (dtype=torch.long)
39
+ Output:
40
+ embeddings: B x L x embedding_dim tensor of embeddings
41
+ """
42
+
43
+ assert query_residue_index.dtype == torch.long
44
+ assert key_residue_index.dtype == torch.long
45
+ assert query_residue_index.ndim == 1
46
+ assert key_residue_index.ndim == 2
47
+
48
+ diff = key_residue_index - query_residue_index.unsqueeze(1)
49
+ diff = diff.clamp(-self.bins, self.bins)
50
+ diff = diff + self.bins + 1 # add 1 to adjust for padding index
51
+ output = self.embedding(diff)
52
+ return output
53
+
54
+
+ class PairwisePredictionHead(nn.Module):
+     def __init__(
+         self,
+         input_dim: int,
+         downproject_dim: int,
+         hidden_dim: int,
+         n_bins: int,
+         bias: bool = True,
+         pairwise_state_dim: int = 0,
+     ):
+         super().__init__()
+         self.downproject = nn.Linear(input_dim, downproject_dim, bias=bias)
+         self.linear1 = nn.Linear(
+             downproject_dim + pairwise_state_dim, hidden_dim, bias=bias
+         )
+         self.activation_fn = nn.GELU()
+         self.norm = nn.LayerNorm(hidden_dim)
+         self.linear2 = nn.Linear(hidden_dim, n_bins, bias=bias)
+
+     def forward(self, x, pairwise: torch.Tensor | None = None):
+         """
+         Args:
+             x: [B x L x D]
+
+         Output:
+             [B x L x L x K]
+         """
+         x = self.downproject(x)
+         # Let x_i be a vector of size (B, D).
+         # Input is {x_1, ..., x_L} of size (B, L, D)
+         # Output is 2D where x_ij = cat([x_i * x_j, x_i - x_j])
+         q, k = x.chunk(2, dim=-1)
+
+         prod = q[:, None, :, :] * k[:, :, None, :]
+         diff = q[:, None, :, :] - k[:, :, None, :]
+         x_2d = [
+             prod,
+             diff,
+         ]
+         if pairwise is not None:
+             x_2d.append(pairwise)
+         x = torch.cat(x_2d, dim=-1)
+         x = self.linear1(x)
+         x = self.activation_fn(x)
+         x = self.norm(x)
+         x = self.linear2(x)
+         return x
+
+
+ class RegressionHead(nn.Module):
+     def __init__(self, embed_dim: int, output_dim: int):
+         super().__init__()
+         self.dense = nn.Linear(embed_dim, embed_dim)
+         self.activation_fn = nn.GELU()
+         self.norm = nn.LayerNorm(embed_dim)
+         self.output = nn.Linear(embed_dim, output_dim)
+
+     def forward(self, features):
+         x = self.dense(features)
+         x = self.activation_fn(x)
+         x = self.norm(x)
+         x = self.output(x)
+         return x
+
+
+ class CategoricalMixture:
+     def __init__(self, param, bins=50, start=0, end=1):
+         # All tensors are of shape ..., bins.
+         self.logits = param
+         bins = torch.linspace(
+             start, end, bins + 1, device=self.logits.device, dtype=torch.float32
+         )
+         self.v_bins = (bins[:-1] + bins[1:]) / 2
+
+     def log_prob(self, true):
+         # Shapes are:
+         #     self.probs: ... x bins
+         #     true:       ... (floating point # for target)
+         true_index = (
+             (true.unsqueeze(-1) - self.v_bins[[None] * true.ndim]).abs().argmin(-1)
+         )
+         nll = self.logits.log_softmax(-1)
+         return torch.take_along_dim(nll, true_index.unsqueeze(-1), dim=-1).squeeze(-1)
+
+     def mean(self):
+         return (
+             self.logits.to(self.v_bins.dtype).softmax(-1) @ self.v_bins.unsqueeze(1)
+         ).squeeze(-1)
+
+     def median(self):
+         return self.v_bins[self.logits.max(-1).indices]
+
+
+ class GeometricEncoderStack(TransformerStack):
+     def __init__(self, d_model, n_heads, v_heads, n_layers):
+         super().__init__(d_model, n_heads, v_heads, 0)
+         self.blocks = nn.ModuleList(
+             [
+                 UnifiedTransformerBlock(
+                     d_model,
+                     n_heads,
+                     v_heads=v_heads,
+                     use_geom_attn=True,
+                     use_plain_attn=False,
+                     expansion_ratio=4,
+                     bias=True,
+                 )
+                 for i in range(n_layers)
+             ]
+         )
+         self.norm = nn.Identity()
+
+
+ def batched_gather(data, inds, dim=0, no_batch_dims=0):
+     ranges = []
+     for i, s in enumerate(data.shape[:no_batch_dims]):
+         r = torch.arange(s)
+         r = r.view(*(*((1,) * i), -1, *((1,) * (len(inds.shape) - i - 1))))
+         ranges.append(r)
+
+     remaining_dims = [slice(None) for _ in range(len(data.shape) - no_batch_dims)]
+     remaining_dims[dim - no_batch_dims if dim >= 0 else dim] = inds
+     ranges.extend(remaining_dims)
+     return data[ranges]
+
+
+ def node_gather(s: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
+     return batched_gather(s.unsqueeze(-3), edges, -2, no_batch_dims=len(s.shape) - 1)
+
+
+ class StructureTokenEncoder(nn.Module):
+     def __init__(self, d_model, n_heads, v_heads, n_layers, d_out, n_codes):
+         super().__init__()
+         # We only support fully-geometric structure token encoders for now...
+         # setting n_layers_geom to something that's not n_layers won't work because
+         # sequence ID isn't supported fully in this repo for plain-old transformers
+         self.transformer = GeometricEncoderStack(d_model, n_heads, v_heads, n_layers)
+         self.pre_vq_proj = nn.Linear(d_model, d_out)
+         self.codebook = EMACodebook(n_codes, d_out)
+         self.relative_positional_embedding = RelativePositionEmbedding(
+             32, d_model, init_std=0.02
+         )
+         self.knn = 16
+
+     def encode_local_structure(
+         self,
+         coords: torch.Tensor,
+         affine: Affine3D,
+         attention_mask: torch.Tensor,
+         sequence_id: torch.Tensor | None,
+         affine_mask: torch.Tensor,
+         residue_index: torch.Tensor | None = None,
+     ):
+         """This function allows a multi-layered encoder to encode tokens with a local receptive field. The implementation is as follows:
+
+         1. Starting with (B, L) frames, we find the KNN in structure space. This now gives us (B, L, K) where the last dimension is the local
+            neighborhood of all (B, L) residues.
+         2. We reshape these frames to (B*L, K) so now we have a large batch of local neighborhoods.
+         3. Pass the (B*L, K) local neighborhoods through a stack of geometric reasoning blocks, effectively getting all-to-all communication between
+            all frames in the local neighborhood.
+         4. This gives (B*L, K, d_model) embeddings, from which we need to get a single embedding per local neighborhood. We do this by simply
+            taking the embedding corresponding to the query node. This gives us (B*L, d_model) embeddings.
+         5. Reshape back to (B, L, d_model) embeddings.
+         """
+         assert coords.size(-1) == 3 and coords.size(-2) == 3, "need N, CA, C"
+         with torch.no_grad():
+             knn_edges, _ = self.find_knn_edges(
+                 coords,
+                 ~attention_mask,
+                 coord_mask=affine_mask,
+                 sequence_id=sequence_id,
+                 knn=self.knn,
+             )
+             B, L, E = knn_edges.shape
+
+         affine_tensor = affine.tensor  # for easier manipulation
+         T_D = affine_tensor.size(-1)
+         knn_affine_tensor = node_gather(affine_tensor, knn_edges)
+         knn_affine_tensor = knn_affine_tensor.view(-1, E, T_D).contiguous()
+         affine = Affine3D.from_tensor(knn_affine_tensor)
+         knn_sequence_id = (
+             node_gather(sequence_id.unsqueeze(-1), knn_edges).view(-1, E)
+             if sequence_id is not None
+             else torch.zeros(L, E, dtype=torch.int64, device=coords.device)
+         )
+         knn_affine_mask = node_gather(affine_mask.unsqueeze(-1), knn_edges).view(
+             -1, E
+         )
+         knn_chain_id = torch.zeros(L, E, dtype=torch.int64, device=coords.device)
+
+         if residue_index is None:
+             res_idxs = knn_edges.view(-1, E)
+         else:
+             res_idxs = node_gather(residue_index.unsqueeze(-1), knn_edges).view(
+                 -1, E
+             )
+
+         z = self.relative_positional_embedding(res_idxs[:, 0], res_idxs)
+
+         z, _ = self.transformer.forward(
+             x=z,
+             sequence_id=knn_sequence_id,
+             affine=affine,
+             affine_mask=knn_affine_mask,
+             chain_id=knn_chain_id,
+         )
+
+         # Unflatten the output and take the query node embedding, which will always be the first one because
+         # a node has distance 0 with itself and the KNN are sorted.
+         z = z.view(B, L, E, -1)
+         z = z[:, :, 0, :]
+
+         return z
+
+     @staticmethod
+     def find_knn_edges(
+         coords,
+         padding_mask,
+         coord_mask,
+         sequence_id: torch.Tensor | None = None,
+         knn: int | None = None,
+     ) -> tuple:
+         assert knn is not None, "Must specify a non-null knn to find_knn_edges"
+         # Coords are N, CA, C
+         coords = coords.clone()
+         coords[~coord_mask] = 0
+
+         if sequence_id is None:
+             sequence_id = torch.zeros(
+                 (coords.shape[0], coords.shape[1]), device=coords.device
+             ).long()
+
+         with torch.no_grad(), torch.cuda.amp.autocast(enabled=False):  # type: ignore
+             ca = coords[..., 1, :]
+             edges, edge_mask = knn_graph(
+                 ca,
+                 coord_mask,
+                 padding_mask,
+                 sequence_id,
+                 no_knn=knn,
+             )
+
+         return edges, edge_mask
+
+     def encode(
+         self,
+         coords: torch.Tensor,
+         attention_mask: torch.Tensor | None = None,
+         sequence_id: torch.Tensor | None = None,
+         residue_index: torch.Tensor | None = None,
+     ):
+         coords = coords[..., :3, :]
+         affine, affine_mask = build_affine3d_from_coordinates(coords=coords)
+
+         if attention_mask is None:
+             attention_mask = torch.ones_like(affine_mask, dtype=torch.bool)
+         attention_mask = attention_mask.bool()
+
+         if sequence_id is None:
+             sequence_id = torch.zeros_like(affine_mask, dtype=torch.int64)
+
+         z = self.encode_local_structure(
+             coords=coords,
+             affine=affine,
+             attention_mask=attention_mask,
+             sequence_id=sequence_id,
+             affine_mask=affine_mask,
+             residue_index=residue_index,
+         )
+
+         z = z.masked_fill(~affine_mask.unsqueeze(2), 0)
+         z = self.pre_vq_proj(z)
+
+         z_q, min_encoding_indices, _ = self.codebook(z)
+
+         return z_q, min_encoding_indices
+
+
+ class StructureTokenDecoder(nn.Module):
+     def __init__(
+         self,
+         d_model,
+         n_heads,
+         n_layers,
+     ):
+         super().__init__()
+         self.decoder_channels = d_model
+
+         self.vqvae_codebook_size = C.VQVAE_CODEBOOK_SIZE
+         self.special_tokens = C.VQVAE_SPECIAL_TOKENS
+         self.max_pae_bin = C.VQVAE_MAX_PAE_BIN
+
+         self.embed = nn.Embedding(
+             self.vqvae_codebook_size + len(self.special_tokens), d_model
+         )
+         self.decoder_stack = TransformerStack(
+             d_model, n_heads, 1, n_layers, scale_residue=False, n_layers_geom=0
+         )
+
+         self.affine_output_projection = Dim6RotStructureHead(
+             self.decoder_channels, 10, predict_torsion_angles=False
+         )
+
+         direction_loss_bins = C.VQVAE_DIRECTION_LOSS_BINS
+         pae_bins = C.VQVAE_PAE_BINS
+         self.pairwise_bins = [
+             64,  # distogram
+             direction_loss_bins * 6,  # direction bins
+             pae_bins,  # predicted aligned error
+         ]
+         self.pairwise_classification_head = PairwisePredictionHead(
+             self.decoder_channels,
+             downproject_dim=128,
+             hidden_dim=128,
+             n_bins=sum(self.pairwise_bins),
+             bias=False,
+         )
+
+         plddt_bins = C.VQVAE_PLDDT_BINS
+         self.plddt_head = RegressionHead(
+             embed_dim=self.decoder_channels, output_dim=plddt_bins
+         )
+
+     def decode(
+         self,
+         structure_tokens: torch.Tensor,
+         attention_mask: torch.Tensor | None = None,
+         sequence_id: torch.Tensor | None = None,
+     ):
+         if attention_mask is None:
+             attention_mask = torch.ones_like(structure_tokens, dtype=torch.bool)
+
+         attention_mask = attention_mask.bool()
+         if sequence_id is None:
+             sequence_id = torch.zeros_like(structure_tokens, dtype=torch.int64)
+         # not supported for now
+         chain_id = torch.zeros_like(structure_tokens, dtype=torch.int64)
+
+         # check that BOS and EOS are set correctly
+         assert (
+             structure_tokens[:, 0].eq(self.special_tokens["BOS"]).all()
+         ), "First token in structure_tokens must be BOS token"
+         assert (
+             structure_tokens[
+                 torch.arange(structure_tokens.shape[0]), attention_mask.sum(1) - 1
+             ]
+             .eq(self.special_tokens["EOS"])
+             .all()
+         ), "Last token in structure_tokens must be EOS token"
+         assert (
+             (structure_tokens < 0).sum() == 0
+         ), "All structure tokens set to -1 should be replaced with BOS, EOS, PAD, or MASK tokens by now, but that isn't the case!"
+
+         x = self.embed(structure_tokens)
+         # !!! NOTE: Attention mask is actually unused here so watch out
+         x, _ = self.decoder_stack.forward(
+             x, affine=None, affine_mask=None, sequence_id=sequence_id, chain_id=chain_id
+         )
+
+         tensor7_affine, bb_pred = self.affine_output_projection(
+             x, affine=None, affine_mask=torch.zeros_like(attention_mask)
+         )
+
+         pae, ptm = None, None
+         pairwise_logits = self.pairwise_classification_head(x)
+         _, _, pae_logits = [
+             (o if o.numel() > 0 else None)
+             for o in pairwise_logits.split(self.pairwise_bins, dim=-1)
+         ]
+
+         special_tokens_mask = structure_tokens >= min(self.special_tokens.values())
+         pae = compute_predicted_aligned_error(
+             pae_logits,  # type: ignore
+             aa_mask=~special_tokens_mask,
+             sequence_id=sequence_id,
+             max_bin=self.max_pae_bin,
+         )
+         # This might be broken for chainbreak tokens? We might align to the chainbreak
+         ptm = compute_tm(
+             pae_logits,  # type: ignore
+             aa_mask=~special_tokens_mask,
+             max_bin=self.max_pae_bin,
+         )
+
+         plddt_logits = self.plddt_head(x)
+         plddt_value = CategoricalMixture(
+             plddt_logits, bins=plddt_logits.shape[-1]
+         ).mean()
+
+         return dict(
+             tensor7_affine=tensor7_affine,
+             bb_pred=bb_pred,
+             plddt=plddt_value,
+             ptm=ptm,
+             predicted_aligned_error=pae,
+         )
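
The decoder's pLDDT output is produced by binning: `plddt_head` emits logits over `VQVAE_PLDDT_BINS` bins and `CategoricalMixture.mean()` turns them into a per-residue expectation over bin centers on [0, 1]. A rough, self-contained illustration of that step, using toy shapes and random logits rather than the model's real outputs, is:

```python
import torch

bins = 50
logits = torch.randn(1, 7, bins)        # toy (batch, residues, bins) logits
edges = torch.linspace(0, 1, bins + 1)  # bin edges on [0, 1]
centers = (edges[:-1] + edges[1:]) / 2  # bin centers, shape (bins,)

probs = logits.softmax(-1)              # per-residue categorical distribution
plddt = probs @ centers                 # expectation over bin centers -> (1, 7)
print(plddt.shape, plddt.min().item(), plddt.max().item())
```

The same expectation-over-bins idea underlies the pairwise PAE output, except that `compute_predicted_aligned_error` works on the (L x L) slice of the pairwise head's logits.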