Zaixi committed on
Commit
89c0b51
·
1 Parent(s): 1e1a7b4

Add large file

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -34
  2. .gitignore +141 -0
  3. README.md +0 -12
  4. app.py +506 -4
  5. assets/foldmark.png +0 -0
  6. assets/foldmark_head.png +0 -0
  7. configs/__init__.py +0 -0
  8. configs/configs_base.py +409 -0
  9. configs/configs_data.py +216 -0
  10. configs/configs_inference.py +31 -0
  11. cutlass +1 -0
  12. dataset/7pzb.pkl.gz +0 -0
  13. dataset/7pzb_unwatermarked.cif +0 -0
  14. dataset/output.csv +23 -0
  15. docs/colabfold_compatiable_msa.md +33 -0
  16. docs/docker_installation.md +30 -0
  17. docs/infer_json_format.md +243 -0
  18. docs/kernels.md +24 -0
  19. docs/model_performance.md +51 -0
  20. docs/msa_pipeline.md +101 -0
  21. docs/prepare_training_data.md +119 -0
  22. docs/training.md +88 -0
  23. examples/7dc6.pdb +0 -0
  24. examples/7dc6_watermarked.pdb +0 -0
  25. examples/7pzb/msa/1/non_pairing.a3m +0 -0
  26. examples/7pzb/msa/1/pairing.a3m +0 -0
  27. examples/7pzb_need_search_msa/msa_resmsa_seq_0/0.a3m +0 -0
  28. examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/non_pairing.a3m +0 -0
  29. examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/pairing.a3m +0 -0
  30. examples/7pzb_need_search_msa/msa_resmsa_seq_0/msa.sh +33 -0
  31. Protenix_new.zip → examples/7pzb_need_search_msa/msa_resmsa_seq_0/out.tar.gz +2 -2
  32. examples/7pzb_need_search_msa/msa_resmsa_seq_0/pdb70_220313_db.m8 +135 -0
  33. examples/7pzb_need_search_msa/msa_resmsa_seq_0/tmp_5561987135da4188987956d9f05d1af2.fasta +2 -0
  34. examples/7pzb_need_search_msa/msa_resmsa_seq_0/uniref_tax.m8 +0 -0
  35. examples/7pzb_unwatermarked.cif +0 -0
  36. examples/7r6r/msa/1/non_pairing.a3m +0 -0
  37. examples/7r6r/msa/1/pairing.a3m +0 -0
  38. examples/7r6r_watermarked.cif +0 -0
  39. examples/7wux/msa/1/non_pairing.a3m +0 -0
  40. examples/7wux/msa/1/pairing.a3m +544 -0
  41. examples/7wux/msa/2/non_pairing.a3m +436 -0
  42. examples/7wux/msa/2/pairing.a3m +0 -0
  43. examples/dimer.fasta +2 -0
  44. examples/example.json +3 -0
  45. examples/example_with_msa.json +3 -0
  46. examples/finetune_subset.txt +5 -0
  47. examples/ligands/7wux_smiles.smi +1 -0
  48. examples/ligands/compounds-3d-R.sdf +123 -0
  49. examples/ligands/compounds-3d-RS.sdf +246 -0
  50. examples/prot.fasta +4 -0
.gitattributes CHANGED
@@ -1,35 +1,3 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  *.pt filter=lfs diff=lfs merge=lfs -text
2
+ *.json filter=lfs diff=lfs merge=lfs -text
3
+ examples/7pzb_need_search_msa/msa_resmsa_seq_0/out.tar.gz filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ *__pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+ *.o
9
+ *.obj
10
+ *.d
11
+ # ninja related
12
+ *ninja*
13
+ lock
14
+ # Distribution / packaging
15
+ .Python
16
+ .vscode
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ .tox/
47
+ .nox/
48
+ .coverage
49
+ .coverage.*
50
+ .cache
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # data cache and checkpoints
88
+ data_cache/
89
+ checkpoints/
90
+
91
+ .pdm.toml
92
+
93
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
94
+ __pypackages__/
95
+
96
+ # Celery stuff
97
+ celerybeat-schedule
98
+ celerybeat.pid
99
+
100
+ # SageMath parsed files
101
+ *.sage.py
102
+
103
+ # Environments
104
+ .env
105
+ .venv
106
+ env/
107
+ venv/
108
+ ENV/
109
+ env.bak/
110
+ venv.bak/
111
+
112
+ # Spyder project settings
113
+ .spyderproject
114
+ .spyproject
115
+
116
+ # Rope project settings
117
+ .ropeproject
118
+
119
+ # mkdocs documentation
120
+ /site
121
+
122
+ # mypy
123
+ .mypy_cache/
124
+ .dmypy.json
125
+ dmypy.json
126
+
127
+ # Pyre type checker
128
+ .pyre/
129
+
130
+ # pytype static type analyzer
131
+ .pytype/
132
+
133
+ # Cython debug symbols
134
+ cython_debug/
135
+
136
+ wandb/
137
+ run_logs/
138
+ .gradio/
139
+ flagged/
140
+ release_data/
141
+ .pt
README.md DELETED
@@ -1,12 +0,0 @@
1
- ---
2
- title: Protenix New
3
- emoji: 💻
4
- colorFrom: gray
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.15.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,7 +1,509 @@
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
  import gradio as gr
3
+ import os
4
+ import uuid
5
+ from datetime import datetime
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ from configs.configs_base import configs as configs_base
9
+ from configs.configs_data import data_configs
10
+ from configs.configs_inference import inference_configs
11
+ from runner.inference import download_infercence_cache, update_inference_configs, infer_predict, infer_detect, InferenceRunner
12
+ from protenix.config import parse_configs, parse_sys_args
13
+ from runner.msa_search import update_infer_json
14
+ from protenix.web_service.prediction_visualization import plot_best_confidence_measure, PredictionLoader
15
+ from process_data import process_data
16
+ import json
17
+ from typing import Dict, List
18
+ from Bio.PDB import MMCIFParser, PDBIO
19
+ import tempfile
20
+ import shutil
21
+ from Bio import PDB
22
+ from gradio_molecule3d import Molecule3D
23
 
24
# Example request offered in the "JSON Upload" tab.
EXAMPLE_PATH = './examples/example.json'

# In-memory example request; predict_structure() compares incoming requests
# against this value.
example_json = [
    {
        'sequences': [
            {
                'proteinChain': {
                    'sequence': 'MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH',
                    'count': 2,
                },
            },
            {'dnaSequence': {'sequence': 'CTAGGTAACATTACTCGCG', 'count': 2}},
            {'dnaSequence': {'sequence': 'GCGAGTAATGTTAC', 'count': 2}},
            {'ligand': {'ligand': 'CCD_PCG', 'count': 2}},
        ],
        'name': '7pzb_need_search_msa',
    },
]

# Custom CSS for styling
custom_css = """
#logo {
    width: 50%;
}
.title {
    font-size: 32px;
    font-weight: bold;
    color: #4CAF50;
    display: flex;
    align-items: center; /* Vertically center the logo and text */
}
"""

# Environment variables set from the script itself, before the inference
# runner is created further down in this module.
os.environ.update(
    {
        "LAYERNORM_TYPE": "fast_layernorm",
        "USE_DEEPSPEED_EVO_ATTTENTION": "true",
        'CUTLASS_PATH': './cutlass',
    }
)


def _cartoon_rep(model, color, opacity):
    """Build one cartoon-style representation entry for the Molecule3D viewers."""
    return {
        "model": model,
        "chain": "",
        "resname": "",
        "style": "cartoon",
        "color": color,
        "residue_range": "",
        "around": 0,
        "byres": False,
        "opacity": opacity,
    }


# Two representations: model 0 drawn faintly in white, model 1 drawn in cyan.
reps = [
    _cartoon_rep(0, "whiteCarbon", 0.2),
    _cartoon_rep(1, "cyanCarbon", 0.8),
]
84
+ ]
85
+
86
+
87
def align_pdb_files(pdb_file_1, pdb_file_2):
    """Superimpose the structure in ``pdb_file_2`` onto ``pdb_file_1``.

    The fit uses the atoms named "CA" of the first model of each file, paired
    in iteration order. The transformed second structure is written back to
    ``pdb_file_2``, overwriting it; ``pdb_file_1`` is only read.

    Args:
        pdb_file_1: Path of the reference PDB file.
        pdb_file_2: Path of the PDB file to move; rewritten in place.

    Returns:
        None. When either structure has no "CA" atoms there is nothing to fit,
        so a warning is logged and ``pdb_file_2`` is left untouched.
    """
    pdb_parser = PDB.PDBParser(QUIET=True)
    structure_1 = pdb_parser.get_structure('Structure_1', pdb_file_1)
    structure_2 = pdb_parser.get_structure('Structure_2', pdb_file_2)
    model_1 = structure_1[0]
    model_2 = structure_2[0]

    # Use CA atoms as the correspondence set.
    atoms_1 = [atom for atom in model_1.get_atoms() if atom.get_name() == "CA"]
    atoms_2 = [atom for atom in model_2.get_atoms() if atom.get_name() == "CA"]

    if not atoms_1 or not atoms_2:
        logging.getLogger(__name__).warning(
            "Skipping alignment of %s onto %s: no CA atoms to superimpose",
            pdb_file_2,
            pdb_file_1,
        )
        return

    # NOTE(review): Superimposer is expected to reject lists of different
    # length; that error is deliberately left to propagate.
    super_imposer = PDB.Superimposer()
    super_imposer.set_atoms(atoms_1, atoms_2)
    super_imposer.apply(list(model_2.get_atoms()))  # move every atom of model_2

    # Save the aligned structure back to the second (original) file.
    io = PDB.PDBIO()
    io.set_structure(structure_2)
    io.save(pdb_file_2)
113
+
114
+ # Function to convert .cif to .pdb and save as a temporary file
115
def convert_cif_to_pdb(cif_path):
    """Write a PDB copy of an mmCIF file to a fresh temporary file.

    Args:
        cif_path (str): Path to the input CIF file.

    Returns:
        str: Path to the temporary PDB file. The file is created with
        ``delete=False``, so it stays on disk after this function returns.
    """
    structure = MMCIFParser().get_structure("protein", cif_path)

    # Only the unique name is needed here; the handle is closed right away
    # and PDBIO reopens the path itself.
    with tempfile.NamedTemporaryFile(suffix=".pdb", delete=False) as handle:
        pdb_path = handle.name

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_path)
    return pdb_path
139
+
140
def plot_3d(pred_loader):
    """Convert the first prediction CIF of ``pred_loader`` for the 3D viewer.

    "First" means first in sorted order of ``pred_loader.cif_paths``.

    Returns:
        tuple: ``(temp_pdb_path, cif_path)`` - the temporary PDB produced by
        ``convert_cif_to_pdb`` and the CIF path it was made from.
    """
    first_cif = sorted(pred_loader.cif_paths)[0]
    return convert_cif_to_pdb(first_cif), first_cif
148
+
149
+
150
def parse_json_input(json_data: List[Dict]) -> Dict:
    """Convert Protenix JSON format to UI-friendly structure.

    Args:
        json_data: A list of job entries, each with a ``"sequences"`` list and
            an optional ``"name"``. A single job entry given as a plain dict
            is treated as a one-element list, and an entry without
            ``"sequences"`` contributes no components.

    Returns:
        Dict with the keys ``"protein_chains"`` and ``"dna_sequences"`` (lists
        of ``{"sequence", "count"}``), ``"ligands"`` (list of
        ``{"type", "count"}``) and ``"complex_name"``. When several entries are
        given, their components are pooled and the name of the last entry
        wins. Sequence items of any other kind are ignored.
    """
    components = {
        "protein_chains": [],
        "dna_sequences": [],
        "ligands": [],
        "complex_name": ""
    }

    # (key in the Protenix item, destination list, source field, output field);
    # order matters: an item is filed under the first key it contains.
    routes = (
        ("proteinChain", "protein_chains", "sequence", "sequence"),
        ("dnaSequence", "dna_sequences", "sequence", "sequence"),
        ("ligand", "ligands", "ligand", "type"),
    )

    # Uploaded files are user-controlled: tolerate a bare job object.
    entries = [json_data] if isinstance(json_data, dict) else (json_data or [])

    for entry in entries:
        components["complex_name"] = entry.get("name", "")
        for seq in entry.get("sequences") or []:
            for item_key, bucket, source_field, output_field in routes:
                if item_key in seq:
                    payload = seq[item_key]
                    components[bucket].append({
                        output_field: payload[source_field],
                        "count": payload["count"]
                    })
                    break
    return components
178
+
179
def _is_blank(cell):
    """Return True for a cell with no value: None, NaN or whitespace-only text."""
    return cell is None or cell != cell or not str(cell).strip()


def _component_rows(table, value_key):
    """Yield ``(value, count)`` pairs from one component table.

    Accepted shapes: a list of dicts keyed by ``value_key`` and ``"count"``
    (passed through untouched), a list of ``[value, count]`` rows, a dict with
    a ``"data"`` list of such rows, or an object exposing ``.values.tolist()``.
    Row-style entries with a blank value are skipped, a blank count defaults
    to 1, and other counts are coerced to ``int``.
    """
    if table is None:
        return
    if isinstance(table, str):
        raise TypeError("component table must be a list of rows, not a string")
    if isinstance(table, dict):
        table = table.get("data") or []
    elif hasattr(table, "values") and hasattr(table.values, "tolist"):
        table = table.values.tolist()

    for row in table:
        if isinstance(row, dict):
            yield row[value_key], row["count"]
        elif not _is_blank(row[0]):
            count = row[1] if len(row) > 1 else None
            yield str(row[0]).strip(), 1 if _is_blank(count) else int(float(count))


def create_protenix_json(input_data: Dict) -> List[Dict]:
    """Convert UI inputs to Protenix JSON format.

    Args:
        input_data: Dict with ``"complex_name"`` and the component tables
            ``"protein_chains"``, ``"dna_sequences"`` and ``"ligands"``. Each
            table may be a list of dicts (``{"sequence", "count"}``, or
            ``{"type", "count"}`` for ligands) or a table of
            ``[value, count]`` rows as produced by a dataframe widget.
            Missing tables are treated as empty.

    Returns:
        A one-element list ``[{"sequences": [...], "name": ...}]`` with
        protein chains first, then DNA sequences, then ligands.
    """
    # (input table, Protenix wrapper key, input value field, output value field)
    layout = (
        ("protein_chains", "proteinChain", "sequence", "sequence"),
        ("dna_sequences", "dnaSequence", "sequence", "sequence"),
        ("ligands", "ligand", "type", "ligand"),
    )

    sequences = [
        {wrapper: {output_field: value, "count": count}}
        for section, wrapper, input_field, output_field in layout
        for value, count in _component_rows(input_data.get(section), input_field)
    ]

    return [{
        "sequences": sequences,
        "name": input_data.get("complex_name", "")
    }]
211
+
212
+
213
+
214
def _read_request_json(source):
    """Return the job list referenced by the ``json`` field of a request.

    ``source`` may be a filesystem path (str), an object exposing the path as
    ``.name`` (an uploaded file), or already-parsed JSON data, which is
    returned unchanged.
    """
    if isinstance(source, str):
        path = source
    elif hasattr(source, "name"):
        path = source.name
    else:
        return source
    with open(path) as handle:  # closed deterministically, unlike json.load(open(...))
        return json.load(handle)


def predict_structure(input_collector: dict):
    """Handle both input types (uploaded JSON and manual entry).

    Args:
        input_collector: Request dict built by the UI. ``"watermark"`` is
            required. Either ``"json"`` holds a path / uploaded file / parsed
            job list, or - when ``"json"`` is absent or empty - ``"data"``
            holds the manual-entry fields for ``create_protenix_json``.

    Returns:
        tuple: ``(view3d, confidence_img_path, cif_path)``. ``view3d`` is the
        PDB path of the first prediction, or a two-element list
        ``[watermarked_pdb, original_pdb]`` when watermarking is on.

    Side effects:
        Writes the request JSON and all outputs under ``./output`` and mutates
        the module-level ``configs`` object.
        NOTE(review): concurrent requests would share that state - confirm
        the app serialises calls.
    """
    os.makedirs("./output", exist_ok=True)

    # Timestamp plus a short random suffix for a unique run name.
    random_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    save_path = os.path.join("./output", f"{random_name}.json")

    logging.getLogger(__name__).info("Prediction request: %s", input_collector)

    # The manual-input collector has no "json" key, so do not index it directly.
    json_source = input_collector.get("json")
    if json_source:
        input_data = _read_request_json(json_source)
    else:
        input_data = create_protenix_json(input_collector["data"])

    with open(save_path, "w") as f:
        json.dump(input_data, f, indent=2)

    # Set on every request so the visualisation step below never sees a value
    # left over from a previous call.
    watermark = input_collector['watermark']
    configs.watermark = watermark

    if input_data == example_json and watermark:
        # The bundled example with watermarking reuses stored outputs.
        configs.saved_path = './output/example_output/'
    else:
        # run msa
        json_file = update_infer_json(save_path, './output', True)

        # Run prediction
        configs.input_json_path = json_file
        configs.saved_path = os.path.join("./output/", random_name)
        infer_predict(runner, configs)

    # Generate visualizations
    predictions_dir = os.path.join(configs.saved_path, 'predictions')
    view3d, cif_path = plot_3d(pred_loader=PredictionLoader(predictions_dir))
    if configs.watermark:
        original_dir = os.path.join(configs.saved_path, 'predictions_orig')
        view3d_orig, _ = plot_3d(pred_loader=PredictionLoader(original_dir))
        # Rewrites view3d_orig so both structures overlay in the viewer.
        align_pdb_files(view3d, view3d_orig)
        view3d = [view3d, view3d_orig]
    plot_best_confidence_measure(predictions_dir)
    confidence_img_path = os.path.join(predictions_dir, "best_sample_confidence.png")

    return view3d, confidence_img_path, cif_path
264
+
265
+
266
# --- Logging ---------------------------------------------------------------
logger = logging.getLogger(__name__)
LOG_FORMAT = "%(asctime)s,%(msecs)-3d %(levelname)-8s [%(filename)s:%(lineno)s %(funcName)s] %(message)s"
logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    datefmt="%Y-%m-%d %H:%M:%S",
    filemode="w",
)

# --- Inference configuration -------------------------------------------------
# The kernel switch mirrors the environment variable of the same name.
_evo_attention_flag = os.environ.get("USE_DEEPSPEED_EVO_ATTTENTION", False)
configs_base["use_deepspeed_evo_attention"] = _evo_attention_flag == "true"

# Command-line style overrides handed to parse_configs (note the trailing space).
arg_str = (
    "--seeds 101 --dump_dir ./output --input_json_path ./examples/example.json "
    "--model.N_cycle 10 --sample_diffusion.N_sample 5 --sample_diffusion.N_step 200 "
)

# Later sources override earlier ones: base, then the "data" section, then
# the inference settings.
configs = parse_configs(
    configs={**configs_base, "data": data_configs, **inference_configs},
    arg_str=arg_str,
    fill_required_with_null=True,
)
configs.load_checkpoint_path = './checkpoint.pt'
download_infercence_cache(configs, model_version="v0.2.0")

# Runner shared by the prediction and detection handlers.
runner = InferenceRunner(configs)

# Created outside the layout and placed with .render(), one per predictor tab.
add_watermark = gr.Checkbox(label="Add Watermark", value=True)
add_watermark1 = gr.Checkbox(label="Add Watermark", value=True)
289
+
290
# Main application layout: header logo plus the two structure-predictor tabs.
# NOTE(review): the nesting of rows inside each tab is reconstructed from a
# flattened listing - confirm against the rendered page.
with gr.Blocks(title="FoldMark", css=custom_css) as demo:
    with gr.Row():
        # Header logo; sized through the "#logo" rule in custom_css.
        gr.Image(value="./assets/foldmark_head.png", elem_id="logo", label="Logo", height=150, show_label=False)

    def _parse_uploaded_json(upload):
        """Summarise an uploaded request file for the "Parsed Components" panel."""
        if upload is None:
            return None
        # The upload is either a path or an object carrying the path in .name.
        with open(getattr(upload, "name", upload)) as handle:
            return parse_json_input(json.load(handle))

    with gr.Tab("Structure Predictor (JSON Upload)"):
        # First create the upload component
        json_upload = gr.File(label="Upload JSON", file_types=[".json"])

        # Then create the example component that references it
        gr.Examples(
            examples=[[EXAMPLE_PATH]],
            inputs=[json_upload],
            label="Click to use example JSON:",
            examples_per_page=1
        )

        # Rest of the components
        upload_name = gr.Textbox(label="Complex Name (optional)")
        upload_output = gr.JSON(label="Parsed Components")

        json_upload.upload(
            fn=_parse_uploaded_json,
            inputs=json_upload,
            outputs=upload_output
        )

        # Shared prediction components
        with gr.Row():
            add_watermark.render()
            submit_btn = gr.Button("Predict Structure", variant="primary")

        with gr.Row():
            view3d = Molecule3D(label="3D Visualization", reps=reps)
            legend = gr.Markdown("""
            **Color Legend:**

            - <span style="color:grey">Unwatermarked Structure</span>
            - <span style="color:cyan">Watermarked Structure</span>
            """)
        with gr.Row():
            cif_file = gr.File(label="Download CIF File")
        with gr.Row():
            confidence_plot_image = gr.Image(label="Confidence Measures")

        input_collector = gr.JSON(visible=False)

        # Map inputs to a dictionary, then hand that dictionary to the predictor.
        submit_btn.click(
            fn=lambda j, w: {"json": j, "watermark": w},
            inputs=[json_upload, add_watermark],
            outputs=input_collector
        ).then(
            fn=predict_structure,
            inputs=input_collector,
            outputs=[view3d, confidence_plot_image, cif_file]
        )

        gr.Markdown("""
The example of the uploaded json file for structure prediction.
<pre>
[{
    "sequences": [
        {
            "proteinChain": {
                "sequence": "MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH",
                "count": 2
            }
        },
        {
            "dnaSequence": {
                "sequence": "CTAGGTAACATTACTCGCG",
                "count": 2
            }
        },
        {
            "dnaSequence": {
                "sequence": "GCGAGTAATGTTAC",
                "count": 2
            }
        },
        {
            "ligand": {
                "ligand": "CCD_PCG",
                "count": 2
            }
        }
    ],
    "name": "7pzb"
}]
</pre>
""")

    with gr.Tab("Structure Predictor (Manual Input)"):
        with gr.Row():
            complex_name = gr.Textbox(label="Complex Name")

        with gr.Accordion(label="Protein Chains", open=True):
            protein_chains = gr.Dataframe(
                headers=["Sequence", "Count"],
                datatype=["str", "number"],
                row_count=1,
                col_count=(2, "fixed")
            )

        with gr.Accordion(label="DNA Sequences", open=True):
            dna_sequences = gr.Dataframe(
                headers=["Sequence", "Count"],
                datatype=["str", "number"],
                row_count=1
            )

        with gr.Accordion(label="Ligands", open=True):
            ligands = gr.Dataframe(
                headers=["Ligand Type", "Count"],
                datatype=["str", "number"],
                row_count=1
            )

        manual_output = gr.JSON(label="Generated JSON")

        complex_name.change(
            fn=lambda x: {"complex_name": x},
            inputs=complex_name,
            outputs=manual_output
        )

        # Shared prediction components (this tab has its own instances).
        with gr.Row():
            add_watermark1.render()
            submit_btn = gr.Button("Predict Structure", variant="primary")

        with gr.Row():
            view3d = Molecule3D(label="3D Visualization", reps=reps)
            legend = gr.Markdown("""
            **Color Legend:**

            - <span style="color:grey">Unwatermarked Structure</span>
            - <span style="color:cyan">Watermarked Structure</span>
            """)
        with gr.Row():
            cif_file = gr.File(label="Download CIF File")
        with gr.Row():
            confidence_plot_image = gr.Image(label="Confidence Measures")

        input_collector = gr.JSON(visible=False)

        # Map inputs to a dictionary. "json" is always present (None here)
        # because predict_structure looks that key up on every request.
        submit_btn.click(
            fn=lambda c, p, d, l, w: {
                "json": None,
                "data": {"complex_name": c, "protein_chains": p, "dna_sequences": d, "ligands": l},
                "watermark": w,
            },
            inputs=[complex_name, protein_chains, dna_sequences, ligands, add_watermark1],
            outputs=input_collector
        ).then(
            fn=predict_structure,
            inputs=input_collector,
            outputs=[view3d, confidence_plot_image, cif_file]
        )
451
+
452
def is_watermarked(file):
    """Run watermark detection on an uploaded CIF file.

    Args:
        file: The uploaded file, either a path or an object carrying the path
            in ``.name``; ``None`` when the upload widget has been cleared.

    Returns:
        tuple: ``(label, pdb_path)`` where ``label`` is ``"Watermarked"`` or
        ``"Not Watermarked"`` and ``pdb_path`` is a temporary PDB copy of the
        upload for the 3D viewer. ``(None, None)`` when ``file`` is ``None``.

    Side effects:
        Copies the upload into a new ``./output/<uuid>/`` directory and sets
        ``process_success`` and ``subdir`` on the module-level ``configs``.
    """
    # The change event also fires when the file is removed.
    if file is None:
        return None, None

    # Generate a unique subdirectory and filename
    unique_id = str(uuid.uuid4())
    subdir = os.path.join('./output', unique_id)
    os.makedirs(subdir, exist_ok=True)
    file_path = os.path.join(subdir, f"{unique_id}.cif")

    # Save the uploaded file to the new location
    shutil.copy(getattr(file, "name", file), file_path)

    # Preprocess the directory, then run detection on it.
    configs.process_success = process_data(subdir)
    configs.subdir = subdir
    result = infer_detect(runner, configs)

    temp_pdb_path = convert_cif_to_pdb(file_path)
    # NOTE(review): the return type of infer_detect is not visible here, so
    # the explicit comparison with False is kept as-is rather than replaced by
    # a truthiness test (which would treat None differently).
    label = "Not Watermarked" if result == False else "Watermarked"  # noqa: E712
    return label, temp_pdb_path
473
+
474
+
475
+
476
# Third tab: upload a CIF file and report whether it carries a watermark.
# Re-entering the Blocks context appends this tab to the existing app.
with demo:
    with gr.Tab("Watermark Detector"):
        # Upload widget; the extension filter was "..cif" (double dot), which
        # does not name the ".cif" extension.
        cif_upload = gr.File(label="Upload .cif", file_types=[".cif"])

        with gr.Row():
            cif_3d_view = Molecule3D(label="3D Visualization of Input", reps=reps)

        # Prediction output
        prediction_output = gr.Textbox(label="Prediction")

        # Run detection whenever the uploaded file changes.
        cif_upload.change(is_watermarked, inputs=cif_upload, outputs=[prediction_output, cif_3d_view])

        # Example files
        example_files = [
            "./examples/7r6r_watermarked.cif",
            "./examples/7pzb_unwatermarked.cif"
        ]

        gr.Examples(examples=example_files, inputs=cif_upload)
496
+
497
+
498
+
499
+
500
+
501
+
502
if __name__ == "__main__":
    # Listen on all interfaces on port 7860, hide the API page and request a
    # public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_api": False,
        "share": True,
    }
    demo.launch(**launch_options)
assets/foldmark.png ADDED
assets/foldmark_head.png ADDED
configs/__init__.py ADDED
File without changes
configs/configs_base.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 ByteDance and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # pylint: disable=C0114,C0301
16
+ from protenix.config.extend_types import (
17
+ GlobalConfigValue,
18
+ ListValue,
19
+ RequiredValue,
20
+ ValueMaybeNone,
21
+ )
22
+
23
# Run-level settings. Entries declared with RequiredValue carry only a type
# here and no default value.
basic_configs = {
    # identification / locations
    "project": RequiredValue(str),
    "run_name": RequiredValue(str),
    "base_dir": RequiredValue(str),
    # training
    "eval_interval": RequiredValue(int),
    "log_interval": RequiredValue(int),
    "checkpoint_interval": -1,
    "eval_first": False,  # run evaluate() before training steps
    "iters_to_accumulate": 1,
    "eval_only": False,
    # checkpoint loading
    "load_checkpoint_path": "",
    "load_ema_checkpoint_path": "",
    "load_strict": False,
    "load_params_only": True,
    "skip_load_step": False,
    "skip_load_optimizer": False,
    "skip_load_scheduler": False,
    "train_confidence_only": False,
    # experiment tracking
    "use_wandb": True,
    "wandb_id": "",
    # reproducibility
    "seed": 66,
    "deterministic": False,
    # EMA
    "ema_decay": -1.0,
    "eval_ema_only": False,  # whether wandb only tracking ema checkpoint metrics
    "ema_mutable_param_keywords": [""],
}
50
# Data settings; the train_* and test_* keys mirror each other.
data_configs = {
    # size limits
    "train_crop_size": 256,
    "test_max_n_token": -1,
    # training-time options
    "train_lig_atom_rename": False,
    "train_shuffle_mols": False,
    "train_shuffle_sym_ids": False,
    # test-time options
    "test_lig_atom_rename": False,
    "test_shuffle_mols": False,
    "test_shuffle_sym_ids": False,
}
61
# Optimiser settings. Nested sections refer back to the top-level "lr",
# "warmup_steps" and "decay_every_n_steps" entries through GlobalConfigValue
# instead of repeating the numbers.
optim_configs = {
    # Optim
    "lr": 0.0018,
    "lr_scheduler": "af3",
    "warmup_steps": 10,
    "max_steps": RequiredValue(int),
    "min_lr_ratio": 0.1,
    "decay_every_n_steps": 50000,
    "grad_clip_norm": 10,
    # Optim - Adam
    "adam": {
        "beta1": 0.9,
        "beta2": 0.95,
        "weight_decay": 1e-8,
        "lr": GlobalConfigValue("lr"),
        "use_adamw": False,
    },
    # Optim - LRScheduler
    "af3_lr_scheduler": {
        "warmup_steps": GlobalConfigValue("warmup_steps"),
        "decay_every_n_steps": GlobalConfigValue("decay_every_n_steps"),
        "decay_factor": 0.95,
        "lr": GlobalConfigValue("lr"),
    },
}
86
+ model_configs = {
87
+ # Model
88
+ "c_s": 384,
89
+ "c_z": 128,
90
+ "c_s_inputs": 449, # c_s_inputs == c_token + 32 + 32 + 1
91
+ "watermark": 32,
92
+ "c_atom": 128,
93
+ "c_atompair": 16,
94
+ "c_token": 384,
95
+ "n_blocks": 48,
96
+ "max_atoms_per_token": 24, # DNA G max_atoms = 23
97
+ "no_bins": 64,
98
+ "sigma_data": 16.0,
99
+ "diffusion_batch_size": 48,
100
+ "diffusion_chunk_size": ValueMaybeNone(4), # chunksize of diffusion_batch_size
101
+ "blocks_per_ckpt": ValueMaybeNone(
102
+ 1
103
+ ), # NOTE: Number of blocks in each activation checkpoint, if None, no checkpointing is performed.
104
+ # switch of kernels
105
+ "use_memory_efficient_kernel": False,
106
+ "use_deepspeed_evo_attention": True,
107
+ "use_flash": False,
108
+ "use_lma": False,
109
+ "use_xformer": False,
110
+ "find_unused_parameters": False,
111
+ "dtype": "bf16", # default training dtype: bf16
112
+ "loss_metrics_sparse_enable": True, # the swicth for both sparse lddt metrics and sparse bond/smooth lddt loss
113
+ "skip_amp": {
114
+ "sample_diffusion": True,
115
+ "confidence_head": True,
116
+ "sample_diffusion_training": True,
117
+ "loss": True,
118
+ },
119
+ "infer_setting": {
120
+ "chunk_size": ValueMaybeNone(
121
+ 64
122
+ ), # should set to null for normal training and small dataset eval [for efficiency]
123
+ "sample_diffusion_chunk_size": ValueMaybeNone(
124
+ 1
125
+ ), # should set to null for normal training and small dataset eval [for efficiency]
126
+ "lddt_metrics_sparse_enable": GlobalConfigValue("loss_metrics_sparse_enable"),
127
+ "lddt_metrics_chunk_size": ValueMaybeNone(
128
+ 1
129
+ ), # only works if loss_metrics_sparse_enable, can set as default 1
130
+ },
131
+ "train_noise_sampler": {
132
+ "p_mean": -1.2,
133
+ "p_std": 1.5,
134
+ "sigma_data": 16.0, # NOTE: in EDM, this is 1.0
135
+ },
136
+ "inference_noise_scheduler": {
137
+ "s_max": 160.0,
138
+ "s_min": 4e-4,
139
+ "rho": 7,
140
+ "sigma_data": 16.0, # NOTE: in EDM, this is 1.0
141
+ },
142
+ "sample_diffusion": {
143
+ "gamma0": 0.8,
144
+ "gamma_min": 1.0,
145
+ "noise_scale_lambda": 1.003,
146
+ "step_scale_eta": 1.5,
147
+ "N_step": 200,
148
+ "N_sample": 5,
149
+ "N_step_mini_rollout": 20,
150
+ "N_sample_mini_rollout": 1,
151
+ },
152
+ "model": {
153
+ "N_model_seed": 1, # for inference
154
+ "N_cycle": 4,
155
+ "input_embedder": {
156
+ "c_atom": GlobalConfigValue("c_atom"),
157
+ "c_atompair": GlobalConfigValue("c_atompair"),
158
+ "c_token": GlobalConfigValue("c_token"),
159
+ },
160
+ "relative_position_encoding": {
161
+ "r_max": 32,
162
+ "s_max": 2,
163
+ "c_z": GlobalConfigValue("c_z"),
164
+ },
165
+ "template_embedder": {
166
+ "c": 64,
167
+ "c_z": GlobalConfigValue("c_z"),
168
+ "n_blocks": 0,
169
+ "dropout": 0.25,
170
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
171
+ },
172
+ "msa_module": {
173
+ "c_m": 64,
174
+ "c_z": GlobalConfigValue("c_z"),
175
+ "c_s_inputs": GlobalConfigValue("c_s_inputs"),
176
+ "n_blocks": 4,
177
+ "msa_dropout": 0.15,
178
+ "pair_dropout": 0.25,
179
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
180
+ },
181
+ "pairformer": {
182
+ "n_blocks": GlobalConfigValue("n_blocks"),
183
+ "c_z": GlobalConfigValue("c_z"),
184
+ "c_s": GlobalConfigValue("c_s"),
185
+ "n_heads": 16,
186
+ "dropout": 0.25,
187
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
188
+ },
189
+ "pairformer_encoder": {
190
+ "n_blocks": 6,
191
+ "c_z": GlobalConfigValue("c_z"),
192
+ "c_s": GlobalConfigValue("c_s"),
193
+ "n_heads": 16,
194
+ "dropout": 0.25,
195
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
196
+ },
197
+ "pairformer_decoder": {
198
+ "n_blocks": 6,
199
+ "c_z": GlobalConfigValue("c_z"),
200
+ "c_s": GlobalConfigValue("c_s"),
201
+ "n_heads": 16,
202
+ "dropout": 0.25,
203
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
204
+ },
205
+ "diffusion_module": {
206
+ "use_fine_grained_checkpoint": True,
207
+ "sigma_data": GlobalConfigValue("sigma_data"),
208
+ "c_token": 768,
209
+ "c_atom": GlobalConfigValue("c_atom"),
210
+ "c_atompair": GlobalConfigValue("c_atompair"),
211
+ "c_z": GlobalConfigValue("c_z"),
212
+ "c_s": GlobalConfigValue("c_s"),
213
+ "c_s_inputs": GlobalConfigValue("c_s_inputs"),
214
+ "initialization": {
215
+ "zero_init_condition_transition": False,
216
+ "zero_init_atom_encoder_residual_linear": False,
217
+ "he_normal_init_atom_encoder_small_mlp": False,
218
+ "he_normal_init_atom_encoder_output": False,
219
+ "glorot_init_self_attention": False,
220
+ "zero_init_adaln": True,
221
+ "zero_init_residual_condition_transition": False,
222
+ "zero_init_dit_output": True,
223
+ "zero_init_atom_decoder_linear": False,
224
+ },
225
+ "atom_encoder": {
226
+ "n_blocks": 3,
227
+ "n_heads": 4,
228
+ },
229
+ "transformer": {
230
+ "n_blocks": 24,
231
+ "n_heads": 16,
232
+ },
233
+ "atom_decoder": {
234
+ "n_blocks": 3,
235
+ "n_heads": 4,
236
+ },
237
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
238
+ },
239
+ "diffusion_module_encoder_decoder": {
240
+ "use_fine_grained_checkpoint": True,
241
+ "sigma_data": GlobalConfigValue("sigma_data"),
242
+ "c_token": 768,
243
+ "c_atom": GlobalConfigValue("c_atom"),
244
+ "c_atompair": GlobalConfigValue("c_atompair"),
245
+ "c_z": GlobalConfigValue("c_z"),
246
+ "c_s": GlobalConfigValue("c_s"),
247
+ "c_s_inputs": GlobalConfigValue("c_s_inputs"),
248
+ "watermark": GlobalConfigValue("watermark"),
249
+ "initialization": {
250
+ "zero_init_condition_transition": False,
251
+ "zero_init_atom_encoder_residual_linear": False,
252
+ "he_normal_init_atom_encoder_small_mlp": False,
253
+ "he_normal_init_atom_encoder_output": False,
254
+ "glorot_init_self_attention": False,
255
+ "zero_init_adaln": True,
256
+ "zero_init_residual_condition_transition": False,
257
+ "zero_init_dit_output": True,
258
+ "zero_init_atom_decoder_linear": False,
259
+ },
260
+ "atom_encoder": {
261
+ "n_blocks": 3,
262
+ "n_heads": 4,
263
+ },
264
+ "transformer": {
265
+ "n_blocks": 6,
266
+ "n_heads": 16,
267
+ },
268
+ "atom_decoder": {
269
+ "n_blocks": 3,
270
+ "n_heads": 4,
271
+ },
272
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
273
+ },
274
+ "confidence_head": {
275
+ "c_z": GlobalConfigValue("c_z"),
276
+ "c_s": GlobalConfigValue("c_s"),
277
+ "c_s_inputs": GlobalConfigValue("c_s_inputs"),
278
+ "n_blocks": 4,
279
+ "max_atoms_per_token": GlobalConfigValue("max_atoms_per_token"),
280
+ "pairformer_dropout": 0.0,
281
+ "blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
282
+ "distance_bin_start": 3.25,
283
+ "distance_bin_end": 52.0,
284
+ "distance_bin_step": 1.25,
285
+ "stop_gradient": True,
286
+ },
287
+ "distogram_head": {
288
+ "c_z": GlobalConfigValue("c_z"),
289
+ "no_bins": GlobalConfigValue("no_bins"),
290
+ },
291
+ },
292
+ }
293
+ perm_configs = {
294
+ # Chain and Atom Permutation
295
+ "chain_permutation": {
296
+ "train": {
297
+ "mini_rollout": True,
298
+ "diffusion_sample": False,
299
+ },
300
+ "test": {
301
+ "diffusion_sample": True,
302
+ },
303
+ "permute_by_pocket": True,
304
+ "configs": {
305
+ "use_center_rmsd": False,
306
+ "find_gt_anchor_first": False,
307
+ "accept_it_as_it_is": False,
308
+ "enumerate_all_anchor_pairs": False,
309
+ "selection_metric": "aligned_rmsd",
310
+ },
311
+ },
312
+ "atom_permutation": {
313
+ "train": {
314
+ "mini_rollout": True,
315
+ "diffusion_sample": False,
316
+ },
317
+ "test": {
318
+ "diffusion_sample": True,
319
+ },
320
+ "permute_by_pocket": True,
321
+ "global_align_wo_symmetric_atom": False,
322
+ },
323
+ }
324
+ loss_configs = {
325
+ "loss": {
326
+ "diffusion_lddt_chunk_size": ValueMaybeNone(1),
327
+ "diffusion_bond_chunk_size": ValueMaybeNone(1),
328
+ "diffusion_chunk_size_outer": ValueMaybeNone(-1),
329
+ "diffusion_sparse_loss_enable": GlobalConfigValue("loss_metrics_sparse_enable"),
330
+ "diffusion_lddt_loss_dense": True, # only set true in initial training for training speed
331
+ "resolution": {"min": 0.1, "max": 4.0},
332
+ "weight": {
333
+ "alpha_confidence": 1e-4,
334
+ "alpha_pae": 0.0, # or 1 in finetuning stage 3
335
+ "alpha_except_pae": 1.0,
336
+ "alpha_diffusion": 4.0,
337
+ "alpha_distogram": 3e-2,
338
+ "alpha_bond": 0.0, # or 1 in finetuning stages
339
+ "smooth_lddt": 1.0, # or 0 in finetuning stages
340
+ "watermark": 1.0,
341
+ },
342
+ "plddt": {
343
+ "min_bin": 0,
344
+ "max_bin": 1.0,
345
+ "no_bins": 50,
346
+ "normalize": True,
347
+ "eps": 1e-6,
348
+ },
349
+ "pde": {
350
+ "min_bin": 0,
351
+ "max_bin": 32,
352
+ "no_bins": 64,
353
+ "eps": 1e-6,
354
+ },
355
+ "resolved": {
356
+ "eps": 1e-6,
357
+ },
358
+ "pae": {
359
+ "min_bin": 0,
360
+ "max_bin": 32,
361
+ "no_bins": 64,
362
+ "eps": 1e-6,
363
+ },
364
+ "diffusion": {
365
+ "mse": {
366
+ "weight_mse": 1 / 3,
367
+ "weight_dna": 5.0,
368
+ "weight_rna": 5.0,
369
+ "weight_ligand": 10.0,
370
+ "eps": 1e-6,
371
+ },
372
+ "bond": {
373
+ "eps": 1e-6,
374
+ },
375
+ "smooth_lddt": {
376
+ "eps": 1e-6,
377
+ },
378
+ },
379
+ "watermark": {
380
+ "eps": 1e-6,
381
+ },
382
+ "distogram": {
383
+ "min_bin": 2.3125,
384
+ "max_bin": 21.6875,
385
+ "no_bins": 64,
386
+ "eps": 1e-6,
387
+ },
388
+ },
389
+ "metrics": {
390
+ "lddt": {
391
+ "eps": 1e-6,
392
+ },
393
+ "complex_ranker_keys": ListValue(["plddt", "gpde", "ranking_score"]),
394
+ "chain_ranker_keys": ListValue(["chain_ptm", "chain_plddt"]),
395
+ "interface_ranker_keys": ListValue(
396
+ ["chain_pair_iptm", "chain_pair_iptm_global", "chain_pair_plddt"]
397
+ ),
398
+ "clash": {"af3_clash_threshold": 1.1, "vdw_clash_threshold": 0.75},
399
+ },
400
+ }
401
+
402
+ configs = {
403
+ **basic_configs,
404
+ **data_configs,
405
+ **optim_configs,
406
+ **model_configs,
407
+ **perm_configs,
408
+ **loss_configs,
409
+ }
configs/configs_data.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 ByteDance and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # pylint: disable=C0114,C0301
16
+ import os
17
+ from copy import deepcopy
18
+
19
+ from protenix.config.extend_types import GlobalConfigValue, ListValue
20
+
21
+ default_test_configs = {
22
+ "sampler_configs": {
23
+ "sampler_type": "uniform",
24
+ },
25
+ "cropping_configs": {
26
+ "method_weights": [
27
+ 0.0, # ContiguousCropping
28
+ 0.0, # SpatialCropping
29
+ 1.0, # SpatialInterfaceCropping
30
+ ],
31
+ "crop_size": -1,
32
+ },
33
+ "lig_atom_rename": GlobalConfigValue("test_lig_atom_rename"),
34
+ "shuffle_mols": GlobalConfigValue("test_shuffle_mols"),
35
+ "shuffle_sym_ids": GlobalConfigValue("test_shuffle_sym_ids"),
36
+ }
37
+
38
+ default_weighted_pdb_configs = {
39
+ "sampler_configs": {
40
+ "sampler_type": "weighted",
41
+ "beta_dict": {
42
+ "chain": 0.5,
43
+ "interface": 1,
44
+ },
45
+ "alpha_dict": {
46
+ "prot": 3,
47
+ "nuc": 3,
48
+ "ligand": 1,
49
+ },
50
+ "force_recompute_weight": True,
51
+ },
52
+ "cropping_configs": {
53
+ "method_weights": ListValue([0.2, 0.4, 0.4]),
54
+ "crop_size": GlobalConfigValue("train_crop_size"),
55
+ },
56
+ "sample_weight": 0.5,
57
+ "limits": -1,
58
+ "lig_atom_rename": GlobalConfigValue("train_lig_atom_rename"),
59
+ "shuffle_mols": GlobalConfigValue("train_shuffle_mols"),
60
+ "shuffle_sym_ids": GlobalConfigValue("train_shuffle_sym_ids"),
61
+ }
62
+
63
+ DATA_ROOT_DIR = "./"
64
+
65
+ # Prefer the CCD cache created by scripts/gen_ccd_cache.py (the files without a date in the filename).
66
+ # See: docs/prepare_data.md
67
+ # CCD_COMPONENTS_FILE_PATH = os.path.join(DATA_ROOT_DIR, "components.cif")
68
+ # CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
69
+ # DATA_ROOT_DIR, "components.cif.rdkit_mol.pkl"
70
+ # )
71
+
72
+ # if (not os.path.exists(CCD_COMPONENTS_FILE_PATH)) or (
73
+ # not os.path.exists(CCD_COMPONENTS_RDKIT_MOL_FILE_PATH)
74
+ # ):
75
+ CCD_COMPONENTS_FILE_PATH = os.path.join(DATA_ROOT_DIR, "components.v20240608.cif")
76
+ CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
77
+ DATA_ROOT_DIR, "components.v20240608.cif.rdkit_mol.pkl"
78
+ )
79
+
80
+
81
+ # This is a patch in inference stage for users that do not have root permission.
82
+ # If you run
83
+ # ```
84
+ # bash inference_demo.sh
85
+ # ```
86
+ # or
87
+ # ```
88
+ # protenix predict --input examples/example.json --out_dir ./output
89
+ # ```
90
+ # The checkpoint and the data cache will be downloaded to the current code directory.
91
+ if (not os.path.exists(CCD_COMPONENTS_FILE_PATH)) or (
92
+ not os.path.exists(CCD_COMPONENTS_RDKIT_MOL_FILE_PATH)
93
+ ):
94
+ print("Try to find the ccd cache data in the code directory for inference.")
95
+ current_file_path = os.path.abspath(__file__)
96
+ current_directory = os.path.dirname(current_file_path)
97
+ code_directory = os.path.dirname(current_directory)
98
+
99
+ data_cache_dir = os.path.join(code_directory, "release_data/ccd_cache")
100
+ CCD_COMPONENTS_FILE_PATH = os.path.join(data_cache_dir, "components.cif")
101
+ CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
102
+ data_cache_dir, "components.cif.rdkit_mol.pkl"
103
+ )
104
+ if (not os.path.exists(CCD_COMPONENTS_FILE_PATH)) or (
105
+ not os.path.exists(CCD_COMPONENTS_RDKIT_MOL_FILE_PATH)
106
+ ):
107
+
108
+ CCD_COMPONENTS_FILE_PATH = os.path.join(
109
+ data_cache_dir, "components.v20240608.cif"
110
+ )
111
+ CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
112
+ data_cache_dir, "components.v20240608.cif.rdkit_mol.pkl"
113
+ )
114
+
115
+ data_configs = {
116
+ "num_dl_workers": 16,
117
+ "epoch_size": 10000,
118
+ "train_ref_pos_augment": True,
119
+ "test_ref_pos_augment": True,
120
+ "train_sets": ListValue(["weightedPDB_before2109_wopb_nometalc_0925"]),
121
+ "train_sampler": {
122
+ "train_sample_weights": ListValue([1.0]),
123
+ "sampler_type": "weighted",
124
+ },
125
+ "test_sets": ListValue(["recentPDB_1536_sample384_0925"]),
126
+ "weightedPDB_before2109_wopb_nometalc_0925": {
127
+ "base_info": {
128
+ "mmcif_dir": os.path.join(DATA_ROOT_DIR, "mmcif"),
129
+ "bioassembly_dict_dir": os.path.join(DATA_ROOT_DIR, "mmcif_bioassembly"),
130
+ "indices_fpath": os.path.join(
131
+ DATA_ROOT_DIR,
132
+ "indices/weightedPDB_indices_before_2021-09-30_wo_posebusters_resolution_below_9.csv.gz",
133
+ ),
134
+ "pdb_list": "",
135
+ "random_sample_if_failed": True,
136
+ "max_n_token": -1, # can be used for removing data with too many tokens.
137
+ "use_reference_chains_only": False,
138
+ "exclusion": { # do not sample the data based on ions.
139
+ "mol_1_type": ListValue(["ions"]),
140
+ "mol_2_type": ListValue(["ions"]),
141
+ },
142
+ },
143
+ **deepcopy(default_weighted_pdb_configs),
144
+ },
145
+ "recentPDB_1536_sample384_0925": {
146
+ "base_info": {
147
+ "mmcif_dir": os.path.join(DATA_ROOT_DIR, "mmcif"),
148
+ "bioassembly_dict_dir": os.path.join(
149
+ DATA_ROOT_DIR, "recentPDB_bioassembly"
150
+ ),
151
+ "indices_fpath": os.path.join(
152
+ DATA_ROOT_DIR, "indices/recentPDB_low_homology_maxtoken1536.csv"
153
+ ),
154
+ "pdb_list": os.path.join(
155
+ DATA_ROOT_DIR,
156
+ "indices/recentPDB_low_homology_maxtoken1024_sample384_pdb_id.txt",
157
+ ),
158
+ "max_n_token": GlobalConfigValue("test_max_n_token"), # filter data
159
+ "sort_by_n_token": False,
160
+ "group_by_pdb_id": True,
161
+ "find_eval_chain_interface": True,
162
+ },
163
+ **deepcopy(default_test_configs),
164
+ },
165
+ "posebusters_0925": {
166
+ "base_info": {
167
+ "mmcif_dir": os.path.join(DATA_ROOT_DIR, "posebusters_mmcif"),
168
+ "bioassembly_dict_dir": os.path.join(
169
+ DATA_ROOT_DIR, "posebusters_bioassembly"
170
+ ),
171
+ "indices_fpath": os.path.join(
172
+ DATA_ROOT_DIR, "indices/posebusters_indices_mainchain_interface.csv"
173
+ ),
174
+ "pdb_list": "",
175
+ "find_pocket": True,
176
+ "find_all_pockets": False,
177
+ "max_n_token": GlobalConfigValue("test_max_n_token"), # filter data
178
+ },
179
+ **deepcopy(default_test_configs),
180
+ },
181
+ "msa": {
182
+ "enable": True,
183
+ "enable_rna_msa": False,
184
+ "prot": {
185
+ "pairing_db": "uniref100",
186
+ "non_pairing_db": "mmseqs_other",
187
+ "pdb_mmseqs_dir": os.path.join(DATA_ROOT_DIR, "mmcif_msa"),
188
+ "seq_to_pdb_idx_path": os.path.join(DATA_ROOT_DIR, "seq_to_pdb_index.json"),
189
+ "indexing_method": "sequence",
190
+ },
191
+ "rna": {
192
+ "seq_to_pdb_idx_path": "",
193
+ "rna_msa_dir": "",
194
+ "indexing_method": "sequence",
195
+ },
196
+ "strategy": "random",
197
+ "merge_method": "dense_max",
198
+ "min_size": {
199
+ "train": 1,
200
+ "test": 2048,
201
+ },
202
+ "max_size": {
203
+ "train": 16384,
204
+ "test": 16384,
205
+ },
206
+ "sample_cutoff": {
207
+ "train": 2048,
208
+ "test": 2048,
209
+ },
210
+ },
211
+ "template": {
212
+ "enable": False,
213
+ },
214
+ "ccd_components_file": CCD_COMPONENTS_FILE_PATH,
215
+ "ccd_components_rdkit_mol_file": CCD_COMPONENTS_RDKIT_MOL_FILE_PATH,
216
+ }
configs/configs_inference.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 ByteDance and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # pylint: disable=C0114
16
+ import os
17
+
18
+ from protenix.config.extend_types import ListValue, RequiredValue
19
+
20
+ code_directory = '/n/netscratch/mzitnik_lab/Lab/zzx/'
21
+ # The model will be downloaded to the following dir if it does not exist:
22
+ # "./release_data/checkpoint/model_v0.2.0.pt"
23
+ inference_configs = {
24
+ "seeds": ListValue([101]),
25
+ "dump_dir": "./output",
26
+ "need_atom_confidence": True,
27
+ "sorted_by_ranking_score": True,
28
+ "input_json_path": RequiredValue(str),
29
+ "num_workers": 16,
30
+ "use_msa": True,
31
+ }
cutlass ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit f7b19de32c5d1f3cedfc735c2849f12b537522ee
dataset/7pzb.pkl.gz ADDED
Binary file (136 kB). View file
 
dataset/7pzb_unwatermarked.cif ADDED
The diff for this file is too large to render. See raw diff
 
dataset/output.csv ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "entity_1_id","chain_1_id","mol_1_type","cluster_1_id","entity_2_id","chain_2_id","mol_2_type","cluster_2_id","cluster_id","pdb_id","assembly_id","release_date","num_tokens","num_prot_chains","resolution","type","mol_type_group","sub_mol_1_type","sub_mol_2_type","eval_type"
2
+ "1","A","prot","NotInClusterTxt","","","","","NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_prot","prot","","intra_prot"
3
+ "1","B","prot","NotInClusterTxt","","","","","NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_prot","prot","","intra_prot"
4
+ "2","C","nuc","CTAGGTAACATTACTCGCG","","","","","CTAGGTAACATTACTCGCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
5
+ "2","D","nuc","CTAGGTAACATTACTCGCG","","","","","CTAGGTAACATTACTCGCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
6
+ "3","E","nuc","GCGAGTAATGTTAC","","","","","GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
7
+ "3","F","nuc","GCGAGTAATGTTAC","","","","","GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
8
+ "4","G","ligand","PCG","","","","","PCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_ligand","non_bonded_ligand","","intra_ligand"
9
+ "4","H","ligand","PCG","","","","","PCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_ligand","non_bonded_ligand","","intra_ligand"
10
+ "1","A","prot","NotInClusterTxt","1","B","prot","NotInClusterTxt","NotInClusterTxt:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","prot_prot","prot","prot","prot_prot"
11
+ "1","A","prot","NotInClusterTxt","2","C","nuc","CTAGGTAACATTACTCGCG","CTAGGTAACATTACTCGCG:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
12
+ "1","A","prot","NotInClusterTxt","3","E","nuc","GCGAGTAATGTTAC","GCGAGTAATGTTAC:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
13
+ "1","A","prot","NotInClusterTxt","4","G","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
14
+ "1","A","prot","NotInClusterTxt","4","H","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
15
+ "1","B","prot","NotInClusterTxt","2","D","nuc","CTAGGTAACATTACTCGCG","CTAGGTAACATTACTCGCG:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
16
+ "1","B","prot","NotInClusterTxt","3","F","nuc","GCGAGTAATGTTAC","GCGAGTAATGTTAC:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
17
+ "1","B","prot","NotInClusterTxt","4","G","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
18
+ "1","B","prot","NotInClusterTxt","4","H","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
19
+ "2","C","nuc","CTAGGTAACATTACTCGCG","2","D","nuc","CTAGGTAACATTACTCGCG","CTAGGTAACATTACTCGCG:CTAGGTAACATTACTCGCG","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
20
+ "2","C","nuc","CTAGGTAACATTACTCGCG","3","E","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
21
+ "2","C","nuc","CTAGGTAACATTACTCGCG","3","F","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
22
+ "2","D","nuc","CTAGGTAACATTACTCGCG","3","E","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
23
+ "2","D","nuc","CTAGGTAACATTACTCGCG","3","F","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
docs/colabfold_compatiable_msa.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Using Local Colabfold_search to Generate Protenix-Compatible MSA
2
+
3
+ Colabfold provides an easy-to-use and efficient MSA search pipeline that's ideal for generating MSAs during inference. Unfortunately, this pipeline cannot fully match Protenix's MSA search process designed for training, as the current `colabfold_search` omits species information in the MSA, preventing correct pairing by Protenix's data pipeline. To address this issue, we provide the `scripts/colabfold_msa.py` script, which post-processes `colabfold_search` results by adding pseudo taxonomy IDs to paired MSAs to match Protenix's data pipeline.
4
+
5
+ Here's an example:
6
+ ```bash
7
+ python3 scripts/colabfold_msa.py examples/dimer.fasta <path/to/colabfold_db> dimer_colabfold_msa --db1 uniref30_2103_db --db3 colabfold_envdb_202108_db --mmseqs_path <path/to/mmseqs>
8
+ ```
9
+
10
+ #### Configuring Colabfold_search
11
+ Installation of colabfold and mmseqs2 is required.
12
+
13
+ colabfold can be installed with: `pip install colabfold[alphafold]`.
14
+
15
+ Build MMseqs2 from source:
16
+
17
+ ```bash
18
+ wget https://github.com/soedinglab/MMseqs2/archive/refs/tags/16-747c6.tar.gz
19
+ tar xzf 16-747c6.tar.gz
20
+ cd MMseqs2-16-747c6/
21
+ mkdir build && cd build
22
+ cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=. ..
23
+ make -j8
24
+ make install
25
+ ```
26
+
27
+ Download ColabFold database:
28
+ ```bash
29
+ git clone https://github.com/sokrypton/ColabFold.git
30
+ cd ColabFold
31
+ # Configure database:
32
+ MMSEQS_NO_INDEX=1 ./setup_databases.sh <path/to/colabfold_db>
33
+ ```
docs/docker_installation.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Run with Docker
2
+
3
+ 1. Install Docker (with GPU Support)
4
+
5
+ Ensure that Docker is installed and configured with GPU support. Follow these steps:
6
+ * Install [Docker](https://www.docker.com/) if not already installed.
7
+ * Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to enable GPU support.
8
+ * Verify the setup with:
9
+ ```bash
10
+ docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
11
+ ```
12
+
13
+ 2. Pull the Docker image, which was built based on this [Dockerfile](../Dockerfile)
14
+ ```bash
15
+ docker pull ai4s-cn-beijing.cr.volces.com/infra/protenix:v0.0.1
16
+ ```
17
+
18
+ 3. Clone this repository and `cd` into it
19
+ ```bash
20
+ git clone https://github.com/bytedance/protenix.git
21
+ cd ./protenix
22
+ pip install -e .
23
+ ```
24
+
25
+ 4. Run Docker with an interactive shell
26
+ ```bash
27
+ docker run --gpus all -it -v $(pwd):/workspace -v /dev/shm:/dev/shm ai4s-cn-beijing.cr.volces.com/infra/protenix:v0.0.1 /bin/bash
28
+ ```
29
+
30
+ After running the above commands, you’ll be inside the container’s environment and can execute commands as you would on a normal Linux terminal.
docs/infer_json_format.md ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Format of the input JSON file
2
+ The JSON file format closely resembles that used by the AlphaFold Server, with a few key differences:
3
+
4
+ 1. There are no restrictions on the types of ligands, ions, and modifications, whereas the AlphaFold Server currently supports only a limited set of specific CCD codes.
5
+ 2. Users can specify bonds between entities, such as covalent bonds between ligands and polymers.
6
+ 3. It supports inputting ligands in the form of SMILES strings or molecular structure files.
7
+ 4. Ligands composed of multiple CCD codes can be treated as a single entity. This feature is useful for representing glycans, for example, "NAG-NAG".
8
+ 5. The "glycans" field is no longer supported. Glycans can be fully represented by inputting multiple ligands with defined bonding or by providing their SMILES strings.
9
+
10
+ Here is an overview of the JSON file format:
11
+ ```json
12
+ [
13
+ {
14
+ "name": "Test Fold Job Number One",
15
+ "sequences": [...],
16
+ "covalent_bonds": [...]
17
+ }
18
+ ]
19
+ ```
20
+ The JSON file consists of a list of dictionaries, where each dictionary represents a set of sequences you want to model.
21
+ Even if you are modeling only one set of sequences, the top-level structure should still be a list.
22
+
23
+ Each dictionary contains the following three keys:
24
+ * `name`: A string representing the name of the inference job.
25
+ * `sequences`: A list of dictionaries that describe the entities (e.g., proteins, DNA, RNA, small molecules, and ions) involved in the inference.
26
+ * `covalent_bonds`: An optional list of dictionaries that define the covalent bonds between atoms from different entities.
27
+
28
+ Details of `sequences` and `covalent_bonds` are provided below.
29
+
30
+ #### sequences
31
+ There are 5 kinds of supported sequences:
32
+ * `proteinChain` – used for proteins
33
+ * `dnaSequence` – used for DNA (single strand)
34
+ * `rnaSequence` – used for RNA (single strand)
35
+ * `ligand` – used for ligands
36
+ * `ion` – used for ions
37
+
38
+ ##### proteinChain
39
+ ```json
40
+ {
41
+ "proteinChain": {
42
+ "sequence": "PREACHINGS",
43
+ "count": 1,
44
+ "modifications": [
45
+ {
46
+ "ptmType": "CCD_HY3",
47
+ "ptmPosition": 1
48
+ },
49
+ {
50
+ "ptmType": "CCD_P1L",
51
+ "ptmPosition": 5
52
+ }
53
+ ],
54
+ "msa":{
55
+ "precomputed_msa_dir": "./precomputed_msa",
56
+ "pairing_db": "uniref100"
57
+ }
58
+ }
59
+ }
60
+ ```
61
+ * `sequence`: A string representing a protein sequence, which can only contain the 20 standard amino acid types and X (UNK) for unknown residues.
62
+ * `count`: The number of copies of this protein chain (integer).
63
+ * `modifications`: An optional list of dictionaries that describe post-translational modifications.
64
+
65
+ * `ptmType`: A string containing CCD code of the modification.
66
+ * `ptmPosition`: The position of the modified amino acid (integer).
67
+ * `msa`: A dictionary containing options for Multiple Sequence Alignment (MSA). **If you want to search MSAs using our inference pipeline, you should not set this field or set it to an empty dictionary**:
68
+ * `precomputed_msa_dir`: The path to a directory containing precomputed MSAs. This directory should contain two specific files: "pairing.a3m" for MSAs used for pairing, and "non_pairing.a3m" for non-pairing MSAs.
69
+ * `pairing_db`: The name of the genomic database used for pairing MSAs. The default is "uniref100" and you should not change it. In fact, the MSA search is performed against UniRef30, a clustered version of UniRef100.
70
+
71
+ ##### dnaSequence
72
+ ```json
73
+ {
74
+ "dnaSequence": {
75
+ "sequence": "GATTACA",
76
+ "modifications": [
77
+ {
78
+ "modificationType": "CCD_6OG",
79
+ "basePosition": 1
80
+ },
81
+ {
82
+ "modificationType": "CCD_6MA",
83
+ "basePosition": 2
84
+ }
85
+ ],
86
+ "count": 1
87
+ }
88
+ },
89
+ {
90
+ "dnaSequence": {
91
+ "sequence": "TGTAATC",
92
+ "count": 1
93
+ }
94
+ }
95
+ ```
96
+ Please note that the `dnaSequence` type refers to a single stranded DNA sequence. If you
97
+ wish to model double-stranded DNA, please add a second `dnaSequence` entry representing
98
+ the sequence of the reverse complement strand.
99
+
100
+ * `sequence`: A string containing a DNA sequence; only letters A, T, G, C and N (unknown nucleotide) are allowed.
101
+ * `count`: The number of copies of this DNA chain (integer).
102
+ * `modifications`: An optional list of dictionaries describing
103
+ the DNA chemical modifications:
104
+ * `modificationType`: A string containing CCD code of modification.
105
+ * `basePosition`: The position of the modified nucleotide (integer).
106
+
107
+ ##### rnaSequence
108
+ ```json
109
+ {
110
+ "rnaSequence": {
111
+ "sequence": "GUAC",
112
+ "modifications": [
113
+ {
114
+ "modificationType": "CCD_2MG",
115
+ "basePosition": 1
116
+ },
117
+ {
118
+ "modificationType": "CCD_5MC",
119
+ "basePosition": 4
120
+ }
121
+ ],
122
+ "count": 1
123
+ }
124
+ }
125
+ ```
126
+ * `sequence`: A string representing the RNA sequence (single-stranded); only letters A, U, G, C and N (unknown nucleotides) are allowed.
127
+ * `count`: The number of copies of this RNA chain (integer).
128
+ * `modifications`: An optional list of dictionaries describing RNA chemical modifications:
129
+ * `modificationType`: A string containing
130
+ CCD code of modification.
131
+ * `basePosition`: The position of the modified nucleotide (integer).
132
+
133
+ ##### ligand
134
+ ```json
135
+ {
136
+ "ligand": {
137
+ "ligand": "CCD_ATP",
138
+ "count": 1
139
+ }
140
+ },
141
+ {
142
+ "ligand": {
143
+ "ligand": "FILE_your_file_path/atp.sdf",
144
+ "count": 1
145
+ }
146
+ },
147
+ {
148
+ "ligand": {
149
+ "ligand": "Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O",
150
+ "count": 1
151
+ }
152
+ }
153
+ ```
154
+ * `ligand`: A string representing the ligand. `ligand` can be one of the following three:
155
+ * A string containing the CCD code of the ligand, prefixed with "CCD_". For glycans or similar structures, this can be a concatenation of multiple CCD codes, for example, "CCD_NAG_BMA_BGC".
156
+ * A molecular SMILES string representing the ligand.
157
+ * A path to a molecular structure file, prefixed with "FILE_", where the supported file formats are PDB, SDF, MOL, and MOL2. The file must include the 3D conformation of the molecule.
158
+
159
+ * `count` is the number of copies of this ligand (integer).
160
+
161
+ ##### ion
162
+ ```json
163
+ {
164
+ "ion": {
165
+ "ion": "MG",
166
+ "count": 2
167
+ }
168
+ },
169
+ {
170
+ "ion": {
171
+ "ion": "NA",
172
+ "count": 3
173
+ }
174
+ }
175
+ ```
176
+ * `ion`: A string containing the CCD code for the ion. Note that, unlike ligands, the ion code **does not** start with "CCD_".
177
+ * `count`: The number of copies of this ion (integer).
178
+
179
+ #### covalent_bonds
180
+ ```json
181
+ "covalent_bonds": [
182
+ {
183
+ "entity1": "2",
184
+ "copy1": 1,
185
+ "position1": "2",
186
+ "atom1": "N6",
187
+ "entity2": "3",
188
+ "copy2": 1,
189
+ "position2": "1",
190
+ "atom2": "C1"
191
+ }
192
+ ]
193
+ ```
194
+
195
+ The `covalent_bonds` section specifies covalent bonds between a polymer and a ligand, or between two ligands.
196
+ To define a covalent bond, two atoms involved in the bond must be identified. The following fields are used:
197
+
198
+ * `entity1`, `entity2`: The entity numbers for the two atoms involved in the bond.
199
+ The entity number corresponds to the order in which the entity appears in the `sequences` list, starting from 1.
200
+ * `copy1`, `copy2`: The copy index (starting from 1) of `entity1` and `entity2`, respectively. These fields are optional, but if specified, both `copy1` and `copy2` must be filled simultaneously or left empty at the same time. If neither field is provided, a bond will be created between all pairs of copies of the two entities. For example, if both entity1 and entity2 have two copies, a bond will be formed between entity1.copy1 and entity2.copy1, as well as between entity1.copy2 and entity2.copy2. In this case, the number of copies for both entities must be equal.
201
+ * `position1`, `position2` - The position of the residue (or ligand part) within the entity.
202
+ The position value starts at 1 and can vary based on the type of entity:
203
+ * For **polymers** (e.g., proteins, DNA, RNA), the position corresponds to the location of the residue in the sequence.
204
+ * For **ligands** composed of multiple CCD codes, the position refers to the serial number of the CCD code.
205
+ * For **single CCD code ligands**, or ligands defined by **SMILES** or **FILE**, the position is always set to 1.
206
+
207
+ * `atom1`, `atom2` - The atom names (or atom indices) of the atoms to be bonded.
208
+ * If the entity is a polymer or described by a CCD code, the atom names are consistent with those defined in the CCD.
209
+ * If the entity is a ligand defined by SMILES or a FILE, atoms can be specified by their atom index. The atom index corresponds to the position of the atom in the file or in the SMILES string, starting from 0.
210
+
211
+ Deprecation Notice: The previous fields such as old `left_entity`, `right_entity`, and other fields starting with `left`/`right` have been updated to use `1` and `2` to denote the two atoms forming a bond. The current code still supports the old field names, but they may be deprecated in the future, leaving only the new field names.
212
+
213
+ ### Format of the model output
214
+ The outputs will be saved in the directory provided via the `--dump_dir` flag in the inference script. The outputs include the predicted structures in CIF format and the confidence in JSON files. The `--dump_dir` will have the following structure:
215
+
216
+ ```bash
217
+ ├── <name>/ # specified in the input JSON file
218
+ │ ├── <seed>/ # specified via the `--seeds` flag in the inference script
219
+ │ │ ├── <name>_<seed>_sample_0.cif
220
+ │ │ ├── <name>_<seed>_summary_confidence_sample_0.json
221
+ │ │ └──... # the number of samples in each seed is specified via `--sample_diffusion.N_sample ` flag in the inference script
222
+ │ └──...
223
+ └── ...
224
+ ```
225
+
226
+ The contents of each output file are as follows:
227
+ - `<name>_<seed>_sample_*.cif` - A CIF format text file containing the predicted structure
228
+ - `<name>_<seed>_summary_confidence_sample_*.json` - A JSON format text file containing various confidence scores for assessing the reliability of predictions. Here’s a description of each score:
229
+
230
+ - `plddt` - Predicted Local Distance Difference Test (pLDDT) score. Higher values indicate greater confidence.
231
+ - `gpde` - Global Predicted Distance Error (PDE) score. Lower values indicate greater confidence.
232
+ - `ptm` - Predicted TM-score (pTM). Values closer to 1 indicate greater confidence.
233
+ - `iptm` - Interface Predicted TM-score, used to estimate the accuracy of interfaces between chains. Values closer to 1 indicate greater confidence.
234
+ - `chain_ptm` - pTM score calculated for individual chains with the shape of [N_chains], indicating the reliability of specific chain structure.
235
+ - `chain_pair_iptm`: Pairwise interface pTM scores between chain pairs with the shape of [N_chains, N_chains], indicating the reliability of specific chain-chain interactions.
236
+ - `chain_iptm` - Average ipTM scores for each chain with the shape of [N_chains].
237
+ - `chain_pair_iptm_global` - Average `chain_iptm` between chain pairs with the shape of [N_chains, N_chains]. For interfaces containing a small molecule, ion, or bonded ligand chain (named `C*`), this value is equal to the `chain_iptm` value of `C*`.
238
+ - `chain_plddt` - pLDDT scores calculated for individual chains with the shape of [N_chains].
239
+ - `chain_pair_plddt` - Pairwise pLDDT scores for chain pairs with the shape of [N_chains, N_chains].
240
+ - `has_clash` - Boolean flag indicating if there are steric clashes in the predicted structure.
241
+ - `disorder` - Predicted regions of intrinsic disorder within the protein, highlighting residues that may be flexible or unstructured.
242
+ - `ranking_score` - Predicted confidence score for ranking complexes. Higher values indicate greater confidence.
243
+ - `num_recycles`: Number of recycling steps used during inference.
docs/kernels.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Setting up kernels
2
+
3
+ - **Custom CUDA layernorm kernels** modified from [FastFold](https://github.com/hpcaitech/FastFold) and [Oneflow](https://github.com/Oneflow-Inc/oneflow) accelerate training by about 30%-50% across different training stages. To use this feature, run the following command:
4
+ ```bash
5
+ export LAYERNORM_TYPE=fast_layernorm
6
+ ```
7
+ If the environment variable `LAYERNORM_TYPE` is set to `fast_layernorm`, the model will employ the layernorm we have developed; otherwise, the naive PyTorch layernorm will be adopted. The kernels will be compiled when `fast_layernorm` is called for the first time.
8
+ - **[DeepSpeed DS4Sci_EvoformerAttention kernel](https://www.deepspeed.ai/tutorials/ds4sci_evoformerattention/)** is a memory-efficient attention kernel developed as part of a collaboration between OpenFold and the DeepSpeed4Science initiative. To use this feature, run the following command:
9
+ ```bash
10
+ export USE_DEEPSPEED_EVO_ATTTENTION=true
11
+ ```
12
+ DS4Sci_EvoformerAttention is implemented based on [CUTLASS](https://github.com/NVIDIA/cutlass). If you use this feature, you need to clone the CUTLASS repository and specify the path to it in the environment variable `CUTLASS_PATH`. The [Dockerfile](Dockerfile) already includes this setting:
13
+ ```bash
14
+ RUN git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git /opt/cutlass
15
+ ENV CUTLASS_PATH=/opt/cutlass
16
+ ```
17
+ If you set up `Protenix` by `pip`, you can set environment variable `CUTLASS_PATH` as follows:
18
+
19
+ ```bash
20
+ git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git /path/to/cutlass
21
+ export CUTLASS_PATH=/path/to/cutlass
22
+ ```
23
+
24
+ The kernels will be compiled when DS4Sci_EvoformerAttention is called for the first time.
docs/model_performance.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Training
2
+ Some settings follow those in the [AlphaFold 3](https://www.nature.com/articles/s41586-024-07487-w) paper. The table below shows the training settings for different fine-tuning stages:
3
+
4
+ | Arguments | Initial training | Fine tuning 1 | Fine tuning 2 | Fine tuning 3 |
5
+ |-----------------------------------------|--------|---------|-------|-----|
6
+ | `train_crop_size` | 384 | 640 | 768 | 768 |
7
+ | `diffusion_batch_size` | 48 | 32 | 32 | 32 |
8
+ | `loss.weight.alpha_pae` | 0 | 0 | 0 | 1.0 |
9
+ | `loss.weight.alpha_bond` | 0 | 1.0 | 1.0 | 0 |
10
+ | `loss.weight.smooth_lddt` | 1.0 | 0 | 0 | 0 |
11
+ | `loss.weight.alpha_confidence` | 1e-4 | 1e-4 | 1e-4 | 1e-4|
12
+ | `loss.weight.alpha_diffusion` | 4.0 | 4.0 | 4.0 | 0 |
13
+ | `loss.weight.alpha_distogram` | 0.03 | 0.03 | 0.03 | 0 |
14
+ | `train_confidence_only` | False | False | False | True|
15
+ | full BF16-mixed speed(A100, s/step) | ~12 | ~30 | ~44 | ~13 |
16
+ | full BF16-mixed peak memory (G) | ~34 | ~35 | ~48 | ~24 |
17
+
18
+ We recommend carrying out the training on A100-80G or H20/H100 GPUs. If utilizing full BF16-Mixed precision training, the initial training stage can also be performed on A800-40G GPUs. For GPUs with smaller memory, such as the A30, you'll need to reduce the model size, for example by decreasing `model.pairformer.nblocks` and `diffusion_batch_size`.
19
+
20
+ ### Inference
21
+
22
+ By **default**, the model runs inference in BF16 Mixed precision, while the `SampleDiffusion` and `ConfidenceHead` parts are still run in FP32 precision.
23
+
24
+ Below are reference examples of cuda memory usage (G).
25
+
26
+ | Ntoken | Natom | Default | Full BF16 Mixed |
27
+ |--------|-------|-------|------------------|
28
+ | 500 | 10000 | 5.6 | 5.1 |
29
+ | 1500 | 30000 | 24.8 | 19.2 |
30
+ | 2500 | 25000 | 52.2 | 34.8 |
31
+ | 3500 | 35000 | 67.6 | 38.2 |
32
+ | 4500 | 45000 | 77.0 | 59.2 |
33
+ | 5000 | 50000 | OOM | 72.8 |
34
+
35
+ The script in [runner/inference.py](../runner/inference.py) will automatically change the default precision to compute `SampleDiffusion`,`ConfidenceHead` to avoid OOM as follows:
36
+ ```python
37
+ def update_inference_configs(configs: Any, N_token: int):
38
+ # Setting the default inference configs for different N_token and N_atom
39
+ # when N_token is larger than 3000, the default config might OOM even on a
40
+ # A100 80G GPUS,
41
+ if N_token > 3840:
42
+ configs.skip_amp.confidence_head = False
43
+ configs.skip_amp.sample_diffusion = False
44
+ elif N_token > 2560:
45
+ configs.skip_amp.confidence_head = False
46
+ configs.skip_amp.sample_diffusion = True
47
+ else:
48
+ configs.skip_amp.confidence_head = True
49
+ configs.skip_amp.sample_diffusion = True
50
+ return configs
51
+ ```
docs/msa_pipeline.md ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## MSA data pipeline
2
+ If you download our released wwPDB dataset as in [training.md](./training.md), the mmcif_msa [450G] dir has the following directory structure.
3
+ ```bash
4
+ ├── seq_to_pdb_index.json [45M] # sequence to integers mapping file
5
+ ├── mmcif_msa [450G] # msa files
6
+ ├── 0
7
+ ├── uniref100_hits.a3m
8
+ ├── mmseqs_other_hits.a3m
9
+ ├── 1
10
+ ├── uniref100_hits.a3m
11
+ ├── mmseqs_other_hits.a3m
12
+ ├── 2
13
+ ├── uniref100_hits.a3m
14
+ ├── mmseqs_other_hits.a3m
15
+ ...
16
+ ├── 157201
17
+ ├── uniref100_hits.a3m
18
+ ├── mmseqs_other_hits.a3m
19
+
20
+ ```
21
+
22
+ Each integer in the first-level directory under mmcif_msa (for example, 0, 1, 2, and 157201) represents a unique protein sequence. The key of `seq_to_pdb_index.json` is the unique protein sequence, and the value is the integer corresponding to the first-level subdirectory of mmcif_msa mentioned above.
23
+
24
+ This document is used to provide the steps to convert the MSA obtained from colabfold into the Protenix training format.
25
+
26
+ ### Steps to get your own MSA data for training
27
+
28
+ #### Step1: get input protein sequence
29
+ Run the following command:
30
+
31
+ ```python
32
+ python3 scripts/msa/step1-get_prot_seq.py
33
+ ```
34
+ you will get outputs in `scripts/msa/data/pdb_seqs` dir. The result dir is as follows,
35
+
36
+ ```bash
37
+ ├── pdb_index_to_seq.json # mapping integers to sequences
38
+ ├── seq_to_pdb_index.json # mapping sequences to integers identifiers when saving MSA, This file is required in training for finding local MSA path from sequence
39
+ ├── pdb_seq.fasta # Input of MSA
40
+ ├── pdb_seq.csv # Intermediate Files
41
+ ├── seq_to_pdb_id_entity_id.json # Intermediate Files
42
+ ```
43
+
44
+ #### Step2: run msa search
45
+ We give detailed environment configuration and search commands in
46
+
47
+ ```python
48
+ scripts/msa/step2-get_msa.ipynb
49
+ ```
50
+
51
+ The searched MSA is in `scripts/msa/data/mmcif_msa_initial`. The result dir is as follows:
52
+ ```bash
53
+ ├── 0.a3m
54
+ ├── 1.a3m
55
+ ├── 2.a3m
56
+ ├── 3.a3m
57
+ ├── pdb70_220313_db.m8
58
+ ├── uniref_tax.m8 # record Taxonomy ID which is used by MSA Pairing
59
+ ```
60
+ #### Step3: MSA Post-Processing
61
+
62
+ The overall solution is to search the MSA containing taxonomy information only once for the unique sequence, and pair it according to the species information of each MSA.
63
+
64
+ For MSA Post-Processing, the Taxonomy ID from the UniRef30 DB is added to the MSAs, and the MSAs are split into `uniref100_hits.a3m` and `mmseqs_other_hits.a3m`, which correspond to `pairing.a3m` and `non_pairing.a3m` in the inference stage, respectively.
65
+
66
+ You can run:
67
+ ```python
68
+ python3 scripts/msa/step3-uniref_add_taxid.py
69
+
70
+ python3 scripts/msa/step4-split_msa_to_uniref_and_others.py
71
+ ```
72
+
73
+ The final pairing and non_pairing MSAs in `scripts/msa/data/mmcif_msa` are as follows:
74
+
75
+
76
+ ```
77
+ >query
78
+ GPTHRFVQKVEEMVQNHMTYSLQDVGGDANWQLVVEEGEMKVYRREVEENGIVLDPLKATHAVKGVTGHEVCNYFWNVDVRNDWETTIENFHVVETLADNAIIIYQTHKRVWPASQRDVLYLSVIRKIPALTENDPETWIVCNFSVDHDSAPLNNRCVRAKINVAMICQTLVSPPEGNQEISRDNILCKITYVANVNPGGWAPASVLRAVAKREYPKFLKRFTSYVQEKTAGKPILF
79
+ >UniRef100_A0A0S7JZT1_188132/ 246 0.897 6.614E-70 2 236 237 97 331 332
80
+ --THRFADKVEEMVQNHMTYSLQDVGGDANWQLVIEEGEMKVYRREVEENGIVLDPLKATHAVKGVTGHEVCHYFWDTDVRNDWETTIDNFNVVETLSDNAIIVYQTHKRVWPASQRDILFLSAIRKILAKNENDPDTWLVCNFSVDHDKAPPTNRCVRAKINVAMICQTLVSPPEGDKEISRDNILCKITYVANVNPGGWAPASVLRAVAKREYPKFLKRFTSYVQEKTAGNPILF
81
+ >UniRef100_A0A4W6GBN4_8187/ 246 0.893 9.059E-70 2 236 237 373 607 608
82
+ --THRFANKVEEMVQNHMTYSLQDVGGDANWQLVIEEGEMKVYRREVEENGIVLDPLKATHSVKGVTGHEVCHYFWDTDVRMDWETTIENFNVVEKLSENAIIVYQTHKRVWPASQRDVLYLSAIRKIMATNENDPDTWLVCNFSVDHNNAPPTNRCVRAKINVAMICQTLVSPPEGDKEISRDNILCKITYVANVNPGGWAPASVLRAVAKREYPKFLKRFTSYVQEKTAGKPILF
83
+ ```
84
+
85
+ ```
86
+ >query
87
+ MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH
88
+ >MGYP001165762451 218 0.325 1.019E-59 5 230 244 3 228 230
89
+ -----DKVEFLKGVPLFSELPEAHLQSLGELLIERSYRRGATIFFEGDPGDALYIVRSGIVKISRVAEDGREKTLAFLGKGEPFGEMALIDGGPRSAIAQALEATSLYALHRADFLAALTENPALSLGVIKVLSARLQQANAQLMDLVFRDVRGRVAQALLDLARR-HGVPLTNGRMISVKLTHQEIANLVGTARETVSRTFAELQDSGIIRIeGRNIVLLDAAQLEGYAAG-------------
90
+ >A0A160T8V6 218 0.285 1.019E-59 0 227 244 0 229 237
91
+ MPTTRDsnAVQALQVVPFFANLPEDHVAALAKALVPRRFSPGQVIFHLGDPGGLLYLISRGKIKISHTTSDGQEVVLAILGPGDFFGEMALIDDAPRSATAITLEPSETWTLHREEFIQYLTDNPEFALHVLKTLARHIRRLNTQLADIFFLDLPGRLARTLLNLADQ-YGRRAADGTIIDLSLTQTDLAEMTGATRVSINKALGRFRRAGWIQvTGRQVTVLDRAALEAL----------------
92
+ >AP58_3_1055460.scaffolds.fasta_scaffold1119545_2 216 0.304 3.581E-59 10 225 244 5 221 226
93
+ ----------LSRVPLFAELPPERIHELAQSVRRRTYHRGETIFHKGDPGNGLYIIAAGQVKIVLPSEMGEEAMLAVLEGGEFFGELALFDGLPRSATVVAVQNAEVLVLHRDDFMSFVGRNPEVVSALFAALSRRLRDADEMIEDAIFLDVPGRLAKRLLDLAEKHGRAEEKGGVAIDLKLTQQDLAAMVGATRESVNKHLGWMRDHGLIQLDRqRIVILKPDDLR------------------
94
+ ```
95
+ ### Format of MSA
96
+ In `uniref100_hits.a3m` (training stage) or `pairing.a3m` (inference stage), the header must start with the following format, which we use for pairing:
97
+ ```
98
+ >UniRef100_{hitname}_{taxonomyid}/
99
+ ```
100
+
101
+ We also provide a local Colabfold_search pipeline for generating Protenix-compatible MSAs in [colabfold_compatiable_msa.md](./colabfold_compatiable_msa.md).
docs/prepare_training_data.md ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Start with CIF files and prepare your own training data.
2
+
3
+ ## Data Preparation
4
+
5
+ 1. **Prepare CIF Files**: Place the CIF files you want to convert into training data in a folder. Alternatively, you can use a `txt` file to record the paths to these CIF files, with each line corresponding to the path of a specific CIF file.
6
+
7
+ 2. **Prepare Protein Clustering File (Optional)**: The protein clustering file contains category information for each `[PDB ID]_[Entity ID]`. In the Protenix training data, we cluster protein sequences using a 40% sequence identity threshold.
8
+
9
+ You can download the official clustering results file provided by RCSB PDB using the following command, and use it directly:
10
+ ```bash
11
+ wget https://cdn.rcsb.org/resources/sequence/clusters/clusters-by-entity-40.txt
12
+ ```
13
+
14
+ If you prefer to perform your own clustering of protein sequences, ensure the final results are formatted as a text file like this:
15
+ Each line represents a cluster, containing `[PDB ID]_[Entity ID]` entries separated by spaces.
16
+
17
+ 3. **Update the CCD (Chemical Component Dictionary) Cache File (If needed)**: We provide a pre-processed file, with a cutoff date of 2024-06-08, that records the reference conformers for each CCD Code. If the training data you're preparing is more recent than this date, some CCD Codes might be missing. For example, the CCD Code "WTB", appearing in the PDB ID: 8P3K released on 2024-11-20, is not defined in the previously provided CCD file. In such cases, you need to run the following script to download and update the CCD CIF files:
18
+
19
+ ```bash
20
+ python3 scripts/gen_ccd_cache.py -c [ccd_cache_dir] -n [num_cpu]
21
+ ```
22
+
23
+ After running the script, three files will be generated in the specified "ccd_cache_dir":
24
+
25
+ - `components.cif` (CCD CIF file downloaded from RCSB)
26
+ - `components.cif.rdkit_mol.pkl` (pre-processed dictionary, where the key is the CCD Code and the value is an RDKit Mol object with 3D structure)
27
+ - `components.txt` (a list containing all the CCD Codes)
28
+
29
+ When running Protenix, it first uses
30
+ ```bash
31
+ `release_data/ccd_cache/components.cif`
32
+ `release_data/ccd_cache/components.cif.rdkit_mol.pkl`
33
+ ```
34
+ if unavailable, it switches to
35
+ ```bash
36
+ `release_data/ccd_cache/components.v20240608.cif`
37
+ `release_data/ccd_cache/components.v20240608.cif.rdkit_mol.pkl`
38
+ ```
39
+ Notes:
40
+ - The `-c` parameter is optional. If not specified, files will be saved in the "release_data/ccd_cache" folder within the Protenix code directory by default.
41
+ - You can add the `-d` parameter when running the script to skip the CIF file download step, in which case the script will directly process the "components.cif" file located in the "ccd_cache_dir".
42
+
43
+ ## Data Preprocessing
44
+ Execute the script to preprocess the data:
45
+ ```bash
46
+ python3 scripts/prepare_training_data.py -i [input_path] -o [output_csv] -b [output_dir] -c [cluster_txt] -n [num_cpu]
47
+ ```
48
+
49
+ The preprocessed structures will be saved as `.pkl.gz` files. Additionally, a `CSV` file will be generated to catalog the chains and interfaces within these structures, which will facilitate sampling during the training process.
50
+
51
+ You can view the explanation of the parameters by using the `--help` command.
52
+ ```
53
+ python3 scripts/prepare_training_data.py --help
54
+ ```
55
+
56
+ Note that there is an optional parameter `-d` in the script. When this parameter is not used, the script processes CIF files downloaded from RCSB PDB by applying the full set of WeightedPDB training data filters. These filters include:
57
+
58
+ - Removing water molecules
59
+ - Removing hydrogen atoms
60
+ - Deleting polymer chains composed entirely of unknown residues
61
+ - Eliminating chains where the Cα distance between adjacent numbered residues exceeds 10 angstroms
62
+ - Removing elements labeled as "X"
63
+ - Deleting chains where no residues have been resolved
64
+ - When the number of chains exceeds 20, selecting one central atom from those capable of forming interfaces and retaining the 20 nearest chains to it. If a ligand is covalently bonded to a polymer, it is considered as one chain together. Additionally, if the number of chains is greater than 20 but the total number of tokens in these chains is less than 5120, more chains will be retained until the 5120 token limit is reached.
65
+ - Removing chains with one-third of their heavy atoms colliding
66
+
67
+ For CIF files generated through model inference where these filtering steps aren't desired, you can run the script with the `-d` parameter, which disables all these filters. The CIF structure will not be expanded to Assembly 1 in this case.
68
+
69
+
70
+ ## Output Format
71
+ ### Bioassembly Dict
72
+ In the folder specified by the `-b` parameter of the data preprocessing script, a corresponding `[pdb_id].pkl.gz` file is generated for each successfully processed CIF file. This file contains a dictionary saved with `pickle.dump`, with the following contents:
73
+ ```
74
+ | Key | Value Type | Description |
75
+ |----------------------------|---------------|-------------------------------------------------------------------------------|
76
+ | pdb_id | str | PDB Code |
77
+ | assembly_id | str | Assembly ID |
78
+ | sequences | dict[str, str]| Key is polymer's label_entity_id, value is canonical_sequence |
79
+ | release_date | str | PDB's Release Date |
80
+ | num_assembly_polymer_chains| int | Number of assembly polymer chains (pdbx_struct_assembly.oligomeric_count) |
81
+ | num_prot_chains | int | Number of protein chains in AtomArray |
82
+ | entity_poly_type | dict[str, str]| Key is polymer's label_entity_id, value is corresponding to entity_poly.type |
83
+ | resolution | float | Resolution; if no resolution, value is -1 |
84
+ | num_tokens | int | Number of tokens |
85
+ | atom_array | AtomArray | AtomArray from structure processing |
86
+ | token_array | TokenArray | TokenArray generated based on AtomArray |
87
+ | msa_features | None | (Placeholder) |
88
+ | template_features | None | (Placeholder) |
89
+ ```
90
+
91
+ ### Indices CSV
92
+ After the script successfully completes, a CSV file will be generated in the directory specified by `-o`.
93
+ Each row contains information about a pre-processed chain or interface, and the content of each column is described as follows:
94
+ ```
95
+ | Column Name | Value Type | Meaning | Required |
96
+ |----------------|------------|------------------------------------------------------------------------|----------|
97
+ | type | str | "chain" or "interface" | Y |
98
+ | pdb_id | str | PDB Code (entry.id) | Y |
99
+ | cluster_id | str | Cluster_id of the chain/interface | Y |
100
+ | assembly_id | str | Assembly id | N |
101
+ | release_date | str | Release date | N |
102
+ | resolution | float | Resolution; if no resolution, value is -1 | N |
103
+ | num_tokens | int | Number of tokens in AtomArray of Bioassembly Dict | N |
104
+ | num_prot_chains| int | Number of protein chains in AtomArray of Bioassembly Dict | N |
105
+ | eval_type | str | Classification used for evaluation | N |
106
+ | entity_1_id | str | Chain 1's label_entity_id | Y |
107
+ | chain_1_id | str | Chain 1's chain ID | Y |
108
+ | mol_1_type | str | Chain 1's corresponding mol_type ("protein", "nuc", "ligand", "ions") | Y |
109
+ | sub_mol_1_type | str | Sub-classification of Chain 1's entity corresponding to mol_type | N |
110
+ | cluster_1_id | str | Chain 1's cluster ID | Y |
111
+ | entity_2_id | str | Chain 2's label_entity_id | Y |
112
+ | chain_2_id | str | Chain 2's chain ID | Y |
113
+ | mol_2_type | str | Chain 2's corresponding mol_type ("protein", "nuc", "ligand", "ions") | Y |
114
+ | sub_mol_2_type | str | Sub-classification of Chain 2's entity corresponding to mol_type | N |
115
+ | cluster_2_id | str | Chain 2's cluster_id | Y |
116
+ ```
117
+ Notes:
118
+ - In the table, columns marked with 'Y' under 'Required' indicate that these columns are essential for training. If you are creating your own CSV for training purposes, these columns must be included. Columns marked with 'N' are optional and can be excluded.
119
+ - For rows where the "type" is "chain", the values in columns related to Chain 2 should all be filled with empty strings.
docs/training.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Preparing the datasets
2
+ To download the [wwPDB dataset](https://www.wwpdb.org/) and preprocessed training data, you need at least 1T disk space.
3
+
4
+ Use the following command to download the preprocessed wwpdb training databases:
5
+
6
+ ```bash
7
+ wget -P /af3-dev/release_data/ https://af3-dev.tos-cn-beijing.volces.com/release_data.tar.gz
8
+ tar -xzvf /af3-dev/release_data/release_data.tar.gz -C /af3-dev/release_data/
9
+ rm /af3-dev/release_data/release_data.tar.gz
10
+ ```
11
+
12
+
13
+ The data should be placed in the `/af3-dev/release_data/` directory. You can also download it to a different directory, but remember to modify the `DATA_ROOT_DIR` in [configs/configs_data.py](../configs/configs_data.py) correspondingly. Data hierarchy after extraction is as follows:
14
+
15
+ ```bash
16
+ ├── components.v20240608.cif [408M] # ccd source file
17
+ ├── components.v20240608.cif.rdkit_mol.pkl [121M] # rdkit Mol object generated by ccd source file
18
+ ├── indices [33M] # chain or interface entries
19
+ ├── mmcif [283G] # raw mmcif data
20
+ ├── mmcif_bioassembly [36G] # preprocessed wwPDB structural data
21
+ ├── mmcif_msa [450G] # msa files
22
+ ├── posebusters_bioassembly [42M] # preprocessed posebusters structural data
23
+ ├── posebusters_mmcif [361M] # raw mmcif data
24
+ ├── recentPDB_bioassembly [1.5G] # preprocessed recentPDB structural data
25
+ └── seq_to_pdb_index.json [45M] # sequence to pdb id mapping file
26
+ ```
27
+
28
+ Data processing scripts have also been released. You can refer to [prepare_training_data.md](./prepare_training_data.md) for generating `{dataset}_bioassembly` and `indices`, and to [msa_pipeline.md](./msa_pipeline.md) for pipelines to get `mmcif_msa` and `seq_to_pdb_index.json`.
29
+
30
+ ### Training demo
31
+ After the installation and data preparations, you can run the following command to train the model from scratch:
32
+
33
+ ```bash
34
+ bash train_demo.sh
35
+ ```
36
+ Key arguments in this script are explained as follows:
37
+ * `dtype`: data type used in training. Valid options include `"bf16"` and `"fp32"`.
38
+ * `--dtype fp32`: the model will be trained in full FP32 precision.
39
+ * `--dtype bf16`: the model will be trained in BF16 Mixed precision. By default, the `SampleDiffusion`, `ConfidenceHead`, `Mini-rollout` and `Loss` parts will still be trained in FP32 precision. If you want to train and run inference in full BF16 Mixed precision, pass the following arguments to [train_demo.sh](../train_demo.sh):
40
+ ```bash
41
+ --skip_amp.sample_diffusion_training false \
42
+ --skip_amp.confidence_head false \
43
+ --skip_amp.sample_diffusion false \
44
+ --skip_amp.loss false \
45
+ ```
46
+ * `ema_decay`: the decay rate of the EMA, default is 0.999.
47
+ * `sample_diffusion.N_step`: during evaluation, the number of steps for the diffusion process is reduced to 20 to improve efficiency.
48
+
49
+ * `data.train_sets/data.test_sets`: the datasets used for training and evaluation. If there are multiple datasets, separate them with commas.
50
+ * Some settings follow those in the [AlphaFold 3](https://www.nature.com/articles/s41586-024-07487-w) paper. The table in [model_performance.md](../docs/model_performance.md) shows the training settings and memory usage for different training stages.
51
+ * In this version, we do not use the template and RNA MSA features for training, as reflected by the default settings in [configs/configs_base.py](../configs/configs_base.py) and [configs/configs_data.py](../configs/configs_data.py):
52
+ ```bash
53
+ --model.template_embedder.n_blocks 0 \
54
+ --data.msa.enable_rna_msa false \
55
+ ```
56
+ This will be considered in our future work.
57
+
58
+ * The model also supports distributed training with PyTorch’s [`torchrun`](https://pytorch.org/docs/stable/elastic/run.html). For example, if you’re running distributed training on a single node with 4 GPUs, you can use:
59
+ ```bash
60
+ torchrun --nproc_per_node=4 runner/train.py
61
+ ```
62
+ You can also pass other arguments with `--<ARGS_KEY> <ARGS_VALUE>` as you want.
63
+
64
+
65
+ If you want to speed up training, see [<u> setting up kernels documentation </u>](./kernels.md).
66
+
67
+ ### Finetune demo
68
+
69
+ If you want to fine-tune the model on a specific subset, such as an antibody dataset, you only need to provide a PDB list file and load the pretrained weights as [finetune_demo.sh](../finetune_demo.sh) shows:
70
+
71
+ ```bash
72
+ # wget -P /af3-dev/release_model/ https://af3-dev.tos-cn-beijing.volces.com/release_model/model_v0.2.0.pt
73
+ checkpoint_path="/af3-dev/release_model/model_v0.2.0.pt"
74
+ ...
75
+
76
+ --load_checkpoint_path ${checkpoint_path} \
77
+ --load_checkpoint_ema_path ${checkpoint_path} \
78
+ --data.weightedPDB_before2109_wopb_nometalc_0925.base_info.pdb_list examples/subset.txt \
79
+ ```
80
+
81
+ , where the `subset.txt` is a file containing the PDB IDs like:
82
+ ```bash
83
+ 6hvq
84
+ 5mqc
85
+ 5zin
86
+ 3ew0
87
+ 5akv
88
+ ```
examples/7dc6.pdb ADDED
The diff for this file is too large to render. See raw diff
 
examples/7dc6_watermarked.pdb ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb/msa/1/non_pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb/msa/1/pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb_need_search_msa/msa_resmsa_seq_0/0.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/non_pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb_need_search_msa/msa_resmsa_seq_0/msa.sh ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ MMSEQS="$1"
3
+ QUERY="$2"
4
+ BASE="$4"
5
+ DB1="$5"
6
+ DB2="$6"
7
+ DB3="$7"
8
+ USE_ENV="$8"
9
+ USE_TEMPLATES="$9"
10
+ FILTER="${10}"
11
+ TAXONOMY="${11}"
12
+ M8OUT="${12}"
13
+ DATAPATH="${13}"
14
+ EXPAND_EVAL=inf
15
+ ALIGN_EVAL=10
16
+ DIFF=3000
17
+ QSC=-20.0
18
+ MAX_ACCEPT=1000000
19
+ if [ "${FILTER}" = "1" ]; then
20
+ # 0.1 was not used in benchmarks due to POSIX shell bug in line above
21
+ # EXPAND_EVAL=0.1
22
+ ALIGN_EVAL=10
23
+ QSC=0.8
24
+ MAX_ACCEPT=100000
25
+ fi
26
+ export MMSEQS_CALL_DEPTH=1
27
+ SEARCH_PARAM="--num-iterations 3 --db-load-mode 2 -a --k-score 'seq:96,prof:80' -e 0.1 --max-seqs 10000"
28
+ FILTER_PARAM="--filter-min-enable 1000 --diff ${DIFF} --qid 0.0,0.2,0.4,0.6,0.8,1.0 --qsc 0 --max-seq-id 0.95"
29
+ EXPAND_PARAM="--expansion-mode 0 -e ${EXPAND_EVAL} --expand-filter-clusters ${FILTER} --max-seq-id 0.95"
30
+ mkdir -p "${BASE}"
31
+ colabfold_search --db1 uniref30_2103_db --db2 pdb70_220313_db --db3 colabfold_envdb_202108_db "${QUERY}" "${DATAPATH}" "${BASE}" --mmseqs "${MMSEQS}" --use-templates 1
32
+
33
+ wait
Protenix_new.zip → examples/7pzb_need_search_msa/msa_resmsa_seq_0/out.tar.gz RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92aab3efb89b2454c9f9dc4e66b765db0d5e6eb7d0ff1a2a8b7e8a8ed29efc0a
3
- size 200292769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0f796ed53bd887c84e8a76a8f4ff70de4886dbbe9f8b4d5197e54a67209b00d
3
+ size 1903397
examples/7pzb_need_search_msa/msa_resmsa_seq_0/pdb70_220313_db.m8 ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ query_0 3H3U_A 0.265 230 161 3 1 229 1 223 1.823E-50 180 4M6I151M1I51M1D16M
2
+ query_0 4A2U_H 0.265 230 161 3 1 229 4 226 1.823E-50 180 4M6I151M1I51M1D16M
3
+ query_0 3R6S_A 0.265 230 164 4 1 229 21 246 3.058E-49 177 1M1I4M2I153M1I51M1D16M
4
+ query_0 4CYD_A 0.268 220 159 2 11 229 6 224 4.183E-49 176 151M1I51M1D16M
5
+ query_0 4EV0_D 0.292 219 146 2 12 229 2 212 6.863E-45 164 155M8I44M1D11M
6
+ query_0 4R8H_B 0.245 200 147 3 21 219 20 216 3.444E-39 147 70M1D85M2I35M1I6M
7
+ query_0 1O3Q_A 0.246 199 146 3 22 219 1 196 4.706E-39 147 69M1D85M2I35M1I6M
8
+ query_0 6DT4_A 0.245 200 147 3 21 219 11 207 6.430E-39 147 70M1D85M2I35M1I6M
9
+ query_0 3KCC_A 0.240 200 148 3 21 219 58 254 4.181E-38 144 70M1D85M2I35M1I6M
10
+ query_0 3FX3_B 0.237 236 170 4 1 233 4 232 7.803E-38 143 2M1D7M2I158M5I55M2D4M
11
+ query_0 2OZ6_A 0.256 199 142 2 25 219 6 202 1.066E-37 143 68M4D83M2I42M
12
+ query_0 3DKW_E 0.220 218 164 3 11 226 11 224 5.069E-37 141 84M1D75M4I40M1D13M
13
+ query_0 2XHK_B 0.268 212 146 4 17 225 12 217 6.136E-36 138 10M5I63M2D67M1I51M1D12M
14
+ query_0 2XHK_A 0.268 212 146 4 17 225 12 217 6.136E-36 138 10M5I63M2D67M1I51M1D12M
15
+ query_0 3LA7_B 0.241 195 144 3 36 227 47 240 4.801E-34 132 57M2D69M1I51M1D14M
16
+ query_0 3E97_A 0.205 219 170 3 11 228 8 223 3.105E-33 130 138M1I27M2I35M1D15M
17
+ query_0 3IWZ_D 0.233 206 149 4 20 219 22 224 9.491E-32 126 71M1D32M5D54M2I33M1I7M
18
+ query_0 2PQQ_A 0.331 151 95 1 1 151 3 147 4.486E-31 124 4M6I141M
19
+ query_0 2ZCW_A 0.250 200 137 5 33 229 6 195 1.139E-30 123 18M2D40M1I22M7I51M2I45M1D11M
20
+ query_0 5CVR_A 0.201 218 168 5 14 229 39 252 5.374E-30 121 3M2I18M1D49M1I87M1I39M1D16M
21
+ query_0 5E44_A 0.201 218 168 5 14 229 39 252 5.374E-30 121 3M2I18M1D49M1I87M1I39M1D16M
22
+ query_0 5W5B_A 0.200 230 166 5 5 229 19 235 2.534E-29 119 10M1I4M4D44M1I97M11I46M1D11M
23
+ query_0 5W5A_B 0.200 230 166 5 5 229 30 246 2.534E-29 119 10M1I4M4D44M1I97M11I46M1D11M
24
+ query_0 4I2O_A 0.216 208 152 4 32 237 40 238 2.218E-28 116 57M3I76M5I41M2D15M1I8M
25
+ query_0 2GAU_A 0.190 220 173 4 11 229 14 229 3.024E-28 115 4M2I145M1I9M1I46M1D11M
26
+ query_0 3B02_A 0.221 203 146 5 35 236 2 193 1.422E-27 113 54M1I26M6I54M2I36M1D16M2I5M
27
+ query_0 1O5L_A 0.155 199 158 3 24 220 14 204 6.681E-27 111 72M1D69M8I41M1D7M
28
+ query_0 2FMY_D 0.181 220 169 4 11 228 6 216 9.102E-27 111 59M1I15M7I69M1I51M2D15M
29
+ query_0 1FT9_A 0.215 218 157 8 20 234 12 218 5.816E-26 109 5M1I20M1D4M1I19M1I15M7I70M1I50M1D3M1D18M
30
+ query_0 3DV8_A 0.188 218 166 4 14 228 8 217 2.001E-25 107 69M2D81M1I7M7I38M1D12M
31
+ query_0 4MUV_A 0.290 141 87 2 2 136 2 135 2.777E-23 101 12M6D56M7I60M
32
+ query_0 6EO1_B 0.311 138 82 3 5 136 218 348 9.509E-23 99 9M6D51M2I6M5I59M
33
+ query_0 3BEH_D 0.311 138 82 3 5 136 218 348 9.509E-23 99 9M6D51M2I6M5I59M
34
+ query_0 7T8O_B 0.160 231 170 11 1 221 4 220 8.176E-22 96 1M2I5M2I23M2D13M1I27M1D18M1D22M5D8M1I37M7I8M1I37M1D8M
35
+ query_0 4D7S_B 0.190 131 103 1 11 141 71 198 1.112E-21 96 61M3I67M
36
+ query_0 4D7T_A 0.190 131 103 1 11 141 71 198 1.112E-21 96 61M3I67M
37
+ query_0 3SHR_B 0.269 115 83 1 6 119 154 268 5.156E-21 94 60M1D54M
38
+ query_0 2Z69_A 0.241 141 106 1 11 150 14 154 7.006E-21 94 84M1D56M
39
+ query_0 2Z69_B 0.241 141 106 1 11 150 14 154 7.006E-21 94 84M1D56M
40
+ query_0 6CJU_D 0.198 126 98 1 11 136 315 437 7.006E-21 94 61M3I62M
41
+ query_0 6CJT_D 0.198 126 98 1 11 136 315 437 7.006E-21 94 61M3I62M
42
+ query_0 5L0N_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
43
+ query_0 5J48_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
44
+ query_0 4KU8_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
45
+ query_0 4QX5_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
46
+ query_0 4Z07_E 0.269 115 83 1 6 119 137 251 1.293E-20 93 60M1D54M
47
+ query_0 4Z07_A 0.269 115 83 1 6 119 137 251 1.293E-20 93 60M1D54M
48
+ query_0 4OLL_A 0.213 187 136 5 2 187 9 185 8.124E-20 91 9M1D67M1I41M1I24M6I10M2I25M
49
+ query_0 4ONU_A 0.213 187 136 5 2 187 9 185 8.124E-20 91 9M1D67M1I41M1I24M6I10M2I25M
50
+ query_0 3GYD_B 0.232 146 105 2 3 144 36 178 2.034E-19 89 6M3I117M4D16M
51
+ query_0 3J4Q_C 0.198 146 103 4 9 147 252 390 2.034E-19 89 55M5D7M2D50M2I1M5I19M
52
+ query_0 4ORF_A 0.208 187 137 5 2 187 9 185 3.748E-19 89 9M1D67M1I41M1I24M6I10M2I25M
53
+ query_0 4DIN_B 0.173 138 108 3 1 132 236 373 3.748E-19 89 7M4D56M1D62M1D7M
54
+ query_0 5KJZ_A 0.184 130 101 2 1 125 5 134 5.087E-19 88 7M4D56M1D62M
55
+ query_0 5J3U_A 0.220 181 121 7 11 189 16 178 5.087E-19 88 57M1I51M1I7M3I3M3I19M2D9M7I7M3I8M
56
+ query_0 2QVS_B 0.191 146 104 4 9 147 160 298 9.370E-19 87 55M5D7M2D50M2I1M5I19M
57
+ query_0 6BYR_B 0.176 130 102 2 1 125 234 363 1.271E-18 87 7M4D56M1D62M
58
+ query_0 6NO7_D 0.176 130 102 2 1 125 235 364 1.271E-18 87 7M4D56M1D62M
59
+ query_0 5JIX_A 0.194 144 114 2 2 143 6 149 2.341E-18 86 67M1D35M1D40M
60
+ query_0 5JIZ_A 0.194 144 114 2 2 143 6 149 2.341E-18 86 67M1D35M1D40M
61
+ query_0 6RSX_A 0.169 142 117 1 1 142 153 293 2.341E-18 86 67M1I74M
62
+ query_0 5V4S_D 0.252 119 86 2 11 129 340 455 2.341E-18 86 53M2I11M1I52M
63
+ query_0 5D1I_B 0.280 125 88 2 1 125 9 131 3.175E-18 86 2M1I68M1I53M
64
+ query_0 5D1I_A 0.280 125 88 2 1 125 9 131 3.175E-18 86 2M1I68M1I53M
65
+ query_0 2QCS_B 0.169 130 103 2 1 125 145 274 5.842E-18 85 7M4D56M1D62M
66
+ query_0 7NP4_D 0.209 148 111 3 13 159 593 735 5.842E-18 85 51M2I11M3I68M1D12M
67
+ query_0 2MPF_A 0.206 131 99 2 13 143 27 152 7.924E-18 85 51M2I7M3I68M
68
+ query_0 3BPZ_C 0.206 131 99 2 13 143 76 201 7.924E-18 85 51M2I7M3I68M
69
+ query_0 5KHI_A 0.206 131 99 2 13 143 75 200 7.924E-18 85 51M2I7M3I68M
70
+ query_0 5JON_A 0.206 131 99 2 13 143 391 516 7.924E-18 85 51M2I7M3I68M
71
+ query_0 5JON_B 0.206 131 99 2 13 143 391 516 7.924E-18 85 51M2I7M3I68M
72
+ query_0 6GYO_D 0.213 131 98 2 13 143 394 519 1.075E-17 84 51M2I11M3I64M
73
+ query_0 7O4H_D 0.183 196 149 6 9 196 1069 1261 1.457E-17 84 55M1I11M1I14M3D34M1I32M2D27M3D12M
74
+ query_0 4AVB_B 0.197 177 132 3 11 187 15 181 1.976E-17 83 61M1I47M1I20M8I39M
75
+ query_0 4AVA_A 0.197 177 132 3 11 187 15 181 1.976E-17 83 61M1I47M1I20M8I39M
76
+ query_0 5DZC_A 0.221 140 97 5 7 145 403 531 2.678E-17 83 59M3I4M2I27M1D18M1I3M5I17M
77
+ query_0 4RZ7_A 0.221 140 97 5 7 145 403 531 2.678E-17 83 59M3I4M2I27M1D18M1I3M5I17M
78
+ query_0 5U6O_D 0.193 129 99 2 15 143 474 597 3.630E-17 83 54M2I2M3I68M
79
+ query_0 6UQF_D 0.193 129 99 2 15 143 474 597 3.630E-17 83 54M2I2M3I68M
80
+ query_0 7RHL_B 0.160 187 145 6 13 190 518 701 4.921E-17 82 51M1I11M1I14M3D34M1I21M3D7M3D37M
81
+ query_0 2MNG_A 0.230 117 85 2 13 129 16 127 6.669E-17 82 51M2I11M3I50M
82
+ query_0 6FTF_B 0.204 127 95 2 1 123 146 270 6.669E-17 82 7M4D113M2I1M
83
+ query_0 3OF1_A 0.220 150 107 4 11 160 9 148 9.038E-17 81 59M5I48M1I4M3I21M1I8M
84
+ query_0 6WEL_D 0.217 170 117 7 2 160 477 641 2.248E-16 80 9M2D54M2I9M1I14M6D34M1I23M3D6M1I5M
85
+ query_0 4OFG_A 0.214 140 98 5 7 145 11 139 4.124E-16 79 59M3I4M2I27M1D18M1I3M5I17M
86
+ query_0 4OFF_A 0.214 140 98 5 7 145 11 139 4.124E-16 79 59M3I4M2I27M1D18M1I3M5I17M
87
+ query_0 5KBF_B 0.187 160 121 2 3 160 36 188 4.124E-16 79 127M7I19M2D5M
88
+ query_0 6HQ2_B 0.305 108 74 1 32 139 2 108 7.562E-16 79 34M1I73M
89
+ query_0 6HQ3_A 0.305 108 74 1 32 139 2 108 7.562E-16 79 34M1I73M
90
+ query_0 6HQ7_A 0.316 101 68 1 39 139 7 106 1.024E-15 78 27M1I73M
91
+ query_0 3PNA_A 0.218 119 85 3 1 119 33 143 1.876E-15 78 9M2I3M1I57M5I42M
92
+ query_0 3PVB_B 0.218 119 85 3 1 119 40 150 1.876E-15 78 9M2I3M1I57M5I42M
93
+ query_0 7RHS_A 0.216 125 89 3 11 129 481 602 1.876E-15 78 54M2I9M1I14M6D39M
94
+ query_0 3TNP_B 0.193 145 104 3 9 147 267 404 3.437E-15 77 64M6D48M2I1M5I19M
95
+ query_0 5K8S_B 0.221 113 81 3 11 123 14 119 4.651E-15 76 55M3I3M2I47M2I1M
96
+ query_0 1CX4_A 0.184 146 104 4 9 147 156 293 6.292E-15 76 57M1I6M7D48M2I1M5I19M
97
+ query_0 6WJF_C 0.184 146 104 4 9 147 267 404 6.292E-15 76 57M1I6M7D48M2I1M5I19M
98
+ query_0 7LFT_B 0.216 120 85 3 11 124 344 460 6.292E-15 76 54M2I9M1I14M6D34M
99
+ query_0 7RHL_A 0.216 120 85 3 11 124 344 460 6.292E-15 76 54M2I9M1I14M6D34M
100
+ query_0 7O4H_B 0.216 120 85 3 11 124 472 588 8.512E-15 76 54M2I9M1I14M6D34M
101
+ query_0 5VA1_A 0.154 188 142 6 9 187 495 674 1.151E-14 75 57M5I16M1D6M1D41M2D8M5D14M3I29M
102
+ query_0 7RHS_D 0.164 194 143 7 9 196 549 729 1.557E-14 75 55M1I11M1I14M3D41M4I20M7I17M2D4M1D13M
103
+ query_0 5H5O_A 0.285 126 85 2 20 144 6 127 2.106E-14 74 46M4I21M1D54M
104
+ query_0 5H5O_B 0.285 126 85 2 20 144 6 127 2.106E-14 74 46M4I21M1D54M
105
+ query_0 7CAL_C 0.136 205 173 3 9 213 370 570 2.847E-14 74 59M1I87M2I52M1I3M
106
+ query_0 7T4X_A 0.166 150 124 1 9 158 365 513 3.850E-14 74 59M1I90M
107
+ query_0 7T4X_B 0.166 150 124 1 9 158 365 513 3.850E-14 74 59M1I90M
108
+ query_0 6SYG_A 0.157 127 100 3 9 133 6 127 9.505E-14 72 57M5I16M1D6M1D41M
109
+ query_0 2N7G_A 0.157 127 100 3 9 133 25 146 9.505E-14 72 57M5I16M1D6M1D41M
110
+ query_0 3MDP_A 0.165 127 103 2 11 134 8 134 1.284E-13 72 53M1D8M2D63M
111
+ query_0 1O7F_A 0.237 122 90 2 11 130 44 164 1.735E-13 72 50M2D26M1I43M
112
+ query_0 4F7Z_A 0.237 122 90 2 11 130 44 164 1.735E-13 72 50M2D26M1I43M
113
+ query_0 4L11_A 0.168 125 97 2 11 133 74 193 2.344E-13 71 55M5I22M2D41M
114
+ query_0 3IDB_B 0.214 107 83 1 16 122 45 150 4.276E-13 70 50M1I56M
115
+ query_0 3IDC_B 0.214 107 83 1 16 122 51 156 4.276E-13 70 50M1I56M
116
+ query_0 6V1X_B 0.138 130 111 1 9 138 370 498 4.276E-13 70 59M1I70M
117
+ query_0 3OGJ_A 0.179 117 89 4 3 119 19 128 5.774E-13 70 3M1I7M1I51M3I2M2I47M
118
+ query_0 3OCP_A 0.179 117 89 4 3 119 19 128 5.774E-13 70 3M1I7M1I51M3I2M2I47M
119
+ query_0 4LLO_A 0.161 155 116 5 11 158 29 176 5.774E-13 70 58M5I14M1I4M3D43M4D16M1I6M
120
+ query_0 6PBY_A 0.161 155 116 5 11 158 538 685 5.774E-13 70 58M5I14M1I4M3D43M4D16M1I6M
121
+ query_0 5K7L_A 0.161 155 116 5 11 158 549 696 5.774E-13 70 58M5I14M1I4M3D43M4D16M1I6M
122
+ query_0 5C8W_B 0.189 111 85 1 11 121 29 134 7.795E-13 70 55M5I51M
123
+ query_0 5C8W_F 0.189 111 85 1 11 121 29 134 7.795E-13 70 55M5I51M
124
+ query_0 5E16_A 0.198 111 84 1 11 121 34 139 1.917E-12 68 59M5I47M
125
+ query_0 2MHF_A 0.212 127 93 2 13 137 7 128 3.489E-12 68 53M5I23M2D44M
126
+ query_0 3UKN_B 0.212 127 93 2 13 137 79 200 3.489E-12 68 53M5I23M2D44M
127
+ query_0 3DN7_A 0.177 192 142 6 11 201 12 188 1.154E-11 66 4M1I2M2I67M1D69M9I13M1I3M2I18M
128
+ query_0 3DN7_B 0.177 192 142 6 11 201 12 188 1.154E-11 66 4M1I2M2I67M1D69M9I13M1I3M2I18M
129
+ query_0 4MGZ_E 0.196 107 80 3 17 121 40 142 2.097E-11 65 23M1D30M4I31M1D17M
130
+ query_0 3CF6_E 0.196 107 80 3 17 121 40 142 2.097E-11 65 23M1D30M4I31M1D17M
131
+ query_0 6H7E_B 0.226 106 76 4 18 121 157 258 1.687E-10 63 20M1D26M1I6M3I31M1D17M
132
+ query_0 1WGP_A 0.208 115 79 5 12 115 9 122 5.897E-09 58 51M1I9M2D15M3D6M5D4M1D18M
133
+ query_0 2D93_A 0.201 119 88 4 3 119 10 123 1.064E-08 57 35M1D27M2I5M3I27M1D18M
134
+ query_0 6M63_A 0.201 144 95 5 6 136 244 380 6.213E-08 55 14M1D8M2I6M3D14M5I16M9D66M
135
+ query_0 2FBH_A 0.178 123 78 6 118 232 3 110 3.733E-02 36 21M1D13M5I5M1I11M9I31M5D3M2D16M
examples/7pzb_need_search_msa/msa_resmsa_seq_0/tmp_5561987135da4188987956d9f05d1af2.fasta ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ >query_0
2
+ MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH
examples/7pzb_need_search_msa/msa_resmsa_seq_0/uniref_tax.m8 ADDED
The diff for this file is too large to render. See raw diff
 
examples/7pzb_unwatermarked.cif ADDED
The diff for this file is too large to render. See raw diff
 
examples/7r6r/msa/1/non_pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7r6r/msa/1/pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7r6r_watermarked.cif ADDED
The diff for this file is too large to render. See raw diff
 
examples/7wux/msa/1/non_pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/7wux/msa/1/pairing.a3m ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ >query
2
+ MASWSHPQFEKGGTHVAETSAPTRSEPDTRVLTLPGTASAPEFRLIDIDGLLNNRATTDVRDLGSGRLNAWGNSFPAAELPAPGSLITVAGIPFTWANAHARGDNIRCEGQVVDIPPGQYDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSRMHYPHHVQEGLPTTMWLTRVGMPRHGVARSLRLPRSVAMHVFALTLRTAAAVRLAEGATT
3
+ >UniRef100_UPI000B1A43EC_1463901/ 204 0.926 4.008E-55 15 232 233 0 217 218
4
+ ---------------MAETSVRTEGRPDTRVLRLPGAASAPEFRLIDIDGLLNNRATTDVRDLGSGRLNAWGNSFPAAELPAPGSLIEVAGIPFTWANAHATGDNVRCEGQVVDIPPGRYDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSRMHYPHHVQEGLPTTMWLTRVGMPRHGVAHSLRLPRSVAMHVFALTLRTTADVRLAQGATT
5
+ >UniRef100_UPI001BE85A7B_2819193/ 186 0.830 5.494E-49 19 230 233 4 215 218
6
+ -------------------SVLTAAKPGPRVHRLPGTASAPEFCLIGIDDLLNNRATTSVSDLDSGRLNAWGNSFPAEELPTPGALIEVAGIPFTWANAHAKGDNVRCEGQVIDIPPGQYDWIYLLAASERRSEDTIWAHYEDGHADPLRVGVSDFLDGTPAFGELTAFRTSRMHYPHHVQERLPTTMWLTRVGMPRRGIAQSLRLPRLVAMHVFALTLVTGIDVRRAQGA--
7
+ >UniRef100_UPI0006899DE6_66377/ 178 0.726 2.919E-46 15 230 233 3 218 221
8
+ ---------------ISSNTIRTHTPSDRLLDRLHGKASAPEFCLISIDDLLNNRATTSTEDLDSGRLNAWGNSFPAEELPAPGARIEIAGIPFAWARSHAEGDNVRCEGQVIEIPPGQYDWIYLLAASERRSENTLWAHYADGTADPLRLGVSDFLDGTPAFGELPAFSTERMHYPHHVQQGLPTTMWLSRVGMPRRGRATALRLPRCVALHVFALTLLTGIDVRLADGA--
9
+ >UniRef100_UPI0012DD7F77_35754/ 178 0.705 2.919E-46 15 231 233 3 219 237
10
+ ---------------VSDRPVLVPGAYDRLVQRLPGTTAQPDFCLIGIDDLLNNRATTGTGDLDAGRLNAWGNSFPSRELPEPGAVIEIAGIPFVWPDANPDGDNVRSEGQVIDIPPGRYDWIYLLAASERRSEDTLWVHYDDGHADPLRVGVSDFLDGTPAFGELPAFRTARMHYPHHVQERLPTTMWLARVGMPRRGRAQALRLPRLVALHVFAITLVTGIDVRLADGAD-
11
+ >UniRef100_A0A510HIW1_49319/ 171 0.295 8.250E-44 29 219 233 889 1080 1084
12
+ -----------------------------RVVEARCAAEDRGFCAADLRHSLNSDGVSTDANPGDGDFDGLGFSYPAEELPVPG-PFESGGVLYWFTETsDGAPNNIEARGQTVPLVPERYAAAHILGAAHHGAVETaATVTYADGSTERLQLSLSDWAQETPQFGEEVAVRTTHRHQEGAGDVGPPVAIFAFSLELDPSREVRFLTLPAEVRLHLFAITLR-------------
13
+ >UniRef100_A0A5N0E9G2_2545717/ 168 0.765 1.013E-42 27 222 233 11 206 217
14
+ ---------------------------DRLVRRLPAGPARPEFCLIDLDDLLNNRATTGTADLDQGRLNAWGNSFPAEELPQPGTQIDVAGIPFVWANAHAHGDNVRCEGQLIDLPPGQYDWIYLLAASERRSEDTLWAYYDDGHADPLPVGISDFLDGTPAFGELSAFRTTRMHYPHHVQHGLPTTVWLTRVGLPRRGNAHAIRFPRLVAMHIFALTLLTGS----------
15
+ >UniRef100_Q1ASW4_266117/ 167 0.295 2.592E-42 29 219 233 69 260 264
16
+ -----------------------------RVVEARCAAEDRGFCAAGLGHSLNSDGVSTDANPNDGDFDGLGFSYPAEELPAPG-PFESGGVLYWYPETsDGAPNNIEARGQTVPLVPGRYAAAHILGAAHHGAVETaATVTYADGSTGRLQLRLSDWAREAPQFGEEVAVRTTHRHQEGAGDVGPPVAIFAVRLELDPSREARFLTLPEEARLHLFAITLR-------------
17
+ >UniRef100_A0A4R7V1K6_502181/ 167 0.406 2.592E-42 28 217 233 100 290 300
18
+ ----------------------------TLVPW-PSDDTSGNAVPVDLSAHLNCVGIEPKERPGRGAFNIWHNTFPIEELPRPGSITVVGGVSFRFPAADGtRPDNLRCRGQRIELPSSRVDWLHLLAAAERRTEDVVTVHYADGTTRPQWLRVSDFWPETPSrFGELPAFRTSAMLYPNHVDSRMPPVIWHQRVPVAVRDGVVAVTLPDNPAVHVFAMT---------------
19
+ >UniRef100_UPI000B27D092_285514/ 166 0.425 3.546E-42 31 224 233 1 195 204
20
+ -------------------------------TAIPGpPTEESPYDPVDLTPCFNNTGISPASDTGRGSFNVWGNSFPAESLPSGRAPVTVDGVPFRFPPVGRGNDNVRCAGQFVPVETGRFDWLHLLAAAERRTEDTMAMHFEDGTVDPEWIRVSDFWAAPARFGETKAFETPVMHYPHHVQQGVSALLWAQRVPVTRRADLKGFRLPRNVALHIFAATLQQTAVV--------
21
+ >UniRef100_A0A920BQ67_571911/ 166 0.423 6.637E-42 42 223 233 3 185 190
22
+ ------------------------------------------PLAVDLDPYADNTGITTADRKDAGGFNIWGNTYPADQLPAAG-PVRVDDIPFRFPPaAPGRPDNVRCAGQLIEVPAGRYDWIQLLAAAERRTEDPLWLHYADGGVDPEWLRVSDfWPETAAHFGETAAYRCTHLHYPRHVDRKFGPSIWRQRVPVPRDAALRAVRLPDNPAIHIFAMTLVPAAG---------
23
+ >UniRef100_UPI000AF72EB5_718014/ 165 0.748 1.242E-41 29 231 233 17 219 221
24
+ -----------------------------PVRSVRATTSSPEFLLVGIDDLLNNRAITGAADLGDGRLNAWGNSFPAEELPAPGMLVEVAGIPFQWANAHTEGDNVRCEGQIIDIPPARYDWIYLLAASERRSEDTLWAYYDDGYADPLRVGVSDFLDGTPVFGELPGFRTTRMHYPHHVQHGLPTTMWLSRVGMPRHGRALALRLPRSVALHVFAVTLLTGIDVRRADGTT-
25
+ >UniRef100_A0A939I1U9_2815937/ 164 0.273 1.699E-41 6 220 233 502 723 724
26
+ ------PQGAPGTTVTAVTATASYRAQSTAVDTVSGEQTITQVVPYPsLEAAFNNVGATSESDTTPGNFDGGGDSYSTQALaragATPGAAISANGLGFTWPSADaGRADNVSAAGQEITF-GGQGQTLGFLGAEAGSVSGTVTVTYTDGTTSTGQLGFPNWcCTPTDAFGAKTAFTMDHRNTPTGPANfGISYGVFTNTVPLTPGKTIRSVRLPDAPAIHVFALTVQP------------
27
+ >UniRef100_UPI001F407CE4_1463833/ 163 0.462 4.348E-41 28 227 233 5 205 217
28
+ ----------------------------TPPGTATPPASAPRYRVVELAAHRNNRAATRVHTTGAGGFNVWRNSFPVEHLPPGGSEVEVGGVPFRFPPVGEGDDNVRCDGQFVEVPAGRYDWIHLLAAAERRTEDTVELHYADGSVDSEWLRVSDFWSAPAWFGELPALRTPVMHYPYHVQPGLSAHLWAQRVPVPRRTALAGLRLPRNIAVHLFAATVqEPPGAAGLP-----
29
+ >UniRef100_UPI000E241227_68246/ 163 0.451 5.947E-41 45 228 233 12 196 197
30
+ ---------------------------------------------LDLTAFANNTGITTEDRLSDGAFNIWGNTFPAEELPA-GGEITVDGIPFRFPaPAAGAPDNVRCAGQLLELPAGRYDWIHVLAAAERRTEDFVQLHYTDGSVDPEWLRISDfWPQTGARFGESAAFSCTRLHYPRHVQRSMGPTIWRQRVPVTRQRELTALTLPDNPAIHVFAMTLAPTTPPEAAE----
31
+ >UniRef100_A0A944KLD5_2819145/ 163 0.258 5.947E-41 46 222 233 506 691 697
32
+ ----------------------------------------------DLAPYFDNTGISDDANQATANLDGYGFSYSAEALAAsgltPGATVTSDGVQYGWSGAAGQADNVVAAGQVLAVPAitGATELGVMGSATNGPSSGTMTITYTDGTTQQATLGFTDWTAGSPSLGNGVAASTQYRNSTGGSSQGLGTHLYTTTIALQAGKTVASVTLPtraDQGLLHVFALGTDKGA----------
33
+ >UniRef100_A0A1C6VGZ9_683228/ 161 0.443 2.082E-40 35 229 233 7 200 206
34
+ -----------------------------------GAGAGTRQRHVDLTDHVDNLGIVVPRWPHGGGFNIWGNAFPADELPPAGGVCTVDGVSFSFP-LAGNRDNIRCRGQVVALPPGHYDWLYVLAAAERRTEDTVRLHYTDGSTAEQWLRISDfWPDTAPRFGDVLAFRCSRMLYPRHPQPSMAPAIWQQRIPVSRPGEVHAVRLPDNPAMHVFALTAVT-DAELPAEG---
35
+ >UniRef100_UPI0021D8E261_2853165/ 161 0.440 2.082E-40 28 227 233 5 206 218
36
+ ----------------------------TPPGTATPPAGAPRYRVVELAAHRNNRAASRMHTTGAGGFNVWRNSFPVEHLPPGGSEVEVGGVPFSFPPVGEGYDNVRCDGQFVEVPAGRYDWIHLLAAAERRAEDTVELHYADGSVDTEWLRVSDFWSAPAWFGELPAYRTPVMHYPYHVQPGIGAHLWAQRVPVPRRTALAGLRLPRNIAVHVFAATVQepPPGTAGLP-----
37
+ >UniRef100_A0A246RSA7_1185415/ 161 0.500 2.847E-40 44 223 233 12 191 196
38
+ --------------------------------------------PVDISRHRNNIAISSATATKAGHFNVWGNSFAAEHLPAGGSLVHVAGVPFRFPPVCAGPDNVRCEGQFVPVTESRYDWIHVLAAAERRCEDTVELHFGDGSVDAEPLRISDFWAAPAWFGELLAFRSPVMHYPHHVQQGVPAVMWAQRVPVTRRADLVGIRLPRNVAMHIFAVTLEHAEA---------
39
+ >UniRef100_UPI0018912F4F_2705253/ 160 0.262 3.894E-40 47 219 233 1054 1238 1525
40
+ -----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASYSAQALAAvgvtPGAPLVHDGLTFTWPDRQvGQSDNVVAAGQTIDV-SGSGSTLGLLGTSTWGPStGSGTITYTDGSTQPYTIGFGDWANGTPPTGADVAIRAPYGNQP-GNQTGWAATVDYYPITLDPAKTVQSITLPSGsaqphggiPALHIFAMSIK-------------
41
+ >UniRef100_A0A2A3HR12_1938860/ 160 0.480 7.284E-40 37 217 233 8 188 212
42
+ -------------------------------------ASAPRYRVVELADHRNNRAATRVHTTAAGGFNVWRNSFPAEHLPPGGSQVEVGGVPFSFPPVGEGDDNVRCDGQFIAVPAGRYDWVHLLAAAERRTEDTVELHYADGSVDTEWLRVSDFWAAPAWFGELPAYRTPVMHYPYHVQPGVSAHLWAQRVPVPRRTELAGLRLPRNIAVHVFAAT---------------
43
+ >UniRef100_A0A838IYH6_2026798/ 159 0.292 9.962E-40 29 221 233 647 843 845
44
+ -----------------------------PRLALPEAFASDGFVALDLERAFNNDAFSSPSQPLKGNFDsrsgVLGATYPAERAPASLERIELGGVPFLFPPTDADANNVAFHGQRLEVPPGHYDELHLLGVSEQGNyQDTVRLVYEDGSVDEIPLGLSDWCQ-TPRYGEAIAFAFEQRRGAGGAIERITCRILVQLLPVRADSSLLRVDLPDRETMHLFALTLRHA-----------
45
+ >UniRef100_UPI001C21EFA6_2849653/ 159 0.434 1.362E-39 41 220 233 11 191 199
46
+ -----------------------------------------EPVPVPLTEHFDNVGFTQPDQLSSGAFNIWGNTFPADELP-PGPEVRLDGVPFLIPAAPvGSPDNLRCAGQLVAVPPGRYDWVRLLCAAERRTEDRVWLHYTDGTIDPEWLRVSDfWPETAPRFGESLAFRFSRMHYPRHVDRRMKPAIWSQRLPITRAAELAAVRLPDNPAIHLFAMTLVP------------
47
+ >UniRef100_A0A3M9MI77_2294115/ 158 0.270 1.863E-39 44 217 233 562 741 744
48
+ --------------------------------------------YANLAAAYNNVGVTAGADPTPGNFDGSGNSFNAELLAgqglTPGANVTANGFSFTWPNvAPGVADNAEAAGQLIKLT-GTGSTLAFLGSGVGDQSDTVTVHYTDGSTSTGTVGFPNWSFSDPtEFGAKLAFSTMGRNTPTGLADTAYAYrIFTNTIPLDTGKTVQSVQLPNNSALHLFAWT---------------
49
+ >UniRef100_A0A5N8W5X6_1803180/ 158 0.272 2.548E-39 44 219 233 760 936 940
50
+ --------------------------------------------PVDLTAHFDSDGISTHENTQDGDFDGTGRTYPAEELPAAG-PLTFEGVRYTFPSyADGALDNVTAKGQTIPVPPGRYAKVRVLGACSYGAlKTTLTATYTDGSTQEVELAMSDWAGTAPASGSEV-VRCTHRHGRSG-PDTLQVALFQTAVTVDQARELRSLTLPDTtkPALHLFALSVE-------------
51
+ >UniRef100_UPI0018923E89_280293/ 158 0.283 2.548E-39 47 219 233 1056 1240 1527
52
+ -----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASFSAQALtsvgVTPGAPLVHDGLTFTWPNRQvGQSDNVVAAGQTIDVT-GSGSTLGLLGTSTWGAStGSGTITYTDGSTQPYTIGFGDWANGTPPTGGDVAIRAPYGNQP-GNQTGWAATIDYFPVTLDPSKTVQSITLPSGsaqphggiPAMHVFAMSIK-------------
53
+ >UniRef100_A0A9E3EXL3_2740538/ 158 0.256 3.485E-39 48 216 233 351 533 733
54
+ ------------------------------------------------STAYNNVGISDDSNSSTANFDGGGYSYSAEALQAagltPGQQITANGITFTWPGVPaGVNDNYQANGQTIPITPvsGATTLAFLGSATNGPSSGTVTITYTDNSTQSFSLGFSDWTlnagNSPPSFGNSVIATTSYRNHSGAGPDNVDTNILYAGVTLQTGKTLQSVTLPSTTNqgkIHVFAI----------------
55
+ >UniRef100_UPI000AB2585B_688067/ 156 0.466 8.913E-39 44 223 233 6 185 188
56
+ --------------------------------------------PVDIAGHRNNTGISAATETKAGAFNVWGNSFAAEYLPEGDSLVHVAGVPFRFPPVCEGPDNIRCAGQFVRVPEGRYDWVHVLAAAERRCEDLVEMNYGDGSVDQEPLRVSDFWAAPAWFGEVKAFETPVMHYPHHVQQRVPAVMWAQRVPVTRRADLAGLRLPRNVAVHVFAMTLQRTEA---------
57
+ >UniRef100_A0A542DYC5_402171/ 156 0.261 8.913E-39 46 228 233 869 1066 1093
58
+ ----------------------------------------------DIWPYYTNAGITDDAHTGAASFDGGGWSYSAQALAAagvtPGSTVTADGVHYTWPDVPvATPDNIEESGQTIPLKvPAGASTIGLLGSASNagssGAGGTVTVHYTDGTSSTFDAFFSDWTlgggGGTPVPGDTTAVTTAYRN-SSGGRDPVKTYVFSVSAPLDAGKTVASITLPqaQGGDAHVFAIGFDTTGASSQAP----
59
+ >UniRef100_UPI001AEB6A42_1365924/ 156 0.290 1.219E-38 7 220 233 695 914 917
60
+ -------DLERGGevlVHGPSPDLGIGPVPIsTPAPRwgLPALPDGGQTVIVDLAAALNNDAITSEFHMGDGDFDGAGNTYPAAQLPQTGQT-TDDGIPFEFVNgSEGTPNNVIPAAQTITLPPGNYMTLHLLAASDNGNtNRSMTITYADGTSQ-VPLQITDW-RTSPAFGETEALRTSQMHARTG-PQPVRLSIFHQKIPLDPAKQLVSITLPgaATPRPHLFAITLEK------------
61
+ >UniRef100_UPI0023608C19_1392640/ 156 0.216 1.219E-38 47 219 233 883 1071 1072
62
+ -----------------------------------------------LLALFNNTGISSDDDPSSANYDGVGYSYSEQALAtagvKPGGTVSAGGMEFVWPNVPaGEPDNIAADGQTINLSdvPAGASELAFLGSAtNGPSQGEVKITYTDGSTQTATLGFSDWTlnagSSSPSFGNVVAAKMPYRNSTSGNRDQVTTYVFASRpIPLEQGKQPKSVTLPssvDQGTLHVFAVSVK-------------
63
+ >UniRef100_C7PY52_479433/ 156 0.272 1.219E-38 47 219 233 1074 1258 1548
64
+ -----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASFSAQALAsvgvTPGAPLVHDGLTFTWPDRQvGQSDNVVAAGQTIDI-SGSGSTLGLLGTSTWGaSSGSGTIAYTDGSTQPYTIAFGDWANGTPPTGGDVAIRAPYGNQP-GNQTGWAATIDYFPITLDATKTVQSITLPPgsaqphggTPAMHIFAMSIK-------------
65
+ >UniRef100_UPI0021F5AFBA_1550616/ 156 0.480 1.667E-38 36 218 233 7 189 195
66
+ ------------------------------------PAGIGLYDTVDISGHRNNTAISAATETGAGAFNVWRNSFAAEYLPAGGSLVHVDGVPFEFPPVCEGPDNIRCAGQFIRVPEGRYDWIHVLAASERRSEDTVELNFADGSVDAEALRVSDFWAAPPWFGEVRAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRAGLTGILLPRNVAVHVFALTL--------------
67
+ >UniRef100_UPI0004C58EA0_1883/ 156 0.436 1.667E-38 36 221 233 2 188 196
68
+ ------------------------------------TSTTTTPVPLDLTALADNVGVTRPDRLSEGAFNIWGNTFPAEELP-PAGTVEVHGIPFRWPaTGDGSPDNVRCRGQLVTVPEGRYDWIHVLAAAERRTEDPLLLHFADGSVDPEWLRVSDFWPETASrFGERPAFSCTRLHYPRHIQHAMGPTVWRQRVPVTREERLRAFRLPDNPAMHLFAITLAPA-----------
69
+ >UniRef100_UPI001BCF2EC7_1581705/ 156 0.483 1.667E-38 44 223 233 15 194 199
70
+ --------------------------------------------PVDISGHRNNTAISAATQTGAGAFNVWGNSFAAEYLPAGESLVHVAGVPFRFPPVCDGPDNVRCAGQFVTVPAGRYDWLHVLAAAERRCEDTVELNFGDGSVDAEPLRVSDFWAAPAWFGEIKAFESPVMHYPHHVQRGVPAVMWAQRVPVTRRAGLAGLRLPRNVAVHVFAVTLQRAEA---------
71
+ >UniRef100_A0A2S9PQ64_2100817/ 156 0.422 1.667E-38 33 232 233 0 197 206
72
+ ---------------------------------MPSTASTT-TRILDLSAVFDNIGASRAAATSTGAFNVWRNSFSAEHLPEPGTTVTVDEVPFLVPPfGTGGPDNVRCAGQLLEVQPDRYDWLYLLAAAERRVEDEVALHFADGTVDFEALRLSDFWAAPAVFGESEAFRTPAMHYPQHVQFGVPAGLFCQRVPVTRRAPLAGVRLPRNTAVHVFAATLLRAAAA--AGGPRT
73
+ >UniRef100_A0A367FP16_509200/ 155 0.478 2.279E-38 44 224 233 5 186 192
74
+ --------------------------------------------PVDLTGHWNNRGISAAAGTGSGGFNVWRNSFPAEYLPPPGAQVEVEGVPFRFGGPDPAGDNIRCAGQYVELPEGRYDWIHLLTAAERRTEDTVALHFAGGEVDFEPLRVSDfWARAHAAFGEAKAFETPVMHYPHHVQPRVEALMWSQRVPVTRRAPLRGLRLPRNVAIHLFAMTLQTAGEP--------
75
+ >UniRef100_A0A1W5T2H2_1977088/ 155 0.458 2.279E-38 44 224 233 15 195 197
76
+ --------------------------------------------PVDISGHRNNTAVSAATETKAAAFNVWGNSFAAEYLPAGGTLVHIAGVPFRFPPVCDGPDNIRCAGQFLDLPEGRYDWIHVLAAAERRCEDTAELHFDDGSVDPEPLRVSDFWSAPAWFGEVKAYESPVMHYPHHVQRNVSAVMWAQRVPVTRRAGLTGVRLPRNDALHLFAVTLQRAEAA--------
77
+ >UniRef100_A0A372M322_2293568/ 155 0.392 2.279E-38 22 220 233 12 215 222
78
+ ----------------------TSELTHTPADFAPPGSPPADFTPVGLQQHFNGKGVSAPrRWPTTGGFNVWGNTFPAEGLPTGGGDAEVACIPFRFPvddSAAGAPDNLRCRGQSVDVPPGEYQWIHVLAAAERRTEDEAVLHYADGSTRREWLRISDFWPETDqRFGELLAFRTRYLMYPRHSQHNMVPSIWLQRVPVTAPGTVTAVELPDNPAIHIFAITLET------------
79
+ >UniRef100_UPI00131A6ADB_1840409/ 155 0.241 2.279E-38 49 218 233 893 1078 1080
80
+ -------------------------------------------------SVVNNTGISPDAKPAAANFDGVGWSYSADALAAagakPGGTVTVDGLSYTWPNFPvGEPDNVVAQGQTVALPgaAAGAGKLALLGAAANGkASGTLTITYTDGSTTRADIGFSDWTlgggSDQPSFGNRIAVSTPYRNSTGGQPQQIRTNVFAtVPIALDPGKRVRGITLPaqvQGGSLHVFAVAL--------------
81
+ >UniRef100_UPI001C84E3F6_763782/ 155 0.494 3.117E-38 44 218 233 2 177 189
82
+ --------------------------------------------PVDLAAHWNNRAISAADDRGSGGFNVWRNSFPAEYLPPPGARVEVAGVPFRFGGPSAAGDNVRCAGQYVELPEGRYDWIHLLTAAERRTEDTVALHFSDGEVDFESLRVSDfWAQAHAAFGETKAFETPVMHYPHHTQPRVEALMWSQRVPVTRRAPLTGLRLPRNIAVHVFALTL--------------
83
+ >UniRef100_UPI000B2085DA_46164/ 155 0.422 3.117E-38 43 227 233 5 190 191
84
+ -------------------------------------------VPVDLAPFFDNVGITPAGDLSSGAFNIWGNTFPAEELP-ERSPTTLGGVPFRFPDrGPGGADNLRCGGGLIPLPEGRYDWLYLLAAAERRTEDPVHLHYADGTVDPEWLRVSDfWPETQAWFGEREAIRCRSLHYPRHVQRPMGPALWRERIPVPREMPLAALRLPDNPAVHVFAVTLLPAGEVAPA-----
85
+ >UniRef100_A0A7K0CJB4_2585196/ 155 0.419 3.117E-38 43 221 233 7 186 196
86
+ -------------------------------------------VPVDLAPYADNTGITSARATAAGAFNLWGNTFPAGELPPPGTAV-VDGLPFRFPLAPaGEPDNVRCAGQLLPLPPGRYDWIQLLAAAERRTEDQALLHYADGAVDPEWLRVSDfWPQTRSRFGAAPAYECRVLHYPRHVDDKFGPVVWRHRVPVPRESDLAAVRLPDNPAIHVFAMTLVPA-----------
87
+ >UniRef100_UPI0018922BF9_280293/ 154 0.446 5.828E-38 44 218 233 2 178 189
88
+ --------------------------------------------PVDISPHFDNRGITGRGELSQGGFNIWDNTYPAEELPTPGGVVEVGSAPFLFPAlSPDGGDNLRCVGQFIQLPVGRYDWLYLLAASERRSEDTVYLHYADGSVDPEWLRVSDfWAETPPHFGEEAGLRCTVLHYPRHVQPFMGPAIWRTRVPVPRETPLSAVRLPDNPAIHIFALSL--------------
89
+ >UniRef100_A0A6H9Z8I5_359158/ 154 0.263 5.828E-38 20 220 233 743 944 947
90
+ --------------------APVRVTTKAPPWGLPPLPPAGQAVPVELAGHFDNDGITSEFFMGDGDFDGTGATYPAAALPQTG-RVTADGVEFLFVNGiEGSANNVTAAGQTIPVPAGRYARLHVLGASDNGNAGtTVTAVYADGGTAAVPLRLTDW-KSNPAYGESAAVRAPQFHTRTGAKD-IAVTIFHQKADLDPARELTALRLPNltRPRPHLFSLTLEK------------
91
+ >UniRef100_UPI00104757B6_1213861/ 153 0.475 1.090E-37 37 221 233 4 188 190
92
+ -------------------------------------TTANTFRPVEITEHWNNRSMSTVDDKGDGRFNVWRNSFPAEHLPRPGERVTVGGVPFDFPPATSAGDNARCAGQFVTLPPGHFDWIRLLASAERRVEDTVALHFADGQVDFEAIRVSDFWAAPACFGETLAYRTPVMHYPHHVQPRVEAMLWSQRVPVTRDATLTGLRLPNNRALHIFALTLQES-----------
93
+ >UniRef100_UPI001C690D85_2749435/ 153 0.444 1.490E-37 45 222 233 0 178 185
94
+ ---------------------------------------------MDLTALADNVGVTRPDRLSEGAFNIWGNTFPADELP-PAGPVEVHGIPFRWPaTGDGSPDNVRCRGQLVTVPEGHYDWIHVLGAAERRTEDPLLLHFTDGSVDPEWLRVSDFWPETASrFGERPAFSCTRLHYPRHIQQAMGPTIWRQRVPVTREEPLRAFRLPDNPAIHLFAITLAPAA----------
95
+ >UniRef100_UPI0021556BFD_2675858/ 153 0.471 1.490E-37 45 218 233 16 189 195
96
+ ---------------------------------------------VDISGHRNNTAISASTETKAGAFNVWGNSFAAEYLPAGESIVHVDGVPFEFPPVCDGPDNVRCAGQFIRVPEGRYDWIHVLAASERRSEETVELTFADGSVDPEPLRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGVPAVMWAQRVPVTRRAGLTGILLPRNVAVHVFAVTL--------------
97
+ >UniRef100_UPI002254C514_2975865/ 153 0.436 2.037E-37 42 227 233 2 189 191
98
+ ------------------------------------------STVIDISEACNNLGIGKSGAGEDYGFNIWRNTFPAEDLPAPGSTVAVDGVDFEFPPrETGKGDNIRCRGQLVALPGGHYEWIYLLGAAERRTEDEVELHYADGAARTEWLRMSDFWPETDSwFGEPEAFRATGLRYPRHTQAGHRPVIWQQRIPVTVPGALTALRLPDNPAMHVFALTAVTDPGVRHA-----
99
+ >UniRef100_UPI0020A32361_334858/ 153 0.456 2.037E-37 41 220 233 1 182 191
100
+ -----------------------------------------KNTVVDISALCDNRGIQPPGTEGDYGFNIWSNTFPAEELPEPGSLVPVAGVPFEFPaRPAPGGDNIRCRGQLVPVPPGDYDWLYLLGAAERRTEDQVLLHYRDGTVRAEWLRMSDfWPQTEARFGEPLAFRTSAMRYPRHTHPAHAPSIWQQRVPVTVPGEITAVRLPDNPAMHVFALTLGT------------
101
+ >UniRef100_A0A8F5GM97_2750812/ 153 0.412 2.037E-37 45 224 233 8 188 192
102
+ ---------------------------------------------VDLAPFFDNVGITPSGDPSAGAFNIWGNTFPAGELP-EGPRAMLGGVPFRFPDrGPGGADNLRCAGGLIPLPEGRYDWLYLLAAAERRTEDPVHLHYADGTVDPEWLRVSDfWPETEPWFGEREGIRCESLHYPRHVQRPMGPALWRERIPVPRETPLAALRLPDNPAVHVFAITLLPAEEV--------
103
+ >UniRef100_A0A4R2JNF3_1213861/ 153 0.455 2.037E-37 43 218 233 11 188 194
104
+ -------------------------------------------TPVDLGGHFDNRGITRGGELDQGGFNIWDNTFPAEDLPEPGGTVRIGDVPFLFPAPDPAGrDNLRCSGQVIELPTGRYDWLYLLAASERRSEDVITLHHADGSVDPQWLRVSDFWAETPAhFGEQASVRCRSLHYPRHIQRNMAPVIWRTRVPVPRETDLAAIGLPDNVAIHIFALTL--------------
105
+ >UniRef100_UPI001E5DF7EA_2714955/ 153 0.267 2.037E-37 2 221 233 797 1034 1035
106
+ --SWLPESlFLNGGTVTVEVGATANTAWGsgaadlpvdrvpsapTPVPNLPAAcVVQGDYCLQSLAGQYDVDGVTTDDDMSQGVFGTNGWSWPAELLPAPG-VGTAAGRPYLFPDTTGtAGNFLSARGQTVYLTPGRYSALHAVTASHNGSfRGDVTVTYSDGTTSKASLLSTDWAAASPAYGEETALDVPTRHRNTGIHDGLRVRMWHVPLAVDSTRTAVSITLPSLPNLKVYALSARTA-----------
107
+ >UniRef100_A0A1I3MHH7_115433/ 153 0.280 2.037E-37 52 216 233 897 1073 1077
108
+ ----------------------------------------------------NNAGISPDSKPAAANFDGVGFSYSADALAAagakAGSTVTVGGLSYTWPNYPaGSPDNVQAQGQTI-IASGSGRLAFLGAASNGSASGTLTISYTDGSSSTAQLGFSDWTLGaggqQPAFGNQVAFTTPYRNSTGGTPQQINTYVFAsAPITLPAGKTVRSVTLPSNvsgGQLHVFAI----------------
109
+ >UniRef100_A0A7K2PPX2_2690297/ 152 0.474 2.786E-37 44 218 233 15 189 195
110
+ --------------------------------------------PVDISGHRNNVAVSAAAETKAGAFNVWGNSFSAEHLPAGESLVNVDGVPFEFPPLCEGPDNIRCAGQFIRVPEGRYDWIHVLAASERRCEDTVELIFADGSVDAEALRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRAGLTGIVLPRNVAIHVFAVTL--------------
111
+ >UniRef100_A0A248YPR0_2024580/ 152 0.491 2.786E-37 44 222 233 12 190 195
112
+ --------------------------------------------PVDISGHRNNTAISSATATKAGHFNVWGNSFAAEHLPAGGSLVHVAGVPFRFPPVCAGPDNIRCEGQFIAVTQGRYDWVHVLAAAERRCEDTVELSFSDGPVDAESLRVSDFWAAPAWFGELLAFRSPVMHYPHHVQRGVPALMWAQRVPVTRRADLVGIRLPRNVAMHIFAVTLQHGA----------
113
+ >UniRef100_UPI0010FB6E0B_1905/ 152 0.434 2.786E-37 43 222 233 7 187 203
114
+ -------------------------------------------VPVDLTAHADNTGITPADALDAGAFNLWGNTFPAEELP-PGGPVEVDGIPFLFPrHAPGAPDNIRCAGQLIELPAGRYDWIQVLAAAERRTEDQVLLHYGDGSVDPEWLRVSDFWPETASrLGGTAAYTCGRLHYPRHVERKFGPTLWRHRVPVPREAELTAVRLPDNPAVHLFALTLVPAP----------
115
+ >UniRef100_UPI0022EA6403_1229659/ 152 0.444 3.809E-37 41 218 233 1 180 191
116
+ -----------------------------------------KNTVVDISALCDNRGIQPPGTEGEYGFNVWSNTFPAEELPEPGGLVPVAGVPFEFPaRPAPGGDNIRCRGQLVEVPPGEYDWLHLLGAAERRTEDQVLLHYRDGTVREEWLRMSDfWPQTDARFGEPLAFRTSAMRYPRHTHRSHAPSIWQQRVPVTVPGEITAVRLPDNPAMHVFALTL--------------
117
+ >UniRef100_A0A1B4ZDD3_1213862/ 152 0.413 3.809E-37 45 228 233 26 210 211
118
+ ---------------------------------------------LDLTAFADNVGVTSPDRLSEGAFNIWGNTFPADELPK-GGPVDIHGIPFRFPAvGTGQPDNVRCAGQFIDVPVGRYDWIHVLAAAERRTEDFVRLHYTDGAVDPEWLRVSDFWPETASrFGESAAVSCTRLHYPRHIQRSMGPTLWRQRVAVPREQDLSAIRLPDNPAIHIFAMTLAPATQPETTQ----
119
+ >UniRef100_A0A5N8VJT9_1609272/ 151 0.468 5.207E-37 42 218 233 7 183 189
120
+ ------------------------------------------YEPVDISGHRNNTAISAATETGAGAFNVWRNSFAAEYLPAGGSLVHVEGVPFEFPPVCDGPDNVRCGGQFIRVPEGRYDWIHVLAASERRCEDTVELSFADGSVDTEPLRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGVPAVMWAQRVPVTRRAGLTGILLPRNVAVHVFAVTL--------------
121
+ >UniRef100_A0A229REX4_76020/ 151 0.406 5.207E-37 45 224 233 8 189 192
122
+ ---------------------------------------------LSLLEHVNNTGLSTVGDLAAAGFNIWGNSFPAADLPSPGATSVVGGVPFRFPeRAPDGRDNVRCRGQRIDVPEGRWDWVHVLGAAERRTEDPLLLRYADGTVRPQWLRMSDfWPETEPRFGELLAYRCATMHYPRHVQRTTAPAIWAQRVPLSVPDGVVALELPDNPALHLFAITLQAGERV--------
123
+ >UniRef100_A0A7K2T631_2690338/ 151 0.423 5.207E-37 36 222 233 1 188 195
124
+ ------------------------------------TTSTTAPVPLDLTALADNVGVTRPDRLSDGAFNIWGNTFPADELP-PAGPVEVHGVPFRWPaTGGGAPDNVRCRGQLVPVPEGRYDWIHVLAAAERRTEDPLLLHFADGGVDPEWLRVSDfWPETSSRFGERPAFSCTRLHYPRHVQHAMGPTVWHQRVPVTREERLRAFRLPDNPAIHLFAITLAPSA----------
125
+ >UniRef100_UPI0020D28210_193462/ 151 0.423 5.207E-37 42 221 233 6 189 195
126
+ ------------------------------------------STVVDISEVCDNRGITKSGAEESDGFNIWRNTFPAEDLPAPGSSVEVTGVAFEFPaRATGRGDNIRCRGQLLPLPGVRADWLYLLGAAERRTEDEVELHYADGAVRTAWLRMSDfWPETAAWFGEPEAFRGSGLRYPRHTQDGHRPAIWQQRVPVTVPGELTALRLPENPAMHVFALTavLEPG-----------
127
+ >UniRef100_A0A2N8NVN7_66423/ 151 0.424 5.207E-37 33 221 233 3 194 202
128
+ ---------------------------------LPARADrrQTEAVPVNLAGLADNTGITRADALSEGAFNIWGNTFPADELPT-GGPVVVDGVPFLFPEaAPGRPDNVRCAGQLIEVPTGRYDWIQLLAAAERRTEDQVLLHYADGSVDPEWLRVPDFWPETGSrVGGSPAFTCTRMHYPRHVERKMGPVIWRHRVPVPRESDLGAVRLPDNPAVHLFAMTLLPA-----------
129
+ >UniRef100_A0A9E3EXL3_2740538/ 151 0.250 5.207E-37 51 216 233 551 729 733
130
+ ---------------------------------------------------YNNAGISDDSNTTSANFDGGGYSYSAQSLQAagitPGGSVTTKGVTFTWPNvASGVADNYQTNGQTIPVTPvsGATTLAFLGSATNGPSSGTATITYTNGSTQSFSLGFSDWTlnanTASASFGNAIAATLSYRNGANG-RDNVNTYVFYADITLQAGKTLKSVTLPsttNQGRLHVFAI----------------
131
+ >UniRef100_A0A840IWR6_1181879/ 151 0.287 5.207E-37 52 218 233 903 1081 1083
132
+ ----------------------------------------------------DNAGISPDSDPSAGNFDGGGWSYSADALaeagAKPGGTVTSDGIDFTWPSYPaGDPDNVVAAGQTVNVTGSG--KLALLGSSSNGnAEGTLTVTYTDGSTSTATVGLSDWTlgggDAEPAYGNKSVLSTSYRNSSGGDPQEISTVVFATTpITLDAGKTVASVTLPDDvdgGAMHVFALGL--------------
133
+ >UniRef100_A0A941IQY1_1508375/ 151 0.262 5.207E-37 47 219 233 1034 1218 1510
134
+ -----------------------------------------------LTAAFNNSAITSDGNRGCANLDGAGASYSQQALAsvgvTPGTALVHDGLTFTWPSGGAcDADNVVAAGQTIDV-SGSGSTLGFLGTSAWGAiSGTGTVTYTDGTTQPFTIGFGDWANGTPPTGDDIAIRAPYGNQP-GNQTSWQTTIEYAPVQLDPSKTVQSITLPPGnpqpsggiPSMHIFAMSIK-------------
135
+ >UniRef100_C7QK71_479433/ 151 0.480 7.120E-37 44 218 233 15 189 195
136
+ --------------------------------------------PVDISGHHNNKAISAATETKAGAFNVWGNSFAAEYLPAGGSIVEVDGVPFRFPQAGDGPDNIRTAGQFITVPEGRYDWIHLLAASERRTEDCMDLSFADGSVDAEKVRVSDFWGAPAWFGEVKAFESLTMHYPHHVQRGVPAMMWAQRVPVTRRAVLSGILLPRNVALHIFAVTL--------------
137
+ >UniRef100_C7PW55_479433/ 151 0.248 7.120E-37 47 220 233 547 726 727
138
+ -----------------------------------------------LAGAFNNTAITDETNTAPGNFDGDGDSYSAQSLatagATPGATISAGGTTFTWPSaAAGTNDNVAGSGVMVNL-AGQGSKLGFLGSEAGFSTDTVTVAYTDGTSSTGSLGFPNWCCSSPtGYGATPAIVTDHRNTPSGPANFGTAYdVFYNSIAIDATKTVKTVTVPSDPAIHVFAMTVQP------------
139
+ >UniRef100_A0A1V2PT88_1933779/ 151 0.315 7.120E-37 33 231 233 723 921 924
140
+ ---------------------------------LPPLPNQGTTVTVDLAAVLNNDAFTNEFHMGDGDFDGAGNTYPAAQLPQTGQA-NDDGIPFEFVNGNeGAPNNIVPQAQTIQLPPGKYTTVHLLAASDNGNTDrTVTINYADG-AANAPLRITDW-RTAPAFGETEALRTNQMHTRTGPSPTR-LAIFHQKITLDPVRDLVSITLPaaATPRPHIFAITLQKKSSAAPAEGGD-
141
+ >UniRef100_UPI00207D370E_2944808/ 151 0.279 7.120E-37 15 221 233 764 967 972
142
+ ---------------IAPVTVTTPAAPWGLPPLPPG----GDMVTVDLEPFYTNDGITNEFYLGDGDFDGTGRTYPSGALPQNG-SLSNDGVPFRFTNGhEGTRNNVTAAGQTLELPEGNYRKLHLLGASDNGNTDaTATLHYTDGGTAPVRLALTDWLSP-AAFGESEPLRTNQIHTRTGPVDRRAT-VFHQVLAADPTRRLRAITLPASakPRSHVFAVTLEKA-----------
143
+ >UniRef100_A0A1C5JFJ4_47864/ 151 0.457 9.734E-37 44 218 233 15 189 195
144
+ --------------------------------------------PVDISGHRNNTAVSAATETKAGEFNVWGNSFAAEYLPKGRSTVHVDGVPFLFPPVCEGPDNIRCAGQFIEVPEGRYDWVHVLAASERRCEDTVDLNFADGSVDAEALRVSDFWAAPAWFGEVKAFQSLVMHYPHHVQRGVPAVMWAQRVPVTRRADLTGILLPRNVAVHVFAVTL--------------
145
+ >UniRef100_UPI00143C2064_1431344/ 151 0.465 9.734E-37 45 218 233 16 189 195
146
+ ---------------------------------------------VDISGHRNNVAISAATRTRDGAFNVWRNSYAAEYLPTGGSTVRVGGVPFTFPPVCDGPDNVRCEGQFIPVRRARYDWIHLLAASERRCEDTVGLDFADGTADAEPLRVSDFWAAPAWFGEIKAFESPAMHYPHHVQRGVPAVMWAQRVAVTRRADLTGILLPRNVAVHVFAVTL--------------
147
+ >UniRef100_A0A660RIY0_28240/ 151 0.242 9.734E-37 41 221 233 380 568 581
148
+ -----------------------------------------DFTILDISSFFNNDGIASFKHPRYGNFDnhsgIYGATYPLEELQnymNENGRLEVDDIPFVLKGLEGhMKNNTALQGEKIYLPEGRYSYIYFLGSSEHGSfRAPVTFEYSDGSRQTDDSGFSDWCQG-PQLGEKIACKMPYRYDSNGSRQSITCYLFMQTLKLAPKKELITILLPRKNTMHIFAITLRKS-----------
149
+ >UniRef100_UPI001CB7FD65_1930280/ 150 0.457 1.331E-36 44 218 233 15 189 195
150
+ --------------------------------------------PVDISGHRNNTAISAATETRAGKFNVWGNSFAAEYLPEGKSLVHVDGVPFEFPPVCEGPDNVRCAGQFIRVPEGRYDWVHVLAASERRCEDTVELNFADGSVDAEPLRISDFWAAPAWFGEVKAFESLVMHYPHHVQRGVAAVMWAQRVPVTRRAGLTGILLPRNVAVHVFAVTL--------------
151
+ >UniRef100_UPI0006902850_348818/ 150 0.243 1.331E-36 47 220 233 552 731 732
152
+ -----------------------------------------------LAHAFDDVGVSDESDTAPGNFDGDGDSYSAQALAAagvtPGATLSANGYTFTWPSaAAGAPDNVAGGSPTIEL-SGSGSRLAFLGAEAGFTSDTVTVHYTDGSSSTGTLGFPNWCCGaTDSYGAKTAFTTDHRNTPTGpANYGVSYRLYTNAIPLTAGKQVASVTLPSSSAIHVFAMAVQP------------
153
+ >UniRef100_A0A832K4Y2_2282144/ 150 0.279 1.331E-36 41 218 233 25 200 1038
154
+ -----------------------------------------DFLLLDLTSYFNNNGAATEENPSEGNFDYGGFSYAFELLPKPG-KVSVKEIPFIFPQTSQEGNNIACHSQVIKIQPDRYRAILLLGSSTNGDYEaCLWVQFSDGSRSCLKTGLTDWCRDS-VFGEIPAFSFPYRIGPEG-RQEIQNYIWLQVLRIKEEKFLSAIILPENKNIHIFAITL--------------
155
+ >UniRef100_UPI000B17B6CC_28444/ 150 0.474 1.819E-36 44 218 233 6 180 193
156
+ --------------------------------------------PVDISGHLNNTGISAATDTKSGAFNVWGNSFAAEYLPGGGDLVHVAGVPFRFPPVGDGPDNIRCAGQFLTVPEGRYDWVHVLAAAERRCEDVVETHYADGSVDAEPLRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGVSAVMWAQRVPVTRRADLTGLRLPRNAAVHLFAVTL--------------
157
+ >UniRef100_A0A7W3TA43_1472722/ 150 0.477 1.819E-36 45 218 233 15 188 194
158
+ ---------------------------------------------VDISGHRNNTAVSAATETKAGEFNVWRNSFAAEYLPAGGSLVHVDGVPFRFPPLCEGPDNVRCAGQFIRVPEGHYDWIHVLAASERRSEDTVQLNFADGSVDAEALRVSDFWAAPAWFGEIKAFESLVMHYPHHVQRGIPAVMWAQRIPVTRRTGLTGILLPRNVAVHVFALTL--------------
159
+ >UniRef100_UPI0006C193CE_1415555/ 149 0.390 2.487E-36 43 227 233 5 191 193
160
+ -------------------------------------------TVVNLSAHFNNRGIQPPDTPGDYGFNIWRNTFPAEELPEPGSLVDLVGAVFEFPaRASAAGDNVRCRGQLVELPGGRWDWIGLVGAAERRTEDEVELHYADGTVRREWLRMSDfWPQTAAYFGEPRAFVTGGMRYPRHTHPHHAPSIWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
161
+ >UniRef100_A0A977Y500_2809700/ 149 0.418 2.487E-36 42 218 233 11 187 200
162
+ ------------------------------------------YHAVDLAPYSNNVGISAAGHTSRGAFNVWGNSFPAEHLPESGAPVSVGGVPFRFPRVGVGDDNVRCDGQFIAVEAGRFDWLHLLTASERRAEDVVEMHFANGSVDPEWLRVSDFWAAPAWFGEATAFATPVMHYPHHVQRGVSAMLWSQRVPVTRRADLSGFRLPRNAAVHVFAATL--------------
163
+ >UniRef100_UPI00189254C3_280293/ 149 0.243 2.487E-36 47 220 233 554 733 734
164
+ -----------------------------------------------LASAFNNIAISDETNPGPGNFDGDNDSYSAQALAAagatPGASIKAGGTTFTWPsSATGTNDNVAGSGVLVNVT-GQGSKLGFLGAEAGFATDTVTVTYTDGSSGSGSLGFPNWCCSSPtAYGATPTIVTTHRNTPSGPANfGIDYDVFYNSIAIDATKTVKTVSVPNDPAIHIFAMTVQP------------
165
+ >UniRef100_UPI001E52087D_2714956/ 149 0.259 2.487E-36 2 217 233 772 1005 1010
166
+ --SWLPESlFLNGGTVTVEVgskantgwgtkaaDLPVDRMPTapAPVPNLPtACVVQGSNCLQSLAGQYDIDGVTTDDNMGQGVFGTNGWSWPAELLPAPG-VGTAAGRPYLFPDTTGtAGNFLSARGQTIHLTPGRYSALDAVTASHNGNyRGDVTVTYSDGTTSKASLMSTDWAAAAPAYGEETAIDVPTRHRNTGIHDGLRVRMWHVALAVDSTRTAVSITLPNLPNMKVYALT---------------
167
+ >UniRef100_A0A557ZXY0_715473/ 149 0.264 2.487E-36 52 216 233 868 1044 1048
168
+ ----------------------------------------------------NNAGISPDAKPAAANFDGVGFSYSSDALAAagakAGSTVTVNGLSYSWPNYPaGSPDNVIAQGQTVNV-SGSGQLAFLGAAANGNASGTVTVTYTDGTTSTANLGFSDWTLGagaaQPAFGNQVAFRTPYRNSVGGDSQQINTYVFAsAPIALAAGKTVKSVTLPSSvsgGQLHVFAI----------------
169
+ >UniRef100_A0A0N0T605_1519492/ 149 0.437 3.401E-36 44 218 233 2 175 180
170
+ --------------------------------------------PIPLAPHLNNTGLTGADGLDGGGFNIWGNTFPAGELPPSGSTTVVHDVPFLFPAA--GADNVRCRGQRVEVPPGRWDWVHVLGAAERRTEDLLEVHYSDGSVRGQWLRMSDfWPQTEPRFGELLAFRCRHMHYPRHVQRTMSPSIWAQRVPVTVPSDVVALVLPDNPALHVFALTL--------------
171
+ >UniRef100_A0A918SWL6_67368/ 149 0.385 3.401E-36 43 227 233 5 191 193
172
+ -------------------------------------------TVVNLSAHFNNRGIQPPDTSGEYGFNIWRNTFPAEELPEPGSLVELEGTVFAFPaRDTGAGDNVRCRGQLVELPAGRWDWIGLVGAAERRTEDEVELHHADGTVRREWLRMSDfWPQTAPYFGEPLAFSTSGMRYPRHTHRHHAPSLWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
173
+ >UniRef100_UPI0012EAE0B8_2654677/ 149 0.413 3.401E-36 42 225 233 6 191 195
174
+ ------------------------------------------STVVDISEACDNRGIAKSGADESYGFNIWRNTFPAESLPAPGSSVAVAGVTFEFPaRESGRGDNIRCRGQLLPLPAVRPDWLYLLGAAERRTEDEVELHYADGAVRTAWLRMSDfWPETAAWFGEPEAFRTSGLRYPRHTHDGHRPAIWQQRIPVTVAGELTALRLPDNPAMHVFALTAVREPGVR-------
175
+ >UniRef100_A0A428Z1R9_2030/ 149 0.327 3.401E-36 45 220 233 733 908 910
176
+ ---------------------------------------------VDLAAALNNDGFTNEFQMNDGDFDGAGNTYPAAQLPQTG-HAEDDGIPFEFVNGNeGAPNNIIPAGQTIQLPPGKYPTMHLLAASDNGNTNTkLTVTYADGTAQ-VPLQITDW-RASPAFGETEALRTRQMHTRTGPAETR-LSIFHQKVPLDPARELLSITLPaaAKPRPHIFAITLQK------------
177
+ >UniRef100_UPI0019405FD2_566021/ 149 0.472 4.649E-36 41 221 233 2 182 190
178
+ -----------------------------------------EQVPIELAAHLNNAGITPASDTGRGRFNVWRNSLPGEELPL-GRPFTAGGVRFAFPAAgPGRPDNVRCEGQLVTVPPGRYDWCCLLAAGERRVEDEVALHFADGSVDFEPVRVSDFWAASAVFGEEEAVATSVMHYPQHVQPGVTAHIWCQRVPVTRRAELRRLRLPHNVALHVFAATLCPS-----------
179
+ >UniRef100_UPI001AEB6BB5_882444/ 149 0.457 4.649E-36 44 218 233 12 187 194
180
+ --------------------------------------------PVDLSAYFNNTGATAKSDLARGRLNVWQNSFPAEELPDAG-IFVATSVPFEFPAiGPGRHDNIRCAGQRVELPAGRWDWIYLLACSERRSEDVLQLHYTDGTVDAEWLRVSDfWPASPPHFGEVEAIRCEQMHFPRHIQPRVGPRIWQQRVPVPRQHDLAALRLPDNIAIHVFAMTL--------------
181
+ >UniRef100_UPI001C40B55E_200378/ 148 0.497 6.356E-36 43 217 233 6 182 190
182
+ -------------------------------------------VPIDLGPLFNNIGATVETDLGRGGLNVWKNSLPAADLPAAGSLFRYTDVPFRFPEvGSGLPDNVRCEGQRVDLPPGRYDWIYLLACSERRSEDVVHLHYASGEADDEWLRVSDFWAAAPShFGEVEAVPCSRIHFPRHVQRGIAPRIWQQRLPVPRQQPLAYLRLPDNIAIHVFAMT---------------
183
+ >UniRef100_A0A7K2ZJK9_2690253/ 148 0.464 6.356E-36 44 224 233 15 195 197
184
+ --------------------------------------------PVDISGHRNNTAISAATETKAGAFNVWGNSFAAEYLPAGESLVHVDGIPFRFPPVCEGPDNVRCAGQFFGVPEGRYDWLHLLAASERRCEDTVELHFADGTLDAEPLRISDFWSAPAWFGEVKAFESLVMHYPHHVQRSVSAVMWAQRVPVTRRAVLTGVRLPRNVAVHVFAATVQRTEGA--------
185
+ >UniRef100_UPI001BA8A0F4_2802641/ 148 0.274 6.356E-36 36 220 233 30 212 765
186
+ ------------------------------------ASDAPNPVPVPLDGLFDNDGI-DTATTHDGNFDGSGYTFPAEGLPS--GQVTVDGVPFTFPSATGK-NNIVAMGQQIPLPQGRYvTALFLTACSYGATGGTATIHYADGSTSEAQLGGADWYSG---SGQLVA---PFRYTPGGRTDQSPVSISTSQVWIDPNRDAVAITLPTTspavegkSSLHIFALSLQP------------
187
+ >UniRef100_UPI001F2867CD_211113/ 148 0.322 6.356E-36 45 220 233 733 908 910
188
+ ---------------------------------------------VDLAAVLNNDGLTNEFQMNDGDFDGAGNTYPAAQLPQTG-YVEDDGIPFEFVNGNeGAANNIVPIGQTIPLPPGKYPTMHLLAASDNGNtTTTLTVTYTDGAAQ-VPLQITDWRTP-PAYGETEALRTRQMHTRNGTAETRLT-IFHQKLQLDPARDLVSITLPaaTKPRPHIFAITLQK------------
189
+ >UniRef100_A0A563EK61_2591470/ 148 0.295 6.356E-36 5 223 233 739 949 950
190
+ -----HPDFTIAPVKITE-PARAWGLPALP----PGGVAA----PVDITGLFDNDGLSNEFTSRDGDFDGAGNTYPAAQVPQTGG-VTDDGIPFEFTNGDEaSKNNVIAAGQTIAMPAGRYAKLHLLAAADTGNVDaPGVATYVDGTTAPIRFAVTAWRSG-PQFGESEPITTTLMHTPSGPQQA-KVAIFHQVIGLDPARDLASITLPKlaGPRLHVFGITLEKAKA---------
191
+ >UniRef100_A0A7V2UGE7_2026780/ 148 0.264 6.356E-36 41 219 233 33 223 1084
192
+ -----------------------------------------GQVPLNLEGLFDNDAIADAQRRADGNFdcpdhaaDIPGSVFPAENLPATGSKFSFDGIHFLFPSKErGDLNNVACEGQRIDVPPARYKALHIIGTSENGSfRDSLQLAYKEG-PAEAELTLRDWCQ-KPAAGDRVAFEAPCRYTwspqkRGMIREEIQPRIWRQTIPLDPAKTLEALTLPYNRRMHVFAATLE-------------
193
+ >UniRef100_A0A919S905_113560/ 148 0.440 8.688E-36 45 227 233 5 188 190
194
+ ---------------------------------------------IDLSGFFTNRGLQPPGTTGDYGFNIWSNTFPAEELPPAGTVADVAGVPFSFPPDAATGDNIRCRGQVIPLPEGDWDWIYLIGAAERRTEDRVELRYRDGSVRPAWLRMSDFWPETPmRFGEPLAFRTRSMRYPRHTHRNHAPALWQQRIGVAQPEDLAAVRLPDNPAMHVFALTLVADEEARCA-----
195
+ >UniRef100_UPI0019447DF0_113566/ 148 0.445 8.688E-36 45 227 233 5 188 190
196
+ ---------------------------------------------IDLSGFFTNRGLQPPGTTGDYGFNIWSNTFPAEELPPAGTVADVAGVPFSFPPDAATGDNIRCRGQVIALPAGDWDWIYLIGAAERRTEDQVELRYRDGSVRPAWLRMSDFWPETPvRFGEPLAFRTRSMRYPRHTHQNHAPALWQQRIGIAEPGNLSAVRLPDNPAMHVFALTLVADEEARCA-----
197
+ >UniRef100_UPI001FEC609C_2508722/ 148 0.477 8.688E-36 44 223 233 15 194 197
198
+ --------------------------------------------PVDISGHRNNTAVSAATDTKAGRFNVWGNSFAAEYLPAGGSLVHVAGVPFRFPPVCDGPDNVRCEGQFLEVPEGRYDWVHVLAASERRCEDTVGMHFADGAVDAEPLRVSDFWAAPAWFGEVRAFESLVMHYPHHVQRNVSALMWAQRVPVTRRAGLTGVRLPRNIAVHVFAVTLQRTEA---------
199
+ >UniRef100_A0A6G7PB94_2714844/ 148 0.482 8.688E-36 45 218 233 19 192 198
200
+ ---------------------------------------------IDISDLRNNAAISSAVETGAGAFNVWRNSFAAEYLPAGGSLVHVDGVPFEFPPVCEGPDNIRCAGQFIKVPRDRYDWIHVLAASERRSEDTVELTFADGSVDAEPLRVSDFWAAPAWFGEVKAFESLAMHYPHHVQRGVPAVMWAQRVAVTRRADLTGILLPRNVAVHIFAVTL--------------
201
+ >UniRef100_A0A4R0HF04_1124743/ 148 0.394 8.688E-36 25 218 233 35 228 237
202
+ -------------------------MPDQMPDQMPATSVQARCRAVNLAPHRNNVGSTPATDTRGGAFNIWGNSFPAEELPAPGQ-FVVDQVAYDFPPTGrGTADNVRAAGQFIEVPSGRYDWLYVLGAAERRVEDELAFHFADGSVDFEQLRLSDFWAAPGWFGETQVRATRSMHYPFHVQAGVPAMLWSQRVPVTRRAALAAVRLPRNPAVHLFAATL--------------
203
+ >UniRef100_A0A3E0IAZ0_1045776/ 148 0.284 8.688E-36 36 220 233 31 213 766
204
+ ------------------------------------ASDAPNPVPVPLDGLFDNDGI-DTATTHDGNFDGSGYTFPAEGLPS--GPITVDGVPFTFPAATGK-NNIVAMGQQIPLPKGRYvTALFLTACSYGATGGTATVHYADGTTSQAQLGGADWYSG---SGQLVA---PFRYTPGGGTDQSPVSISTSQVWLDPARDAVAITLPTTnpavegkSSLHVFALSLQP------------
205
+ >UniRef100_UPI000DE266A4_2249762/ 148 0.264 8.688E-36 33 221 233 747 936 938
206
+ ---------------------------------LPPIQPPGETTPVDLSAHFDNDGVSTHENMADGDFDGTGRTYPAEELPPAGPYL-HQGVSFLIPSfADGAHNNLTARGQVIAVPPGRYARLRLVGACAYGSLDTkLIATYADGSTAELPFVMSDW-AGQPAAGGSEVTRCTHRHGKAG-PDALKVALFEVPIALDPARELRSITMPVRvkPQLHVFALSVEKS-----------
207
+ >UniRef100_A0A7L6B3W5_2749844/ 147 0.422 1.188E-35 45 217 233 14 188 196
208
+ ---------------------------------------------VDLSACFNNTAATTPETTGAGGLNVWRNSFPIEELPDATEVFRPGAVPFRFPRTgPGRLDNVVCAGQRIDLRPARYDWIYLLACAERRIEDVVHLHYRDGSVDEEWLRVSDfWPASPPRFGELEAVRCDRIHFPRHVQPGIGPRIWQQRLPVPRQEELVYLRLPDNIAVHIFAMT---------------
209
+ >UniRef100_A0A2M7SAS6_1974543/ 147 0.279 1.188E-35 35 220 233 24 215 216
210
+ -----------------------------------GAGGGAGCVPVDISKLCNNDGIAPDDDATGGDFDAGGASYTAKAWPKVGKTgVTVNGVPFLVAGADKKVNNIACEGEAVQVPAGSYKAILMLASATNVPEGSmedaLTLVYKDGTKSAVNFKLTDWC-VEPKEGEKKAYSFTYRmsTGTEGGHHEIPCYLFLQTVKIDGKKELASIRLPEQKDIHIFAITLQK------------
211
+ >UniRef100_A0A938MG41_2026780/ 147 0.286 1.188E-35 43 218 233 67 249 254
212
+ -------------------------------------------TLLDLAAAFNSDGISTEENAKDGNMDApgkpNGASYPKDELPAAHSLLTLKGkpvMTFLFPDGqDGKLNNIACSGQTVHVPAASYVELWVLGAATYGAQtSDLELRYEDGS-ETEPLELSDWC-ETPGFGERQAVVAKHRHGWKGEEEDIPCGLWAQRVPLDAKRKLVALVLPQNAKMHIFALSL--------------
213
+ >UniRef100_A0A938S8T3_2026780/ 147 0.286 1.188E-35 43 218 233 743 925 930
214
+ -------------------------------------------TLLDLAAAFNSDGISTEENAKDGNMDApgkpNGASYPKDELPAAHSLLTLKGkpvMTFLFPDGqDGKLNNIACSGQTVHVPAASYVELWVLGAATYGAQtSDLELRYEDGS-ETEPLELSDWC-ETPGFGERQAVVAKHRHGWKGEEEDIPCGLWAQRVPLDAKRKLVALVLPQNAKMHIFALSL--------------
215
+ >UniRef100_A0A5N0UUJ1_2596893/ 147 0.258 1.188E-35 52 216 233 884 1060 1065
216
+ ----------------------------------------------------NNAGISPDAQPGVANFDGVGFSYSSDALAAagatPGGTVTVDGLSYTWPNYPaGDPDNVLAQGQTVNV-SGSGRLAFLGAAANGNASGTVTITYTDGSTSTAQLGFSDWTLGAGaqqlAYGNVKAVTTSYRNSTGGDTQQIGTYVFAsAPITLDSGKQVASVTLPSSvsgGQLHVFAI----------------
217
+ >UniRef100_A0A2H5B2X5_2018025/ 147 0.488 1.624E-35 45 218 233 15 188 194
218
+ ---------------------------------------------IDISGHRNNTAVSAATDTGAGAFNVWRNSFAAEYLPAGGSLVEVDGVPFDFPPVCQGPDNVRCAGQYLEVPRDRYDWIHLLAASERRCEDTVELTFADGSVDAEPLRVSDFWAAPAWFGEVKAFESLTMHYPQHVQRGVPAVMWSQRVAVTRRVDLTGLLLPRNVAVHVFAVTL--------------
219
+ >UniRef100_A0A5C4LYC7_2547244/ 147 0.269 1.624E-35 52 216 233 867 1043 1050
220
+ ----------------------------------------------------NNAGISPDAKPAAANFDGVGYSYSADALAAagakAGSTVTVNGFSYTWPAYPaGSPDNVIAQGQTVNV-SGSGQLAFLGAGSNGNASGTVTITYTDGSTTAASLGFSDWTLGgggaQPAFGNQKAFTTSYRNSVGGDPQQINTYVFASTpITLSAGKTVQSVTLPSSvsgGQLHVFAI----------------
221
+ >UniRef100_A0A0K3B8B7_703222/ 147 0.259 1.624E-35 47 216 233 898 1082 1086
222
+ -----------------------------------------------LRSAYNNVGISPDNAMASANFDNVGFSYSANALAngglKPGGTVTVDGLAHTWPITDiGEPDNVVAGGQTVNVQaQSGASKLALLGSAANGtASGQLTITYTDGSTQQAQVGFSDWTLGgggqQPSFGNRIAATSSYRNSVNGSSQGLNTYVFATEpVGLDPSKQVKSVTLPPTvsgGTLHVFAL----------------
223
+ >UniRef100_A0A8J3I4R3_2778364/ 147 0.284 1.624E-35 51 216 233 1027 1201 1205
224
+ ---------------------------------------------------YNNVGTSDDSAPSFGNFDAQGNSYSAQALQQvgliPSQNVTVNGVTFIWPNmAPGYFNNYQAAGQTIGVTPidGATTLAFLGSASNKGSSGTATITYTDGSTQTFTLGFTDWVTSTLSYGNSIAATMSYHNASTGKQTG-NTYIFYTSVTLQAGKTIQSVTLPatfTGGQPHVFAI----------------
225
+ >UniRef100_UPI00143BFCD2_2720606/ 147 0.482 2.219E-35 45 218 233 4 177 183
226
+ ---------------------------------------------IDISGHRNNTAVSAATETTAGEFNVWRNSFAAEYLPAGGSLVHVDDVPFRFPPVCEGPDNIRCSGQFIRVPEGRYDWIHVLAASERRTEDTVELSFADGSVDAEALRVSDFWAAPAWFGETKAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRTGLTGILLPRNVAVHVFALTL--------------
227
+ >UniRef100_UPI00058392A0_703222/ 147 0.312 2.219E-35 33 220 233 726 913 917
228
+ ---------------------------------LPKLPDQGSTVPVDLVAALNNDAFTNEFHMGDGDFDGAGNTYPAAQLPQTGQA-SDDGIPFEFVNGNeGAPNNIVPAGQQIALPPGQYTTLHLLAASDNGNtNAKLTVTYTDGTAQ-VPLQVTDW-RAAPAYGETEALRTSQMHTRTG-PQPVRLSIFHQKIPLEEARHLVSIALPaaATPRPHIFAVTLQK------------
229
+ >UniRef100_A0A9E9FHN1_2979468/ 146 0.434 3.034E-35 39 221 233 1 183 192
230
+ ---------------------------------------AADFQCIDLSAWYNNVGLTRPDNTSAGAFNVWRNSLPAPELPC-GQRVTVEGVPFLLPPADGnRYDNVRCDGQVAPVPRGAYDWVYLLTTAERRVEDEMSLHFADGAVDFEPLRVSDFWVAPAVFGETAAFTTTVMHYPRHIQRNVPATVWSQRVPVTRRGRLRAVRFPDNLAVHILAMTLCEA-----------
231
+ >UniRef100_UPI001E6135CD_1617086/ 146 0.480 3.034E-35 44 218 233 18 192 200
232
+ --------------------------------------------PVDISGHRNNTAISAATETKAGAFNVWGNSFAAEYLPAGGSLVHVDGVPFEFPPVCDGPDNIRAAGQFIGVTPGRYDWIHVLAASERRCEDTIELSFADGSVDAEPLRISDFWAAPAWFGEVKAFESLVMHYPHHVQRGVPAMMWAQRVPVTRRAELTGILMPRNVALHVFAVTL--------------
233
+ >UniRef100_A0A9E3BI44_2740538/ 146 0.268 3.034E-35 31 217 233 734 926 930
234
+ -------------------------------PSFPPAATGGNS--------FNNEGTSNDSNTAVGNFDGGGNSYSNNALSaagfASGSTVMVNGISFQWPTvAAGSDDNWQVAGQGIPVNgrSGATTLAFLGAATSGPSSGTIMVTYTDGSTQTFTLAFSDWTlnggRSTILSGDSIAVQMSYRNTPTGQQTNHPTYVFLTSVTLAAGKTVKSINLPssvSQGTMHVFAIS---------------
235
+ >UniRef100_A0A941IQY1_1508375/ 146 0.278 3.034E-35 33 215 233 1314 1504 1510
236
+ ---------------------------------LSGAATQVPYS--SLAAGFNNVSITDDSDHSptgfDGGLDGGGNSFSAEALAAagltPGTDFTFDGVVFTWPNsAAGTPDNIEADGRAFDVTGTGSTLGFLGAAANGASSGTVTVTYTDGTTQQFTIGFGDWASTTPYAGGQVAVTSAYGNTSSGTS-PWKASVFYDSVTLPAGKTVQSVSLPTAGSapLHVFA-----------------
237
+ >UniRef100_UPI0018E54A72_714197/ 146 0.443 4.147E-35 45 218 233 1 176 190
238
+ ---------------------------------------------VDLSRYCNNRGIQPPGSSGEYGFNIWGNTFPAEELPEPGTRVGVCGVPFEFPvGAAPSGDNVRCRGQLVEVPPGDYDWVYLLGAAERRTEDHLGLVYADGTELAEHLRMSDFWPETGSrFGEPLAFRTTALRYPRHTHTPHEPSIWQQRSPVSVRGRLRALRLPDNPAMHVFAVTL--------------
239
+ >UniRef100_A0A1R0KG38_76021/ 146 0.390 4.147E-35 40 224 233 3 189 192
240
+ ----------------------------------------AGMRSLSLLEHVNNTGLSAVDDLAAAGFNIWGNSFPAADLPAPGATSVVGGVSFRFPeRAPDGRDNVRCRGQRIAVPEGRWDWVHVLGAAERRTEDPLLLRYADGAVRAQWLRMSDfWPETEPRFGELLAYRCAAMHYPRHVQRTTAPAIWAHRVPLTVPDGVVALELPDNPALHLFAITLQAGERV--------
241
+ >UniRef100_A0A1K1S937_546364/ 146 0.289 4.147E-35 40 227 233 46 234 781
242
+ ----------------------------------------PPPVPVALDTWFTNDGIDSASATG-GDFDGSGYTFPAEQLPV-GRTATVGGVPFRLGSaAAGAKNNIAATGQTIDLPKGRYFVAYFLVAASYGtAGGTATVHYADGSTSTGSLSGPDWYTGTG------ALVSPFRYAPGGVVDGNPVSLATGQVWIDPAREAVALTLPTTANpapnvasLHVFALTLQPVAVGRAA-----
243
+ >UniRef100_UPI001AE5AA53_130796/ 146 0.301 4.147E-35 39 220 233 768 950 953
244
+ ---------------------------------------GGDTVRVDLEPHHTNDAITNEFYLGDGDFDGTGRTYPSGQLPQNGA-LTNDGIPFTFTNGhEGTRNNVLAAGQTIPLPEGAYTRLHVLGASDNGNTDaQATLHYTDGTAATVRLALTDWL-TSAAFGESEAVRTNQIHDRTGPR-PRRASVFHQVLPVDPARRLRALTLPttTRPRAHVFALTLEK------------
245
+ >UniRef100_A0A7I8EN04_2717365/ 146 0.263 4.147E-35 43 227 233 43 230 968
246
+ -------------------------------------------TPVSLSSLFNNKGVG--GAPGQANFDGSGYAYPANQLPGAGQQ-TLNGIPYLFPNYsAGANDNVVALGQTVSLSPGeqqQYQQISLLaAASYGPSGGTLTIHYTDGSSSSVSLTVSDWYTPTAG-----LVSTTYRYTPTGTEQ-HAVHIYALSALVDATRTVASLILPNtaqpaagQASLHIFALTLLPSTQVPVP-----
247
+ >UniRef100_UPI002020D29A_211113/ 146 0.240 4.147E-35 19 214 233 835 1059 1065
248
+ -------------------SVAVRVEPGTPeatyqvVVQFTGPQGVLQSAPLqilvaqpgSLRSVYNNVGISPDNAMASANFDNVGFSYSANALfnagLKPGGTVTVDGLTHTWPvTAVGEPDNVIAGGQTVNVPaAAGATKLAFLGSAANGtASGTLTITYTDGSTQQAQVGFSDWTLGgggqQPSFGNRKAVTSSYRNSVNGSSQGLNTYVFATEpVALDPSKQVKGVTLPSSvsgGTLHVF------------------
249
+ >UniRef100_A0A495L449_2183912/ 145 0.437 5.669E-35 42 217 233 17 192 209
250
+ ------------------------------------------WAPVDLRAHVDNRGISPADESAAGAFNIWGNSFPSEHLPRAGSRVEVEGVPFEFPVSSDNGDNVQCAGQLVSVPVERYDWIYVLAAAERRAEDEAALHFTDGWVDFEPLRVSDFWAAPSVFGEVSAYRTPVMHYPHHVQPGVQAALWCQRLPVVRRAALTSIRLPRNNAVHVFAMT---------------
251
+ >UniRef100_UPI001C636FC7_1291556/ 145 0.410 7.748E-35 55 220 233 1 167 172
252
+ -------------------------------------------------------GIEFTRRPGDGAFNIWRNTFPAEDLPR-GEIVDVGGVPFCFPAADGRhPDNLRCRGQRIELPVGRVDWLCFLAAAERRTEDTLSVHYADGATRAQWLRVSDfWPETPPRFGDVLAFRTPYLLYQRHAQSGMAPAIWRQRVPVTIPGDVAAVTLPENPAMHIFALTLLT------------
253
+ >UniRef100_UPI00099B8A4C_89050/ 145 0.275 7.748E-35 32 224 233 836 1038 1167
254
+ --------------------------------WASGASAAPPSYA--GSPTYTDVGVSDDASPAAADFDGVGNSYSAQALAAagasPGAKITAGGTTFTWPtPAAGQPDNYEANGQSVAVSGSG--SIAFLGAANHGPStGTATVHFTDGTTQSVQLTFSDWTLGggkaTPAAGDTTALTTSYRNKTGASSEQVATYVFAtAPVALPSGKTADSVRLPSDtsaGGLHVFAIGFGGAAAA--------
255
+ >UniRef100_A0A6H9XE76_1873/ 145 0.427 1.059E-34 33 218 233 0 185 195
256
+ ---------------------------------MPTSATIAAHV-LDLASHFNNVGASRAEDTSAGRFNVWGNSFAAEHLPSAGSQVVVDGVPFQLPPlGTGTPDNVRCDGQFVRVAPGRYDWLYLLASAERRVEDEMALHFAHGAVDFEPLRVSDFWAAPATFGETRAFESPLMHYPHHVQFGVPAAMWCQRVPVVRRADLTAVRLPHNIAVHVFAATL--------------
257
+ >UniRef100_K0JYB8_1179773/ 145 0.428 1.059E-34 37 224 233 6 193 196
258
+ -------------------------------------TGHDVFCSVDIAGHRNNVGITSVGGTGVGGLNVWRNSLPAEQMPC-GEVVVLDGVPFEFPTAgSGEPDNVRGDGQLLELPAGYYDWVHLLACGERRVEDELALYFTDGSIDFEHVRVSDFWAAEPVFGETSALRSTVMHYPQHVQPRVPGIIWAQRVPVPRRAELKAVRLPRNVALHVFALTAQRATGA--------
259
+ >UniRef100_A0A8J3IWF3_2778369/ 145 0.229 1.059E-34 47 217 233 916 1098 1101
260
+ -----------------------------------------------LLSYYNNIGISNDSSANTADFDGDGYSYSAQQLTsagfTPGATVTVSGIAYTWPNVqPGVYDNIEVNGQTIQTPnaPANAAHLSFLGSATNGDtQGTVTITYTDGSTQTAQLGFSDWTLGagaePVAYNNVVAAKTSYRN----PNDKVTTYVFASeSIALSSGKTVASITLPNPMNMgalHIFTFT---------------
261
+ >UniRef100_A0A1V2RDC8_1857892/ 144 0.434 1.448E-34 39 221 233 1 183 192
262
+ ---------------------------------------AAGFTCVDLSAWYNNVGLTRPDNTSAGAFNVWRNSLPAHELPC-GQRVTVEGVPFVLPQADGdRCDNVRCDGQVAPVPCGAYDWVYLLTTAERRVEDEMALHFADGAVDFEPLRVSDFWVAPAVFGETAAFTTTVMHYPHHIQRNVPATVWSQRVPVTRRGRLRAVRFPDNLAVHILAMTLCGA-----------
263
+ >UniRef100_A0A918GZH9_1951/ 144 0.457 1.448E-34 44 218 233 15 189 195
264
+ --------------------------------------------PVDISDHRNNTAISAATETRAGAFNVWRNSFAAEYLPAGQSLVHVDGVPFEFPPVCDGPDNVRCAGQFIRVPVGRYDWIHVLAASERRCEDTVELNFADSSVDAESLRISDFWAAPAWFGEVRAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRADLTGILLPRNVAVHVFAVTL--------------
265
+ >UniRef100_A0A918U5A5_68238/ 144 0.295 1.448E-34 44 219 233 48 225 603
266
+ --------------------------------------------PVPLDRLFDNRAVSDDARPGAADFDGSGASLSAQDLAAagwtPGRSLTVQGARLTWPrPAAGRPDNVRADGQTVRV-RGRGDALAFLAAGTGGEdlTGTGTVRYADGSRSAYRLDVGDWRTGPLA---TKAVALPHVNTP-GGRLAERARLYVVTVPLARGREVASVHLPREPDLHVFALSVR-------------
267
+ >UniRef100_A0A4R2IR67_1213861/ 144 0.266 1.448E-34 36 225 233 31 220 766
268
+ ------------------------------------TAAAPTPVTVSLTQYFDNNGIDSASAR-DGNFDGSGYTYPAEALPTGG--ITVDGVPFTFPSaAKGVKNNIVAMGQTLDIPRGKYHAGHFLVAgSYGMASGTATVHYADGGTTTASLAGPDWYNSSGP------ISASYRYAPNNGTDQHPVAIAAAQIWLDAGREAVSVTLPVtqpaqagQTSLHVFAMTVQAAAKGR-------
269
+ >UniRef100_UPI001FE67354_2698649/ 144 0.245 1.448E-34 47 214 233 887 1069 1075
270
+ -----------------------------------------------LRAAYNNVGISPDNAMSVANFDNVGFSYSANALSnaglKPGGTVTVDGLPHTWPLTEvGEPDNVIASGQTVNVQaAAGATKLAFLGSAANGtASGTLTITYTDGSTQQAQIGFSDWTLGgggqQPSFGNRIAVTSSYRNSVNGSSQGLNTYVFATEpIALDPSKQVRSVTLPSSvsgGTLHVF------------------
271
+ >UniRef100_UPI002092D67B_79912/ 144 0.259 1.979E-34 47 215 233 906 1090 1096
272
+ -----------------------------------------------IEWYQNNAGISDDGKAGTANFDGGGWSYSAQALAAaglkPGQPVAWKGFTFTWPNrTPGQLDNVQANGQVVDLPaaPKGASQLAFLGAAGNGDaSSTVKITYTDGSTATAKLELSDWALGadayPPKFGNEIVAKTPYRNTSDGGTQKLNIYVFAATpIALDASKQVQSVTLntPSSgGSLHVFA-----------------
273
+ >UniRef100_A0A9E9MNL7_2944128/ 144 0.265 1.979E-34 47 222 233 934 1125 1423
274
+ -----------------------------------------------LLAAFDNRGISDDTAVAAGNFDGGGRSYSAQALAAagltAGQPVTAGGISYGWPlPAPGYPDNAIAAGQAITLdaPSGTAQVGVLGAASNGPSQGIATLTYADGSTDRYWLGLSDWTlnggSAHPSYGNLVAARTTYRNCAgcSGGRDSVDTDIFATSLPADPAKTLTSLTLPsgtTRGQLHVFAIGTSTSA----------
275
+ >UniRef100_A0A6B2VD27_2706027/ 143 0.471 2.704E-34 42 219 233 6 183 195
276
+ ------------------------------------------FLPVRLDKHWNNRAVSAVGETGTGRFNVWRNSFPAEHLPAPGGELVVGDVPYRFPQTTADGDNIRCAGQYLPLPEGRYDWIHLLASGERRVESELALHFADGEVDFEAVRVSDFWAAPARFGEREAARTPVMHYPHHVQQGVAAVLWSQRVPVTRIATLHGVRLPRHIALHVLALTLE-------------
277
+ >UniRef100_A0A3R9DPL9_2267691/ 143 0.289 2.704E-34 40 227 233 48 236 783
278
+ ----------------------------------------PPPVPVTLDAWFTNDGIDSASATG-GDFDGSGYTFPAEHLPA-GQTTTVGGVPFKLGSaAAGAKNNIAATGQRVDLPKGRYFVAYFLVAGSYGtAGGTATVHYADGTTSTGSLSGPDWYTGTG------ALVSPFRYAPGGVVDNNPVSLATGQVWVDPAREAVAVTLPTTANpapnvasLHVFALTLQPVAVGRSA-----
279
+ >UniRef100_UPI0021528536_2945988/ 143 0.284 2.704E-34 26 227 233 40 236 783
280
+ --------------------------PDTPIVPPP------PPVPVALDTWFSNDGIDSASATG-GDFDGSGYTFPAEHLPA-GRTTNVSGVPFTLGSaAAGAKNNIAATGQTIDLPKGRYFVAYFLVAASYGtTGGTATVHYADGSTSTGSLSGPDWYTGTG------ALVSPFRYAPGGVVDNNPVSLATGQVWVDPARDAVALTLPTTANpapnvasLHVFALTLQPVAVGRSA-----
281
+ >UniRef100_UPI00210A4B97_68230/ 143 0.387 3.696E-34 43 223 233 17 198 199
282
+ -------------------------------------------HPVAIEDFLDSVGVEPNSRSGLGAFNIWGNAFPAEALPC-GTLTELDRVPFRFPPADGvRPDHLRCRGQRIPLPAGRADWIHVLGAAERRTEDDVLLEYADGSARRQWLRLSDfWPQTGPRFGERLAFRTGSMLYPRHEQLTMSPSIWHQRIPVAVSDGLRAIVLPHNPAMHLFALTLVEAAA---------
283
+ >UniRef100_UPI001FB83ACA_1213861/ 143 0.250 3.696E-34 23 217 233 831 1054 1057
284
+ -----------------------RAEPDTPeatyqvAVTFSGPGGSLGSATLqilvappgSLRASFNNIGISPDNAMSVANFDNVGFSYSANALAnvgvKPGAAITVDGLAHTWPiTGLGEPDNVVAAGQTIVVTaPAGATKLALLGSAANGSaSGTLTVTYTDGTTQQAAVGFSDWTLGgggqTPSFGNRIAAASQYRNSVNGNTQGITTYVFAtAPIGLDATKQIRSVTLPgsvSGGSLHVFALT---------------
285
+ >UniRef100_A0A399GYA9_2259644/ 143 0.377 5.051E-34 38 223 233 12 198 199
286
+ --------------------------------------TVGAAHPVAIEDYLDSVGVEPNSRSGRGAFNIWGNAFPAEELPC-GNLTELDRVPFRFPAADGkRPDHLRCRGQRIPLPAVRADWIHVLGAAERRTEDSVLVEYADGTVRRQWLRLSDfWPQTGPRFGERLAFRTTAMLYPRHEQNTMSPSIWHQRVPLAVADGLSALVLPHNPAMHLFALTLVTEAA---------
287
+ >UniRef100_UPI0015643D0C_2698649/ 143 0.455 5.051E-34 39 217 233 18 196 206
288
+ ---------------------------------------GSRFHLVDLDLHRDNVGVTSSSATGYGAFNVWGNSLPAEELP-SGRAIQVEDVPFTFPaTGGGRPDNVRCAGQHITVEPGQYDWLYLLTAAERRVEDEIAFHYADGAVDFEPLRVSDFWAAPAVFGETSAFTTLTMHYPYHVQANVSAMVWCQRVPLVRGAALAAMRLPRNPAVHIFAAT---------------
289
+ >UniRef100_UPI00234B0C2D_2944250/ 142 0.465 6.903E-34 45 218 233 0 173 179
290
+ ---------------------------------------------MDISGHRNNAAVSAATETKAGEFNVWGNSFSADYLPAGKSLVHVDGVPFEFPPVCEGPDNVRCAGQFIRVPEGRYDWIHVLAASERRCEDTVELSFADGFVDAEALRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRAGLTGILLPRNVAIHLFAVTL--------------
291
+ >UniRef100_UPI000A54E855_1710355/ 142 0.464 6.903E-34 45 222 233 3 182 196
292
+ ---------------------------------------------IDLSPLLDNVGTSTTADTSAGRFNVWGNSFAAEDLPAPGSVVMVDAVPFTIPPvGTGAPDNVRCAGQYLALPGGPatADWIHVLAAGERRVEDEIALHFADGSVDFEPVRVSDFWAAPPAFGETRAFAT-LMNYPIHTQFGVPASVWSQRVPVTRRAPVVGLGLPRNLALHIFAVTLQPVA----------
293
+ >UniRef100_UPI001CFA9BF6_146923/ 142 0.387 6.903E-34 43 223 233 17 198 199
294
+ -------------------------------------------HPVAIEDFLDSVGVEPHSRTGRGAFNIWGNAFPAEELPC-GTLIEHDRVPFRFPPADGEwPDHLRCRGQRIPLPAARADWIHVLGAAERRTEDSVLLEYADGSVRRQWLRLSDfWPQTGPRFGERLAFRTSAMLYPRHEQRTMSPSVWHQRVPVAVPDGLRALVLPHNPAMHLFALTLVEEQA---------
295
+ >UniRef100_UPI0022AC71CD_47760/ 142 0.284 6.903E-34 31 219 233 18 209 585
296
+ -------------------------------VTVPAAAApGAEDRPLPLERLFDNTAVSDDTRPGEADFDGSGGSLSAQDLTAagwtPGRSLTVQGARLTWPrRVPGRPDNVRADGQSVRV-RGHGDALAFLVAGTGGGdeSGTGTVRYRDGSRSSYRLTASDWRGGPLA---TKAVALPHVNTP-GGQLTEKARLYVVTVPLVRGRSVDSVELPHNSDLHVFALSVR-------------
297
+ >UniRef100_UPI0007C6D0A7_105422/ 142 0.283 6.903E-34 47 216 233 1003 1180 1184
298
+ -----------------------------------------------LAAAFDNVGVTDDSATAAGNLDGSGSSYSAQALAAagvtPGTALSHGGVSFSWPGaASGRPDNVVAQGQPIDLT-GSGTTLGFLAAGTSGAGaGTGEIVYTDGSTQSYTVTVPDWYATPPA-GSDVAIATAYRNRAGNVQQVHATNVYYTGVPLTAGKTVAMVVLpaptPASGALHVFAL----------------
299
+ >UniRef100_A0A5J4L056_2607529/ 142 0.254 9.435E-34 31 217 233 145 346 349
300
+ -------------------------------VKLPISAlTVVVAKPGNLLGLFNNAGISND-GQGNADFDGDGYSYSAQQLAAsgykPGATVTVNGTQYLWPNvAPATFDNVQVAGQTIQTPdakAGATHLTFLGSATNGPSSGNVTITYTDGSTQTAQLGFSDWTlgagNSQPSYGNVVAVKTSYRNAGSG-QDQVGTYVFAsAPIALNASKQVASITFPssvDQGALHIFALT---------------
301
+ >UniRef100_A0A5P9PZG3_2653857/ 142 0.291 9.435E-34 34 221 233 769 953 954
302
+ ----------------------------------PGGAS----VPVDLTAFLTNDAITSEFYLGDGDFDGAGNTYPAAQLPQTG-KVVDDGIEFVFVNGhEGTPNNVVPGGGPIALPEGAYGVLHLLGASDNGNtNSTMTITYTDGSTAELPLKLTDW-RASAAFGESEAITTSQLHAKDGPK-PVKLAIFHQAIELDPARSPKSLTLPATakPRPHLFAITLEKG-----------
303
+ >UniRef100_A0A7K2QB93_2690325/ 141 0.385 1.289E-33 43 227 233 5 191 193
304
+ -------------------------------------------TVVNLSAHFNNRGIQPPETSGDYGFNIWRNTFPAEELPEPGSLVDLVGAVFEFPaRESAAGDNVRCRGQLVELPVGSWDWIGLVGAAERRTEDEVELHYADGTVGREWLRMSDfWPQTGAYFGEPRAFATGSMRYPRHTHLHHAPSIWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
305
+ >UniRef100_UPI001E589C24_1915/ 141 0.271 1.289E-33 36 225 233 45 234 785
306
+ ------------------------------------WVSVPDPVPVPLDALFDNDGIDTATARG-GDFDGSGYTFPGEELPA--GRIDVDGISFLFPsSAAGAGNNVVALGQRVDLPKGRYmSALFLTAGSYGDASGTATVHYADGSSTTAALGGADWYAAGGP------LSAAYRYGPDGTKDEHSVGIGVSEVWTDPRREAVALTLPKTnpvevgkTSLHVFALSLQPAAQGR-------
307
+ >UniRef100_UPI00167AA00B_33897/ 141 0.259 1.289E-33 33 220 233 770 960 961
308
+ ---------------------------------LPEPAQSGTVTRIDLTGFFDNDGITTEMYYGDGDFDGSGRTYPMAQLPQTGET-TDNGIAFLFTNgSEGTTNNVVVAGQEITVPRGAYAKLHVLGAGDTAAvTVPAVVRYADGTSETARLTLTNWLASGPENGETEAVTTSQIHTPTGPV-ATKAAVFHQVIELDASRQLTTITLtaPDgTARAHVFALSLEQ------------
309
+ >UniRef100_A0A1H5IBI8_1855349/ 141 0.259 1.289E-33 47 216 233 906 1086 1090
310
+ -----------------------------------------------LLAAYSNTGISDDSgDHDEADFDGGGWSYSRQALAAagltPGTEATVDGLAYTWPNAPaGRPDNATATAQTIELPTPAATLSFLGSATNGNQSAPATVTYTDGTTGTLDLSFTDWTigggGGTVQYGNTIVARTAYRNVSGADKDPVATYVFATRpFTAPAGKTVRSVRLPENSDLHVFAL----------------
311
+ >UniRef100_UPI001FFEE4A7_2620835/ 141 0.276 1.762E-33 15 220 233 754 956 962
312
+ ---------------IAPVKVTTPAPPWGLPPLPPG----GDMVTVDLEPFYTNDGITNEFYLGDGDFDGTGRTYPSGALPQNG-SLTNDGVPFRFTNGhEGTRNNIIAAGQTVELPEGNYRRLHILGASDNGNTDsTVTLHYTDGTATPVKFALTDWL-ASAAFGESEALRTNQIHTRTG-PAPLRAAVFHQVLAADSTRRLRAITLSANakPRSHVFAVTVEK------------
313
+ >UniRef100_A0A919K8V0_1050105/ 140 0.292 3.291E-33 33 224 233 4 198 752
314
+ ---------------------------------VPGGARAAiftDPVPVPLDDLFDNNGIGVAA--GDANLDGSGHGFPAAGMPT--GSVTVDGVPYRIAatSAAGQNDNVVALGQSVAVPAGQYVAGFLLAASAYGSaGGTVTVHYADGTTSSAEVSAPDWFS-----GDAGAVTAPYRYGPGGT-DQHAVALYAVQVWMDPARTAQSITLPSTaapaegvSSLHVFAFSLQPAVAV--------
315
+ >UniRef100_UPI001A8F7A74_2814588/ 140 0.287 3.291E-33 34 220 233 762 945 946
316
+ ----------------------------------PGGAS----VPVDLTAFLGNDAITSEFYLGDGDFDGAGNTYPAAQLPQTG-KVVDDGIEFTFVNGhEGTPNNVVPGGGPIALPSGAFRALHLLGASDNGNTNTkMTITYTDGSTAELPLRLTDWRN-SAAFGESEAITTSQLHAKDGPK-PVKLAIFHQTIELDPARSPKALTLPaaTRPRPHLFAITLEK------------
317
+ >UniRef100_A0A7C3IRJ5_2026780/ 140 0.248 3.291E-33 41 219 233 54 244 1084
318
+ -----------------------------------------GQVPLKLEALFDNDAIADAQRRSDGNFdcpdhaaDIPGSVFPAENLPATGSKFSFDNVSFLFPSKErGDFNNVSCNGQRIEVPPGRYKALHVVGTSENGSfRDRLSLAYKEG-PAEADLALKDWCQ-KPTEGDRVAFEAPCRYTWSSekrtmVREEVQPRLWLQRIALDPQKTLEAIALPYNRRMHVFAATLE-------------
319
+ >UniRef100_UPI0019308D5B_2282478/ 140 0.250 4.497E-33 52 217 233 916 1093 1096
320
+ ----------------------------------------------------NNVAVTDDANTNLGDFDGGRASLSAQALAsvgvRPGGAVDFSGLRFAWPDaAPGTPDNVVASGQVLKLAGSGGRLGFLVSSTWGPASGTGIVRYTDGTRQPFALSSPDWY-GAPKPGGVAAVVVPYQNRPNNQRQNTPATIYFAEVALQAGKQVRSVELpnvsaaarPNTPALHVFAVT---------------
321
+ >UniRef100_A0A401ZWU8_2014871/ 140 0.252 4.497E-33 31 217 233 752 959 1369
322
+ -------------------------------VTLPATAGPGtmhifavSTKYVPQAAVYNNIGTSDDSAPATGIFDGT-NSYSAQALAAvnitPGATVSYNGVNFIWPNvMVGTPNNYAAKGQMIPVnPVPNATTLAVLGSStDGNSTGTAVITYTDGTTQSFPLGLSDWTLGggysTPAYGNKVVATTAYRNTPSGQQSTDKPTILYADVTLNyPGKTVQSLTLPatmTGGQLHVFAVS---------------
323
+ >UniRef100_A0A5S5C606_582686/ 140 0.276 4.497E-33 42 228 233 2516 2703 2715
324
+ ------------------------------------------YAVVPLEAYMNDQGFGSLA-SGRANLTGMGtYFLPDDAITVEG--FRVDDRPFGIADlIDGQNDNISCHGQIIRLPQGRYDEIALLGCSEWGNYiEPCTAVYKDGTKEVFRLRFTDWT-GTPRFGEQIAWEGRiaERNEIRTFVIDQPGRIFMQTYALDGRKEAATLQLPYSPGMHIFAITLRQSQAALLPD----
325
+ >UniRef100_UPI0021AB612D_2867961/ 139 0.422 6.145E-33 52 223 233 1 175 183
326
+ ----------------------------------------------------DNTGITRPDDLGAGAFNVWSNTFPAQHLPpaDPDGTVDVGGVPFRFPAQLTGPDNLRCAGQRLDAPKGRYDWIYLLAAGERRTEDVLHLHFADGTTDPEWLRVPDfWPQTPPHFGFTTGITFPVMHYPRHIQRDMSPSVWRVRVPVPREAELTGIHLPDNPAIHVFALTLCGPAA---------
327
+ >UniRef100_UPI001C9D367C_1827978/ 139 0.403 6.145E-33 43 227 233 5 195 197
328
+ -------------------------------------------TVIDLSAHYDNRGIQPPDEPGEFGFNIWRNTFPAEELPEPGSLVEVAGTAFVFPPrPTAGGDNIRCRGQLVELPdgaaGGRYDWIGLVGAAERRTEDEVELHYRDGSVSRAWLRMSDfWPQTAAYFDEPLAFRTASMRYPRHTHRHHAPALWQQRIAVVRPEPLAAVRLPDNPAMHVFAMTAVVDEESRLA-----
329
+ >UniRef100_UPI00099FEB85_1841249/ 139 0.280 6.145E-33 33 222 233 765 957 958
330
+ ---------------------------------LPELPQTGQGTQVDLVGVFDNDAITTEMYYGDGDFDGTGRTYPMVQLPQTGQT-EDDGITFAFTNgSEGSNNNVIAAGQKIDVPAGGYANLHVLGAGDSGNVTiPAVATYADGSTAKADIRLTAWLSG-PAYGETEAVRTSQIHARSGPV-GTKAAIFHQKVALDPAKELSSITLtaPASGtaRAHVFALTLEKKS----------
331
+ >UniRef100_A0A401ZJF4_1936993/ 139 0.245 6.145E-33 46 216 233 922 1106 1110
332
+ ----------------------------------------------DLLGLFNNAGISND-GQGNADFDGDGYSYSEQQLTaagyAPGATVTVNGIQYSWPNVPaAAYDNVQVAGQTIQTPDAKagATQLTFLGSATNGPSvGNITITYTDGSTQTAQLGFSDWTLGagtsQPSYGNVVAVKTPYRNAGAGT-DNVGTYIFAsAPIALNTSKQVASITFPsslNQGALHVFAL----------------
333
+ >UniRef100_UPI00048B7B40_311244/ 139 0.270 8.397E-33 41 220 233 1 180 183
334
+ -----------------------------------------DYYKINLNSYYNNNAFTYEESSSKGDLTSFGSSYPAEYLPNEHE-ITVNGIPFIFPTKENKFNNIELENQSIIVPDDSYSTIYILGSSENGSYkEYMELRQNNDLKSRNIFALTDWISNYPMFMEQVAFECGYVHTKDSSIHSLKPKIWIQTLHLDPATEFNTIRLPDNPCIHIFSLTLQK------------
335
+ >UniRef100_A0A229H6X1_1945643/ 139 0.271 8.397E-33 31 220 233 762 950 953
336
+ -------------------------------PALPETGAA---TPVDLAGHFDNDAITTEMYYGDGDFDGTGRTYPMAQLPQTGQT-EDDGITFSFTNgSEGSNNNVIAAGQKISVPAGGYARLHVLGSGDTADvTVPAELGYADGSTAKVDVKLTAWLSG-PKYGETEAVRTSQIHTRTG-PLGTKAAIFHQKVALDPAKELATVTLgaPSgTARAHVFALTLEK------------
337
+ >UniRef100_A0A4D4LI39_68280/ 139 0.269 8.397E-33 33 220 233 763 952 955
338
+ ---------------------------------LPELPQAGAVTQVDLTGHFDNDAITTEMYYGDGDFDGTGRTYPMAQLPQTGQT-EDDGITFAFTNgSEGSNNNVIAAGQKVTVTAGGYTKLHVLGAGDTGNvSVPAEATYADGSTGKLTIQLTAWMSG-PAYGETEAVRTSQIHTRTG-PLGTKAAIFHQVVELDPGKELSAITLgaPSgTARAHIFALSLEK------------
339
+ >UniRef100_UPI0016620FE8_1896314/ 139 0.259 8.397E-33 48 217 233 961 1136 1139
340
+ ------------------------------------------------AAAYNNKGISADSDPGTGTLDPAGYSFSATQLAAvgytPGATVTAGGLPYTWPDTrPGQPDNVAAAGQVIRV-QGRGARLGLLGTGiSSTHAGTVTVTYTDGTSTDLAVVLPDWYSNAASGNSQLAVTTANWNRPPGDTLGdHAVSLYTTGGALDPAKTVATVTLPNDSGFHVFALS---------------
341
+ >UniRef100_A0A919QKH3_1070424/ 139 0.459 1.147E-32 38 221 233 10 193 209
342
+ --------------------------------------TAEGHHLLDLAGHRNNVGATAPDHLGAGAFNIWGNSFPAGELP-SGRPVLVDGVPFDLPvCGAAAPDNVRADGQFIEVAPARYDWLYVLAAAERRVEDEIAFHFTDGSVDFEPLRLSDFWAAPAVFGESAAVVTEVMHYPLHVQADVPAMLWCQRVPVTRRAVLRAARLPRNPAVHVFAATLREA-----------
343
+ >UniRef100_A0A5J4KKR6_2607529/ 139 0.250 1.147E-32 32 217 233 166 359 362
344
+ --------------------------------TLP-KTTGGQMHIFDFSyrvGPFNNIGGTIDEDLQQiQNFDGQHNGYSYDALSAaglPKGSVTINGVNFNWrSAADGNADNYQASGQVVPVtPVAQAKTLAFLGASTGGaASGTATITYTDGSQQTFTLGLTDWCAPTVAYNNRVAAAATYRRTPHG-NQTIKTSVYYTDVALQSGKTIKSVTLPTNGQIHIFDIS---------------
345
+ >UniRef100_E2PX58_1901/ 139 0.288 1.147E-32 33 221 233 751 941 944
346
+ ---------------------------------LPPLGPTGGVTPVDLAAHFDNDAITTEMFFGDGDFDGTGRTYPMAQLPQTGRT-TDDQITFSFANgSEGSKNNVVAAGQRVAVEPGSYARLHVLGSGDTGNvTVPAVLSYADGTAATVPVRLTGWLSG-PAYGETEAVRTSQIHTRGGPV-GTKSAIFHQRVPVDPGRRLVSVTLGRpsgTARAHVFALSLEAA-----------
347
+ >UniRef100_UPI001C0E2853_2842201/ 139 0.268 1.147E-32 6 220 233 745 952 955
348
+ ------PDFR-----IAPVQVTTPAEPWG-LPELPQAGAVTQ---VDLTGHFDNDGITTEMYYGDGDFDGTGRTYPMAQLPQTGQT-EDDGITFAFTNgSEGSDNNVIAAGQKVAVPAGGYTKLHVLGAGDTGNvSVPAVATYADGSTGPLTIQLTGWMSG-PAYGETEAVRTSQIHTRTG-PLGTKAAIFHQVVEVDPAKELSAITLtaPTgTARAHVFALSLEK------------
349
+ >UniRef100_D2PSN6_479435/ 138 0.394 1.568E-32 12 218 233 11 216 225
350
+ ------------MVAVRPYSGPVTVGPDS-VTEPPPESPHVGCRPVDLAPHRNNVGSTPASGTRNGAFNIWGNSFPAEELPPPG-LCAVDQVVYDFPPTePGTADNVRAAGQFVEVPAGRYDWLYVLGAAERRVEDELAFHFADGAVDFEQLRLSDFWAAPAWFGETQVRATRSMHYPFHVQAGVPAMLWSQRVPVTRRSVLSAVRLPRNPAVHLFAATL--------------
351
+ >UniRef100_A0A1H6EW60_1144553/ 138 0.264 1.568E-32 52 215 233 201 378 384
352
+ ----------------------------------------------------NNAGVGDDTEPGQADFDNGGWSYSAQALAAgarPGGTVTWKDHTFTWPNrKPGEWDNVQAAGQTVDLTaPAGAGTLALLGAGASGDIETgVTITYTDGSTQETKVGFSDWALArdayPPRFGNEIVLRTPYRLDGGGGRQDINVYVFAVTpIRLDPAKQVKSLTLarPAGAAtAHIFA-----------------
353
+ >UniRef100_UPI00143AD3F7_2720023/ 138 0.262 1.568E-32 52 215 233 905 1083 1089
354
+ ----------------------------------------------------NNAGIGDDDEAGLANFDGVGWSYSAQALAAagarPGGTVTWKGYDFTWPDrEPGEWDNVQASGQTVDLaAPAGAKTLALLGAGASGDiQTDVTITYTDGSTQQVKVGFSDWALArdayPPRFGNEIVLRTPYRLDGGGGRQDINVYVFAVTpIELDASKQVKSLTLakPTGAAtAHVFA-----------------
355
+ >UniRef100_A0A2T5BN31_2135609/ 138 0.460 2.142E-32 45 219 233 17 190 194
356
+ ---------------------------------------------VDLTPYRNNIALTTDKTLHRGMLNVWGNSLPAEVLPHDG--LTVGGIPFRgVPGGGAEPDNVRCAGQYLELPRTTADWLHLLATSERRCEETAFVHYASGAADPEWVRVSDFLPARAHFGEVLAARSDALHYPHHRQENLGGRLWAVRIPVTRREPVCGLRLPDNPALHVFALSLE-------------
357
+ >UniRef100_UPI001915634B_2778368/ 137 0.250 5.464E-32 31 216 233 907 1107 1111
358
+ -------------------------------VKLPISAlTVVVAKPGNLLGLFNNAGISND-GQGNADFDGDGYSYSAQQLNAagytPGATVTVNGIQYTWPNVAvATFDNVQVAGQTIQTPDakvGATHLTFLGSATNGPSSGNVTITYTDGSTQTAQLGFSDWTlgagNSQPSYGNVVAVKTPYRN-AGPGRDQVATYVFAsAPISLNASKQVASITFPasvDQGALHVFAL----------------
359
+ >UniRef100_A0A8J3HZW2_2778364/ 137 0.241 5.464E-32 51 222 233 775 958 1149
360
+ ---------------------------------------------------YNNVATSDDGSPAGGSFDGI-NSYSSQATQSlgliPGASVNVYSTTFIWPAVnPSYKNNYVAQGQVLPIkPVNNAGTLAFLGsSSYGPSSGTLTVAYTDGSTETFTLGFSDWTlaggKASPSFNNLVAMSMPYRNTPHG-KQNINTYIFYSEIPLAVDKTVQSITLPsdvKNGQLHVFAISTRTGS----------
361
+ >UniRef100_UPI001EF7C8F6_2807632/ 136 0.486 1.393E-31 38 218 233 15 194 208
362
+ --------------------------------------GSPLFRTVDLTGHLNNKAVTTESRKGEGRLNVWRNTIPAQHFSAAGSRVEVDGIPYLLAP-ESAYDNVRCAGQYVTVPEGRYDWIRILATGERRAETEIAVHFADGHIDFEALRVSDFWHAPPRFGESVAYRTPVMHYPHHVQERVNAGVFSTRVPVSRQAAVCALRLPRHVGVHVFALTL--------------
363
+ >UniRef100_A0A938MH69_2026780/ 136 0.231 1.393E-31 40 221 233 21 217 906
364
+ ----------------------------------------PQQLPLNLVELFDNDGISSEKNRKDGNFDcpdhpahVPGSTYPAEFLPESGAIFTappLPDVSFLFPNkADGEKNNFSCAGHKFEPPVNAYAALYILGTAENGKQeGEIGLNFEDG-QLNLPLKFSDWCEP-AQFGEVEAIAVPFRYswqerGGRMEKEDITCRLWVQKIPLPEKRRLESFVLPYNARMHVFAITLVAA-----------
365
+ >UniRef100_A0A8J3HZW2_2778364/ 136 0.232 1.393E-31 51 215 233 965 1144 1149
366
+ ---------------------------------------------------YNNMGTSNDNTPGSGNFDGGHMSYSAQALQskgyKPGSVVVFNGTSFVWPaSTPGTVDNYVAQGQVIPVNSVyNANYIAFLGSASHGPlQGNVLVTYTDGTTETVSLGFSDWTLGagkmKPSYGNKVALTTTYRNGPKG-KQNVATYMFYCEATLgALNKQVQSITLPTLPagpgQLHIFA-----------------
367
+ >UniRef100_A0A1B2HKE2_1586287/ 135 0.257 1.903E-31 52 215 233 910 1080 1085
368
+ ----------------------------------------------------NNVGASDDA-TGDGDFDGGGYSYSRQALAtaglVPGQTGTVDGLTFTWPGSPaGRPDNVTANKQKLTL---SGTKLAFLGSAANGARtRTATVTFTDGTTAPVEIGFSDWTLGgggaAPSYGNVVVANTPYRNQLGGGSEKVATHIFATkTYVAPEGKVLQSVVLPEDVNLHVFA-----------------
369
+ >UniRef100_A0A7G8KM49_2763006/ 135 0.264 2.600E-31 15 223 233 138 363 747
370
+ ---------------VGPTAAPAgRAEPTAAPAVVPpavgegAPATGRAGRPVPLERLFDNVGTSDDADPGAADLDGAGNSLSARDLAAagwtPGRALTLDATRLEWPrSAPGRPDNVRANGQHVAL-SGTGDTLTFLVTGSSpgrvgpGAGGSGTVHYRDGSRSTYTLTAPDWRGGPLA---TKAVALPHHNTPAGQR-REAVRLYAVSVPLARGAAVASLTLPADPgpdaDLHVFALAVRPPAA---------
371
+ >UniRef100_A0A2T0TAZ4_84725/ 135 0.262 2.600E-31 52 216 233 920 1092 1096
372
+ ----------------------------------------------------NNIGASDDAGSEVADFDGGGYSYSRQALAAaglsSGATGTVDGLTFTWPNSPNdRADNVVTNGQSVDVTGTKLS--FIGAAADGGRTAAATVTYTDGTTGTVDLGFSDWTLGgggqNPSYGNVVVAKTPYRNLLGGGNEQVVTNIFATkTFTAPEGKVLKSVQLPTNEGLHVFAI----------------
373
+ >UniRef100_S5TLH7_1366598/ 134 0.453 3.552E-31 41 220 233 38 216 219
374
+ -----------------------------------------EYFTVDLSGHLDNIGLTMPDQLAAGALNVWGNSLPAGALPA--GAVEVGGVPFVTAGGDGsRPDNVRCAGQLLDLPPVAGSWLHVLATSERRCEEELHVHYADGAVDPEWLRVSDFWPAAAHFGEVAAARTGAMHYPHHIQGDLGGQIWATRVPATRGGTLAALRLPDNPALHLFALTVET------------
375
+ >UniRef100_UPI00218069CC_1654476/ 134 0.250 6.629E-31 52 216 233 920 1092 1096
376
+ ----------------------------------------------------NNIGVSDDKGSEEADFDGGGYSYSRQALAAagltSGGTGTVDGLAYTWPNSPeDRADNVIAAGQSIDV---SGTKLSFLGAAAGGARtAAGTVTFTDGTSAPIDLGFSDWTlgggSQNPAYGNVVVAKTPYRNQIGGGSEQVVTNIFATkTYTAPDGKVIKSVKLPTNGGLHVFAI----------------
377
+ >UniRef100_UPI001EF8F640_2911966/ 133 0.269 9.055E-31 43 217 233 5 184 188
378
+ -------------------------------------------VCVDLSKLFNNKGVSWIGLGIAGSFDHSGISVPGEILP-DGITVTWCDIPFQFPKTSSiENDHICCEEQTIEVDPNSYRQIYVAGFSlYGDYSDRVWIHYEDGSSEDKEFGLSDWYSRTEAgnglkHSEQVAMMIPY-YYENGIKIQAMRGIWIQRLLIDPNKKLHSIRLPDNPYMFIFAIT---------------
379
+ >UniRef100_UPI001A9FE9C4_2530381/ 133 0.413 9.055E-31 45 232 233 3 192 221
380
+ ---------------------------------------------IDISPYLDNVGATTAGRTATGRLNVWNNSFAAENLPPGGSNVMVGGVPYVMPPFGGiDPDNIRCAGQYLELpgPGRAVDWLHLLATGERRVEDEIAVHFADGSVDFEPVRVSDFWAAPAAFGEVPAFET-LMHYPAHTQFGVPASVWSQRVPVTRRLPLIGIGLPVNAALHVFAVTLQPPDGTRRPAAVST
381
+ >UniRef100_A0A193C823_31958/ 133 0.259 9.055E-31 21 221 233 734 943 945
382
+ ---------------------PRGKRPDTTIAPVPVSVPAPPWglpalppagdlVPVDLAAVLNSDGVTSEFYLGDGDFDGGGNTYPAAQLPQTGQ-VTDDGVPFLFVNgSEGTPNNVV--GATIPLPAGKYATLHVLGAADtGNAVTTLKVSYVDGS-AEVPLRLTGW-RAAAAFGESEAITTNQLHARAG-VQSVKLAIFHQRVPLDPAREVVSVTLPSaaTPRPHVFAVTLEKG-----------
383
+ >UniRef100_A0A6G3RW39_2706073/ 132 0.402 1.690E-30 47 223 233 21 198 199
384
+ -----------------------------------------------IEDFLDSVGVEPSSRSGSGAFNIWGNAFPAGELPC-GTLTAHDRVPFRFPAADGvRPDHLRCRGQRIALPAGRTDWIHVLGAAERRTEDGVLLEYADGSVRRQWLRLSDfWPETGPRFGERLAYRTTAMLYPRHEQYTMSPSIWHQRVPVAVPDGLTALVLPHNPAMHLFALTLVTEAA---------
385
+ >UniRef100_A0A1B1MEJ8_1915/ 132 0.285 2.308E-30 50 216 233 931 1111 1115
386
+ --------------------------------------------------YLDNNGVSADDNDPSGDFDGGGGSYSAKALAdqklTPGATVDAGGFSFTWPKvGPGAPDNIVVGGgeQVLDVPGGATKLAILGSASNGPSTGTLTLTFTDGSTQQATVGFSDWTLGggaqKPSYGNVVAARTAYRLY-SGSTDDVDTYVFAtAPIAVPAGKRLASVTLPSStsgGRMHVFGL----------------
387
+ >UniRef100_A0A7X3GHH6_2682849/ 132 0.266 3.152E-30 43 222 233 343 522 523
388
+ -------------------------------------------VPIDISGICAIRGI--HNSLGTADVDGDGHSYSREGLPVDG-LLRIGDLSFVFPASQNEAdcDNTACDGQAIPIPPGHYHGISVLASSqYGGSADAFTVEYADGTSEQVQLGFADWWSRFPIAGEKVAWSA-NLNRPSLGKTEEIVHLFANEAPLSRtGSTAVRIVLPNLPNLHVFAVSMWKQA----------
389
+ >UniRef100_A0A7V2U765_2026780/ 132 0.279 3.152E-30 34 218 233 722 916 923
390
+ ----------------------------------PPPAANVECVHLNaLAGHFNNDGISWRANPADGNFDfpsrASGASFIADLLPKKGALLAVpgnEGVTFRFPDKDDRLrNNVLCDGQRLQLarPYGSFEAAWFLGACHDGArSALLTIDYEDGKAQ-GELRLADWLS-RPAAGEIDVLRLSARHAGDGKEEARDCGLVAWRVPLDPSRKLMALTLPRERNMHVFAVTL--------------
391
+ >UniRef100_UPI00224096B5_202862/ 132 0.275 3.152E-30 57 221 233 867 1044 1047
392
+ ---------------------------------------------------------TSTATRPVGDFNGYGGSFVAEELAedglVPGQDVTVDGIGYRWPDaAPGSPDNINAHGQTVTVHAAPGqSRLGLLGAGNEGaATGEVTVHYTDGTTRRASVTLGDWhldGATAPPPGNTAAATMPERQMAGGTPEKMTVYIWSTSVPVDPHRTVASVTLPERttgGQMHVFAVGVGEA-----------
393
+ >UniRef100_UPI00068FB531_1463857/ 132 0.264 3.152E-30 52 215 233 910 1093 1098
394
+ ----------------------------------------------------NSKGVSDDAKP-QGNFDGEGWSYSAQALAAggavPGGTVSAGGFSFTWPDVqPGDPDNIQvtpgqpATGQVVAVqPAAGATKLSLLGsAAEGTAKGTVTLTYTDGTTQQADIGFSDWTlgggSQQPSFGNTVAVKTSYRDTLSSGPDPVGTDVFAtAPIALQAGKQLASVTLPSTVSggvMHVFA-----------------
395
+ >UniRef100_A0A7I8EN04_2717365/ 131 0.262 4.305E-30 46 223 233 230 407 968
396
+ ----------------------------------------------PLPSLLNNQGIGSAA--GQANFDGSGYGYPSDQVPA-GGIIGLAGVSYLFPtHGSATKDNVVALGQSITLPQGHYQQASLLtASSYGPASGTVTVHYTDGSSSTATLNAPDWYN-----GTSDVINTTYRYTPTGT-DQHAVHIYVSQVWIDASRVAASLTLPqtalpaaNTASLHVFALTLQLPSA---------
397
+ >UniRef100_A0A5A5TI23_2014874/ 131 0.229 4.305E-30 49 215 233 103 283 1208
398
+ -------------------------------------------------PAYNNAGSSNDSNPNEGSFDG-NNSYSVQALQGtgllPGQNFTFSNVTFVWPNaAAGTPNNYLVNGQVIPVAPsiaGAANLGFLGASTNGNASGNATVTFTDATTQTFSLGLSDWTlggnpQSTPAFSNQIAATMTYRNTP-VDQQTITTFLFSAQLALPAGKTVQSVTLPtttSSGQMHIFA-----------------
399
+ >UniRef100_A0A3N1SWC4_2485152/ 131 0.418 5.880E-30 45 220 233 17 191 194
400
+ ---------------------------------------------VDLAPHLNNVALTAMQNLGDGRLNVWGNSLQFETIPS--GRLSVDGVPFeTFNEVDSEPDNIRCDAQYLKLPESKADWLHLLTAAERRCEETVHIHYSSGAVDPEWIRVSDFWPARAHFGESLAATSPSMHYPHHRQGNLSGQLWAVRIPVTRREPVRALRLPDNPALHIFALTMET------------
401
+ >UniRef100_UPI0022597B98_2903819/ 131 0.269 5.880E-30 52 215 233 907 1088 1093
402
+ ----------------------------------------------------NSTAISTDDDNPQANFDGEGWSYSAKALAAagatPGGTVSSGGFDFTWPKvAAGDPDNIEVAGdtpQVLNVPSAQgATKLSLLGsAAEGSASGTATLTYTDGTTQKADIGFSDWTLGggadKPSFGNTVAVHTPYRDVQGGGTDPVGTEIFAtAPVTLQAGKQLASVTLPSTTDggvLHVFA-----------------
403
+ >UniRef100_A0A6L5AS91_2575830/ 130 0.243 8.031E-30 42 219 233 2 182 190
404
+ ------------------------------------------YYLVNLEEYYCNCGFSFINQRENGDFTGSGSSYPAEQLPSSEKVIKIHEVPFHFPSKeKGKFNNIEFNEQKIKVKEDSYQAIHVLGAADNGSFlEPISLIDQQGSVSKVMLGFTDWVDYEPKFNDLKAIVCKGVHSAQlGCLEGMQCTIWYQKVKVPTGVLFNEIRLGDNPGMHIFSLTLE-------------
405
+ >UniRef100_UPI000D13FE3E_1463889/ 130 0.241 8.031E-30 52 215 233 907 1088 1093
406
+ ----------------------------------------------------NSTGISTDDDDPRANFDGEGWSYSAKALAAagvtPGGTVSSGGFDFDWPKvGAGDPDNIEVAGagpQVLNVPSGPAdTKLSLLGsAAEGSASGQVTLTYTDGTTQQAEIGFSDWTLGggtqNPSFGNTVAVHTTYRDVQGGGKDPVGTEIFAtAPITLQAGKQLASVTLPSATDggvIHVFA-----------------
407
+ >UniRef100_A0A543N991_405555/ 130 0.437 1.498E-29 45 219 233 17 190 194
408
+ ---------------------------------------------VELQRYKNNIALTTLQTLHLGALNVWGNSLPSETMP--HEQITVDGVPFSVSPATGEaPDNIRCAGQYLELPETAADWLHLLATSERRCEETVHIHYSSGAADRERLRVSDFLPARSHFGELLAARSPAMHYPHHRQDNLSGQIWAVRVPVTRRETLRGFRLPDNPAIHIFALSTE-------------
409
+ >UniRef100_UPI001CB70AD5_1896314/ 130 0.250 1.498E-29 50 215 233 902 1085 1090
410
+ --------------------------------------------------YVNNNGISADDSNPAANFDGEGWSYSAAALAAagatPGGTVSSGGFDFTWPQvAPGAPDNIVVGGgdQVLDVskSAAGATTLSLLGsASEGPTTGTVTLTYTDGTTQQADIGFSDWTlgggSQQPSYGNVVAVHTAYRDVQGGGKDPVGTEIFStAPIALQAGRQLASVTLPSSTNggdMHIFS-----------------
411
+ >UniRef100_UPI002258EC57_2903886/ 130 0.263 1.498E-29 52 215 233 934 1115 1120
412
+ ----------------------------------------------------NNTAISADDDNPQANFDGEGWSYSAKALAAagatPGSTVSANGFDFSWPEvTAGDPDNIEVAGstpQVLTVPPAPgATELSLLGsAAEGSASGTLTLTYTDGTTQQADIGFSDWTlgggSDKPSFGNTIAVHTDYRDVQGGGQDPIGTEVFAtAPIALQAGKQLASVTMPSSTKggvIHVFA-----------------
413
+ >UniRef100_UPI001940CD9B_113564/ 129 0.462 2.045E-29 60 231 233 1 174 176
414
+ ------------------------------------------------------------SNTSSGRFNVWGNSFAAEDLPAGGTGVVVDAVTFTMPPTgTGAPDNVRCAGQYLDLGPQAdlADWLYLLAAGERRVEDEIALHFADGSVDFEPVRVSDFWAAPPAFGETKAFAT-LMHYPIHTQFGVPATIWCQRVPVTRRAPLTGIGLPHNVALHIFAATLQPAAARPAALEVT-
415
+ >UniRef100_A0A2T3VKX0_2135446/ 129 0.416 2.045E-29 36 219 233 3 186 198
416
+ ------------------------------------AAGTGRHRVLDLTDHLDSTGFTRPDATDAGALNIWGNTFATGDLPAGHPHWLVDGVPFRTAEA-GGPDHLRCRGQYLEVPIGRYDWLHLLATAERRTEDPVLLHFADGAVDPEWLRVSDlWPGGQAHFGEAPALRGSRLHYPRHVQADMRPTLWAQRVAVPRRAPLVGLRLPENPALHVFAVTLE-------------
417
+ >UniRef100_A0A4S2RGE1_2563103/ 129 0.241 2.045E-29 52 215 233 907 1088 1093
418
+ ----------------------------------------------------NSTGISADDENPQANFDGEGWSYSAEALAAagaePGGTVSSGGFDFAWPQVhAGDPDNIEVVGsgpQVLNVPSAQGdTKIAFLGsAAEGEASGTVTLTYTDGTTQQAEIGFSDWTLGggtqQPSFGNTVAVHTGYRDVQGGGKDPVGTELFAtAPITLQAGKRLASVTLPSNTEggiIHVFA-----------------
419
+ >UniRef100_UPI001CD27338_2877242/ 128 0.264 5.209E-29 52 215 233 900 1083 1088
420
+ ----------------------------------------------------NSRGISDDTNPT-ANFDGEGWSYSAQALadagAAPGGTVSANGFDFTWPDvKTGDPDNIQvtpgapAPGQAVSVtPVSGATKLSLLGsAAEGSAEGTVTLTYTDGTTQQADVGFSDWTlsggADQPSFGNTVALKTTYRDVQGSTADPVGTDIFAtAPIALQAGKQLASVTLPSAVSggvMHVFA-----------------
421
+ >UniRef100_A0A4U0SM40_2571141/ 128 0.254 7.113E-29 50 216 233 920 1104 1108
422
+ --------------------------------------------------YLNNNGVSSDDDSPAANFDGGGYSYSAKALAAagvtSGSTVSSGDFRFTWPKVNaGDPDNIAvgGGGQVLDAPaaPAGASRLSLLGsASNGAASGTVTLTYTDGTTQEEQIGFSDWTLGggsrQPSYGNSVVVRTDHRDVSGGGTQTVETDLFStAPIVLASGEQLQSVTLPDTvsgGTIHVFAM----------------
423
+ >UniRef100_A0A1Q5MVC9_1703920/ 127 0.247 9.712E-29 52 215 233 915 1096 1101
424
+ ----------------------------------------------------NSTAISTDDDNPQANFDGEGWSYSAEALaaagAAPGGTVSSGGFDFGWPEvAAGDPDNIEVAGtepQVLNVPSASGdTKLSLLGsAAEGSASGQATLTYTDGTTQQAEIGFSDWTLGggaqKPSFGNTVAVHTAYRDVQGGGTDPVGTEIFAtAPIALQAGKQLASVTLPSATEggvIHVFA-----------------
425
+ >UniRef100_A0A1C4K485_1839759/ 127 0.244 9.712E-29 50 215 233 916 1099 1104
426
+ --------------------------------------------------YVNNAGISADDQNPAANFDGGGWSYSAKALAAagavPGGTVSAAGFDFTWPQVqPGAPDNIVVGGgdQVLDVsaTSAGHTRLSLLGSADDGDtSGTVTLTYTDGTTQQAQIGFSDWTlgggAGQPSFGNITAVHTTYRDVMDGTTDPVGTDVFAtAPIDLQAGKQLASVTLPattSGGDMHVFA-----------------
427
+ >UniRef100_UPI001575013E_227866/ 126 0.233 2.472E-28 40 220 233 1 182 187
428
+ ----------------------------------------GKYEMINLEELFNNKGISRMNNPQDANFTGFGSSYISENLPSSGSVLELYGIPFLFPNKDPlVVDNLEFMNQEIQIKEAYYSNIHVLGAADNGSFiEPIKLVGND--IKEVKIGLTDWIESSPKFNNKKGIICEGINsNKSGYTRTVKTNIWYSVVGIPSNNLFKSILLVDNPSMHIFSLTLEK------------
429
+ >UniRef100_UPI00068E7F42_1444769/ 126 0.349 2.472E-28 27 231 233 6 214 219
430
+ ---------------------------QTAQTVQTAPTDQTAHKVLALEPFMNNQAATTPANLADGRLNVWRNSLPAQDAPLE---LLVDGVPLRTARLDGtGPDNVRCAGQRIEVPERRWDWLYLIGCGERRVRDVLTWHFSDGTVDRDHLALSDLWEGRSDFGEELALRTDVIHYPHHVQERIGITLWCQRVPVTSRKPLTAMSLPRNPAVHLFALTLvgrhlgAGADGEPLIQGAS-
431
+ >UniRef100_UPI000F0856B3_2483798/ 126 0.419 3.375E-28 25 221 233 2 194 202
432
+ -------------------------ETTTRVTTAGGSAG----TLVDLSGHRNNLSCTTTDSLREGRLNIWRNSLPQVEMPTAG-LLEIHGVGFSLPRFDGiNPDNVVCSGQHLACASGSYDWIYLLATSERRSEDQFSLHFADGSVDFESFRVSDFWHAEPAFGDRRAFGTTQMHYPFHVQPNLRGDVWFQRVPVTRMVPLSGVTLPDNVAIHLYAMTLIPS-----------
433
+ >UniRef100_UPI001890D6EE_2705253/ 126 0.288 3.375E-28 50 217 233 456 631 634
434
+ --------------------------------------------------ALNATVISDDDAATEGCFDRACDSFSADALAAGGATaggkVTVAGQSFDWlDNGSGRFDSITADGQTIDY-YGSGTTLGILGASSGGDtSGTITLTYSDGTTSTATLGFPSWVTGHPtAFGDTTAVTTLHRDTPHGPGQARAAHyvVSYAPVAITPGKSLVSITLPNKPALHVFSIT---------------
435
+ >UniRef100_UPI00203463BB_2888348/ 125 0.258 4.608E-28 52 215 233 907 1088 1093
436
+ ----------------------------------------------------NSTAVSTDDDNPQANFDGEGWSYSAKALAAagatPGGTVSSGGFDFGWPKvAAGDPDNIEVAGaapQVLNVPSKEGStKLSLLGsAAEGSASGQATLTYTDGTTQQAEIGFSDWTLGggadKPAFGNTVAVHTAYRDVQGGGTDPVGTEIFAtAPIALQAGKQLASVTLPSSTDggvIHVFA-----------------
437
+ >UniRef100_A0A7W9IM19_1816182/ 125 0.437 6.291E-28 45 219 233 13 186 190
438
+ ---------------------------------------------VDLSEHRNNVAFTTPVALDRGRLNVWGNSLPAGTLPS--GRHLLGDVLFDLPEADGRaPDNVRCAGQFVTLPRVRADWIHLVATSERRCEEFVHIHHSSGAVDVEWLRVSDFWSADPHFGESVAGRSADMHYPHHVQRGLIGLVWLVRVPVPRREEITGIRLPDNPALHVFGLTLE-------------
439
+ >UniRef100_A0A3N1SMM6_2485152/ 124 0.252 8.589E-28 52 215 233 930 1111 1116
440
+ ----------------------------------------------------NSTAISADDDNPQANFDGEGWSYSAKALAAagatPGGKVSSGGFDFGWPEvAAGDPDNIEVAGtepQVVNVPAAPGdTKLSLLGsAAEGSASGQATLTYTDGTTQQAEIGFSDWTLGggadKPSFGNTVAVHTAYRDVQGGGTDPVGTEIFAtAPIALQAGKQLASVTLPSTTDggvIHVFA-----------------
441
+ >UniRef100_A0A7W3ZVE1_2213162/ 124 0.284 1.172E-27 34 223 233 32 231 614
442
+ ----------------------------------PAALRAPAAEPRPLRSYFDNRAVSHADRPEAADFDGSGNSLPASGLTAagwtPGSRLLLDATPLTWPDrTPGEPDNVRADGQLVAVDGRGPALTMLLAAtSPHGPTDPVTatgrVHYRDGSSARYTVTAHDWRTGPAA---TSALSLPHHHTPDGVS-TRRARLYAVTVPVDPTRSVRALALPADPgpraDVHVFAVGLRAAAA---------
443
+ >UniRef100_UPI0005AAA057_405782/ 124 0.404 1.601E-27 47 218 233 8 177 187
444
+ -----------------------------------------------LEPFLNNQAATTPENLGDGRLNVWRNSLPAQSAPLE---IVVDGVPLRSARLDGRgPDNVLCAGQRIEVPGRRWDWLYVIGCGERRVRDTVTWHFRDGTVDRDHFALSDLWEGRSDFGEELALRTDVIHYPFHVQERIGITLWCQRVPVTSRKPLSAVSLPRNPAVHLFAMTL--------------
445
+ >UniRef100_UPI000FCC90E2_2487275/ 123 0.262 2.185E-27 52 216 233 914 1096 1100
446
+ ----------------------------------------------------NNKGISADDTNPAANFDGGGYSYSAKALasagATPGATVTSGGFGFTWPKvKPGDPDNIAVSGgdQVLDAPAGsaNATRLSLLGsASNGAADGTMTLAYADGTTQEAQIGFSDWTLGggnrQPSYGNTIALRTAYRDISGGGSQQIATDVFAsAPITLAAGKQLKSVILPTavaGGTLHVFAL----------------
447
+ >UniRef100_A0A553ZJZ9_2595054/ 120 0.264 2.631E-26 43 219 233 38 221 606
448
+ -------------------------------------------HALPLTHLYDNTAVGGGSRPGAADFDGAGNALSAPDLSAagwtPGRALDLDAAPLTWPRAtAGRPDNVVANGQTVAV-GGTGPALTFLAAATGtgaPATGNGTVRYRDGSVSGYRLTAPDWRSGPPA---TKAVALPHLVTPHGTVDG-PARLYAVTVPLRPGRAVASVQLPRAAAsgaeLHVFALAVR-------------
449
+ >UniRef100_UPI00225B5EAE_2975715/ 119 0.387 6.686E-26 47 223 233 8 182 198
450
+ -----------------------------------------------LDPFLNNQAATTPENLTDGRLNVWRNSLPAQSAPLD---IVVDDVPLRSAPLDGRgPDNVLCSGQRIEVPERRWDWLYVIGCGERRVRDVVTWHFSNGTVDRDQLALSDLWEGRSDFGEDLALRTDVIHYPHHVQERIGITLWCQRVPVTSRKPLGAMSLPRNPAVHLFAMTLVGRQA---------
451
+ >UniRef100_UPI00147FFFB7_81569/ 119 0.342 9.123E-26 45 217 233 36 210 220
452
+ ---------------------------------------------VPLLSYFNNIAFAGLNDLSKGQLNIWRNSYPQEAVANLPGYIDVGRVKFSFPQTDGEtPDNIRCDGQRIVLPVDQYDWIYMLCSAERRVEDEATLFFSDGSVDFVPFCVSDfWPGAKGRNGEIEALRFDSLNFPRHRQVRIEPAIWRSRIPVARETPLSAIRLPRNVAAHLFAVT---------------
453
+ >UniRef100_A0A1M5CPJ9_1206085/ 118 0.451 1.245E-25 44 218 233 10 181 187
454
+ --------------------------------------------PVGLDAIRDNVAMTGADELGAGGLNVWRNSLPAGTYP--GAPVEVDGVPFT-GSPHTGPDNVRCAGQTLPVEIGRWDWLWLLATGERRVEDEIAMLFTDGAVDLEAVRVSDFWAAPAAFGETVAFRSEVMHYPHHVQSRLPGTIWCQRVPITRRADLVAIRLPDNLALHVFAATL--------------
455
+ >UniRef100_UPI0003616C8A_239974/ 118 0.437 1.699E-25 45 219 233 17 190 194
456
+ ---------------------------------------------VDLSTHRNNIALTTPETLRNGMLNVWGNSLPAETLRTDC--LRVGRVRFAGVRANGtEPDNVRCAGQYVDLPEIEADWIHLLATSERRCEEEVGVHYASGAATTEWVRVSDFLPARARFGELAAARSSALHYPHHRQEDLSGQVWAVRVPVTRWEAVRGLRLPDNPALHVFAISVE-------------
457
+ >UniRef100_UPI000A606C17_1911/ 117 0.427 2.317E-25 42 219 233 20 197 201
458
+ ------------------------------------------QLPLRLDEFFNNVAFTGPETLGNGRLNVWGNSLPIGTLPT--GRIRVSGVEFDVSAPDGvAPDNVRCTGQLVHLPDGtAADWLHLLITSERRCEETVHLHYASGAVDPEWIRVSDFWPATGHFGEVLAARSAGMHYPHHIQRDLGGQVWSVRVPAVRREPLRSVRLPDNAALHLFAITVE-------------
459
+ >UniRef100_UPI0016035FF4_2448886/ 117 0.273 2.317E-25 19 223 233 17 231 611
460
+ -------------------SLPSLVPPAGAAAPGPTGVRAAAAEPRPLHRYFDNRAVSRPDRPDAADFDGAGNSLSASELTaagwAPGSRLLLDATPLTWPDrAPGEADNVRADGQLVTVTgRGPAVTLLLAATSPYGPAAPVTasgrVHYQDGSSTRYTVTAHDWRTGPAA---TSALSLPRHHAPDGVKAGR-ARLYVVTVPVDPTRTVRALALPADPgpdaDVHVFAVGLRASAA---------
461
+ >UniRef100_UPI0020C07DD2_2707767/ 117 0.398 3.162E-25 47 218 233 8 177 198
462
+ -----------------------------------------------LEPFLNNQAATTPDNLGEGRLNVWRNSIPAQ--PGPLELV-VDGVPLRSARLDGdGPDNLVCAGQRIEVPERRWDWLYVIGCGERRVRDVITWHFADGSVDRDQLALSDLWEGRSDHGEELALRTDVIHYPYHVQERIGITLWSQRVPITSRQPLTALSLPENPAVHLFAMTL--------------
463
+ >UniRef100_UPI000E22B0D5_871959/ 116 0.382 8.028E-25 45 218 233 6 177 181
464
+ ---------------------------------------------LPLEPFLNNQAATTPANLADGRLNVWRNSIPAQ--PGPLERV-VDGVPLRSTRLDGnGPDNLVCSGQWIEVPRRRWDWLYVIGCGERRVRDYVTWHFADGSASRDRFELSDLWEGRSDYGEEFALRTDTIHYPQHVQERIGITLWCQRVPVTRRQPLCALSLPENPAVHLFAMTL--------------
465
+ >UniRef100_UPI00068A9E4F_1463917/ 115 0.376 1.095E-24 47 223 233 8 182 198
466
+ -----------------------------------------------LEPFLNNQAATTPDNLADGRLNVWRNSIPAQ--PGPLERV-VDGVPLRSARLDGnGPDNVLCAGQRIEVPERRWDWLYVIGCGERRVRDVITWHFSDGSVDRDHLTLSDLWEGRSDQGEELALRTDVIHYPYHVQERIGITLWCQRVPITSRQPLSAMSLPMNPAVHLFAMTLVGQSA---------
467
+ >UniRef100_UPI0003469B67_225762/ 114 0.426 3.791E-24 43 219 233 1 176 180
468
+ -------------------------------------------VPVDLSQHLNNIALTDFETLSNGRLNVWSNSLPMETLRADH--IQVGRVGFAGSKATGtEPDNIRCAGQYVDLPEMNADWIHILATSERRCEEEVGVHYTSGAVAGEWIRVSDFLPARAHFGEMAAARSFAMHYPHHRQEDLRGQVWCVRVPVTRWEAVRGLRLPDNPALHIFAASVE-------------
469
+ >UniRef100_V6K9D2_1352941/ 113 0.429 5.170E-24 91 221 233 0 134 140
470
+ -------------------------------------------------------------------------------------------MAFEFPaRATGRGDNIRCRGQLLPLPGVRADWLYLLGAAERRTEDEVELHYADGAVRTAWLRMSDfWPETAAWFGEPEAFRGSGLRYPRHTQDGHRPAIWQQRVPVTVPGELTALRLPDNPAMHVFALTavLEPG-----------
471
+ >UniRef100_A0A022MHX4_1883/ 112 0.369 1.311E-23 47 218 233 3 172 189
472
+ -----------------------------------------------LEPFLNNQAATTPDNLADGRLNIWRNSLPARSEPLE---AVVDGVPLRSAPLDGRgPDNVLCSGQRIAVPERRWDWLYVIGCGERRVRDVLTWHFTNGSVDRDHLALSDLWEGRSGYGEELALRTDVIHYPYHVQERIGITLWCQRVPITSRQPLGAMSLPKNPAVHLFAMTL--------------
473
+ >UniRef100_UPI001672C615_67368/ 111 0.384 4.533E-23 87 227 233 1 143 145
474
+ ---------------------------------------------------------------------------------------ELEGTVFAFPaRDTGAGDNVRCRGQLVELPAGRWDWIGLVGAAERRTEDEVELHHADGTVRREWLRMSDfWPQTAPYFGEPLAFSTSGMRYPRHTHRHHAPSLWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
475
+ >UniRef100_UPI001C593FAE_2749826/ 109 0.259 1.149E-22 35 221 233 12 198 203
476
+ -----------------------------------GRRQSVRYSTVELKSHFNNIGFSYPDIY--GNFTGFGSSYPGDQFPhREDNIVNVKGIPFFLYNAKGRSNNIEFAGQSVEVLPDMYSALHVLGSSDNGSFSEYLKFYLKKSpVGQYKLELSNWVAGVPAYNEHEAYRCSALCAGQQVIETVKPVLWLQSVRFRQPIVFDEISLPDNMCMHLFSLTFEGA-----------
477
+ >UniRef100_A0A5B8M4I2_2599293/ 109 0.270 1.149E-22 52 232 233 890 1077 1311
478
+ ----------------------------------------------------NTVGTASESDTGVGDFDAAGNSYSREQLAsvglAPGATGKVGSLHFTWPSSPeGAPDAVNPTGQTIDL-QGEVHSIAFIGAGiNGGASDTAVATLDDGSTMPVDFSFGDWVlptsDGSPAFGNSVVAKMSHRN---AVTQVQGAYLFATTpATAPDGRTIVSVQFPTDSKERVFAIATDVAPATNTATQLTT
479
+ >UniRef100_A0A838EJ16_2740538/ 109 0.237 2.135E-22 91 217 233 15 153 156
480
+ -------------------------------------------------------------------------------------------VVFTWPDVPaGAPDNYQAAGQVIPVQSmSNATILAFLGSATNGaSSGTATIKYTDGSTQDFTLGFSDWTlngnTNQPSYGNAISYTTSYRNNAHltNGKDTIQTYIFYSSVNLEAGKQVASVTLPTTvtgGQMHVFAVT---------------
481
+ >UniRef100_UPI000B595552_1501230/ 107 0.318 1.004E-21 42 219 233 3 182 196
482
+ ------------------------------------------FHAMDLRPYFNNRGFTYESRYGEGRLTMGSSSFPAESIRF-GRMYRFGGIPFRY-QASEDGDNIETSGQAVALPwmPGKLDCVHALGvSANGDSFDCVSFVAGDRLLHTARLALSDFVSDRPAFGDRLAMTLPYMHMVSGRYAHVRPNLWICSIPYPgEAGAARALVFEDNPSMHIFAMTLE-------------
483
+ >UniRef100_A0A2T0PYA0_1144618/ 105 0.433 3.463E-21 70 218 233 3 150 155
484
+ ----------------------------------------------------------------------WRNSLPAEEFPS--GELVIEGVPFHPPRPrRGADDHVTCDGQLLAVAEGGYDWLYLLACSERRVEEEIALHFTGGQVDFEPLRVSDFWAAVPFFGESAAFTSAVMHYPHHVQPRVPGTIWCQRVPVVRRAPLAAVRLPRNAAVHVFAATL--------------
485
+ >UniRef100_A0A838DPV7_2740538/ 105 0.260 3.463E-21 52 221 233 3 194 583
486
+ ----------------------------------------------------NNIGItgTGYAALTAGGFDTAGDSYSAALLAAltpavtAGSTVTVNGVAFTWpgPDITTNYDNWkTAAAQTITFaPVSNATAVAFLGAASDGsSSGTATINYTTGVPQTFTLGFTDWKTGTPAFSNTLALTLSNYDTSAGVVKSPaaPVYVYYadsSSVTLDPTRIVASVTLPAAVTggglVHVFAAAIKTS-----------
487
+ >UniRef100_A0A081P3V2_1501230/ 104 0.296 6.429E-21 42 219 233 3 182 196
488
+ ------------------------------------------FHAMDLRPYFNNRGFTYESRCGEGRLTMGSSSFPAESIRF-GRMYRFGGIPFRY-QTSEGGDNIETSGQTVALPwmPGALDCVHALGvSANGDSFDRVSFVAGDRLLHTARLALTDFVSDRPAFGDRLAMTLPCMHMVSGRYAHVRPNLWICSIPYPGEAGAApALVFEDNPSMHIFAMTLE-------------
489
+ >UniRef100_A0A2T6FTL9_189691/ 104 0.307 8.759E-21 42 219 233 3 182 196
490
+ ------------------------------------------FHAMDLRPYFNNRGFTYESRCGEGRLTMGSSSFPAESVRF-GRMYRFGGIPFRY-QTTEDGDNIETSGQTVALPwmPGTLDCVHALGvSANGDSFDRVSFVAGDRLLHTARLALSDFVSDRPAFGDRLAMTLPYMHMVLGRYAHVRPNLWICSIPyLGEAGAARALVFEDNPSMHIFAMTLE-------------
491
+ >UniRef100_A0A838XCG5_1507437/ 103 0.269 1.626E-20 43 218 233 6 185 189
492
+ -------------------------------------------HIIELASIYNTKGVSIQDPKLVAQLDHIHSSIPGEIFP-SDEVISYQGIPFAFPLTEAvDNDVISCDGQIVEVNAQRsFQTLAFLGFSLFGDyQDKFTIHYADGVEEEVRFGLTNWKQyktGTPLFGEQIAMSLPY-YVENMILTELPRTVWLQKVTLQHSGHIKQVILPRNPYLMVIALTL--------------
493
+ >UniRef100_A0A060CMX9_487357/ 102 0.278 4.108E-20 44 153 233 73 186 187
494
+ --------------------------------------------YANLAAAYNNVGVTSGDDPKPGNFDGTGNSFNAELLAgqglTPGATVSANGYSFQWPNvAPGVADNVQTAGQLIKL-SGSGNTLAFLGSEAGDRTDTVTVHYTDGTTSTGTVGFP-------------------------------------------------------------------------------
495
+ >UniRef100_UPI0020C7B24C_2810306/ 102 0.256 4.108E-20 47 160 233 159 279 401
496
+ -----------------------------------------------LASAADNVGVTEQDDPGPGDIDGGGSSFIAERLAQkgvtPGAAVKANGFSFTWPDaAPGTPDNVTGKGQTIQVSGDeKGNALAFLGTGTSGSaEGTATVHYTDGTTAEAKLGFPNWAVCPP------------------------------------------------------------------------
497
+ >UniRef100_UPI000B9A8B31_1465/ 102 0.252 5.595E-20 43 218 233 6 185 189
498
+ -------------------------------------------HIIGLASIFNAKGVSIQDPTLAAQLDHVHSSIPGEIFP-SDEVISYQGIPFAFPLTETvENDVISCDGQIIEVNaPSSFQTLAFLGFSLFGDyQDKFTIQYVDGIEEEARFGLTNWKQyktGTPLFGEQIAMSLPY-YVENMILTELPRTIWLQKVALRHSEHIKQVILPRNPYLIVIALTL--------------
499
+ >UniRef100_UPI001942DCDF_926357/ 101 0.233 1.038E-19 52 175 233 739 875 1109
500
+ ----------------------------------------------------DNHGISADRTEGQSDYDGWGSGYSRSALAgagvVPGNRFAAGGVSYVWPNTrPGQPDNVVAAGQRLTVaSPAGATRLGLLGSAEGqpaGAAGTLTVHYADGSTTPAEVGFSDWTlaggTATARSDNVVAARMPYRND---------------------------------------------------------
501
+ >UniRef100_A0A652KEP5_1827978/ 100 0.395 2.620E-19 90 227 233 2 145 147
502
+ ------------------------------------------------------------------------------------------GTAFVFPPrPTAGGDNIRCRGQLVELPdgaaGGRYDWIGLVGAAERRTEDEVELHYRDGSVSRAWLRMSDfWPQTAAYFDEPLAFRTASMRYPRHTHRHHAPALWQQRIAVVRPEPLAAVRLPDNPAMHVFAMTAVVDEESRLA-----
503
+ >UniRef100_A0A931JBU5_114683/ 99 0.271 4.857E-19 75 220 233 0 147 150
504
+ ---------------------------------------------------------------------------PMAQLPQTGQT-EDDGIAFAFTNgSEGSNNNVIAAGQKVTVAAGGYTKLHVLGAGDTGNvSVPAEAAYADGSTGKLTIQLTAWRSG-PAYGETEAVRTSQIHTRTG-PLGTKAAIFHQVVELDPAKELSAITLgaPSgTARAHIFALSLEK------------
505
+ >UniRef100_A0A942KCA5_2478917/ 97 0.304 2.270E-18 114 227 233 0 113 114
506
+ ------------------------------------------------------------------------------------------------------------------MPEGRYSELLLLGASEQGSyQATVRFVYQDETSDELTLGLSDWCQ-LPRFGEAIAYEFIQRRGATGAMERITCRIYFQTLPLRPEAVLTRIVLPDRDTMHLFALTLRQAESEETP-----
507
+ >UniRef100_A0A838DN80_2740538/ 95 0.283 1.059E-17 51 157 233 41 160 458
508
+ ---------------------------------------------------YNNVGItasTSTAAMALGNFDNAGDSYPSENLVAdgfiPGTIVTEYGIDFVWPNvAAGQPDNWKAAGQTIAVNAAPGDSvLAFLGAAtdtfNRGAAGTATIHYRDGSSQNFRLNFTDWLN---------------------------------------------------------------------------
509
+ >UniRef100_A0A973VFE2_1873460/ 94 0.295 1.441E-17 45 223 233 5 185 187
510
+ ---------------------------------------------VDLSGLVDRIGATYDTDRDSGEFNPWGNSFPAEELPF-GGTTAVGGVPYALVDKPPRgPDHLEALGQVIDCAAtGPAHGLALLAAGEQGPQElRVHVHLDGGRRLSLPVTVPGWSVRPDAPMTPDQLRAGHLHYPGDYGLARlLPALWSRVLRL-PGVPVTAIELTANPLVHVFAVTLEIGAA---------
511
+ >UniRef100_A0A951HER9_2800791/ 92 0.273 1.241E-16 43 222 233 879 1050 1255
512
+ -------------------------------------------TPLLLDALFN--------SQSTGDFDTEKRGYDSASLPPPGLYhLGANHIPFRLTGHIGS-DNIACNGQVIQLPPsSRGKTLYLLGAAAPGDQgGLFSVEGTQGHLTRLPVRMNDWVLGGSLHNE-AAFTFERQHTAEGLR-AMTTHFWITSLQLPSDQRPVRLRLPYNTNLHLFAATLAAAS----------
513
+ >UniRef100_UPI001EF5A1EE_1795630/ 89 0.271 7.834E-16 31 142 233 879 995 1009
514
+ -------------------------------VTAPGrcAIQTATSCAVDLSGAYNNDGVATLTDPGQGNFDGTGLSFAANLLPAPGAT-TIGGATYQAPPTTGtAKNFVKSTGQAITLPSGNYTSLDIVAAdngSTGSAAATAVVTYSD------------------------------------------------------------------------------------------
515
+ >UniRef100_UPI001EF302F3_2706031/ 85 0.282 2.280E-14 82 220 233 8 147 527
516
+ ----------------------------------------------------------------------------------PGAELTVAGAPLRLPDpAPGAPDNVRADGQRVRVT-GRGDALAFLVTATGGrALGRGSVEYADGHRSSFRVEAPDWRTGTLA---TSAVALPRISTTDGPRKARAA-LYVVTVPLRHDAAVRAVTLPRDPlgpaSLHVFALGVRP------------
517
+ >UniRef100_UPI001AD6B2B7_67304/ 81 0.293 3.568E-13 35 125 233 42 130 243
518
+ -----------------------------------GWVPVPDPVPVPLDSLYDNDGIDTATARG-GGFDGSGYTFPGEELPA--GRVEVDGVPFDFPsSTAGTKNNVVALGQRVDLPRGRYLSAVFL-----------------------------------------------------------------------------------------------------------
519
+ >UniRef100_UPI001C8AAAD7_1490222/ 71 0.311 1.299E-09 47 118 233 906 982 993
520
+ -----------------------------------------------IEWYQNNAGISDDGKAGQANFDGGGWSYSAQALAAeglkAGQPVAWNGFTFTWPNrRPGQLDNVQASGQVVDLPSAP------------------------------------------------------------------------------------------------------------------
521
+ >UniRef100_A0A2N5XH94_2593676/ 70 0.286 3.213E-09 16 120 233 30 143 151
522
+ ----------------APVSATATASPGTARDASPGSGvapTSADPSPLPLHRLFDNTGTAPDGpvAPGQGGLDGAGNALSRDDLAaagwAPGSRLTLDGTPLTWPDSrPGQPDNVVADGQAVR-PDGHGD----------------------------------------------------------------------------------------------------------------
523
+ >UniRef100_A0A4Y8YB55_1883/ 61 0.420 3.184E-06 155 221 233 4 72 78
524
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------WPETAAWFGEPEAFRGSGLRYPRHTQDGHRPAIWQQRVPVTVPGELTALRLPDNPAMHVFALTavLEPG-----------
525
+ >UniRef100_A0A942KBR5_2478917/ 60 0.275 4.289E-06 34 109 233 642 720 721
526
+ ----------------------------------PVSFEEGVFVALDLGGAFNNDAFSDPSNP-RGNFDnrsgVLGATYPAERAPAENSIVEVSGTLFRFPPTSADANNIALKG---------------------------------------------------------------------------------------------------------------------------
527
+ >UniRef100_UPI001F0BFC40_2203210/ 57 0.268 4.622E-05 47 108 233 130 196 287
528
+ -----------------------------------------------LAAAYNSVGVTDESNTAPGDFDGGGNSFSAQKLAdvglSPGAAVTALGAELTWPDvPPGVKDNVSSR----------------------------------------------------------------------------------------------------------------------------
529
+ >UniRef100_UPI000A7FA51B_860235/ 46 0.307 1.287E-01 33 84 233 723 774 827
530
+ ---------------------------------LPPLPDQGTTVTVDLTAAMDNDAFTNEFHMGDGDFDGTGNTYPAAQLPQTGQ----------------------------------------------------------------------------------------------------------------------------------------------------
531
+ >UniRef100_A0A1U9KA27_1471761/ 44 0.254 5.502E-01 170 222 233 0 57 146
532
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------MPYRNSPSG-QDGLATYVYYTYVPLDhPDKMVRSVTLPdadtiDKGRIHIFDIAIKYAA----------
533
+ >UniRef100_UPI001915FEBF_2778368/ 44 0.318 9.823E-01 69 128 233 10 78 79
534
+ ---------------------------------------------------------------------GCGESYSAQALlqapphIAPGNAVVVNGVTLTWPNvAAGSQDNYKTKGQTVPVTPvnGATTQAFLEGSS--------------------------------------------------------------------------------------------------------
535
+ >UniRef100_A0A2A3HR12_1938860/ 151 0.439 5.047E-37 35 231 233 6 203 212
536
+ -----------------------------------PQASAPRYRVVELADHRNNRAATRVHTTAAGGFNVWRNSFPAEHLPPGGSQVEVGGVPFSFPPVGEGDDNVRCDGQFIAVPAGRYDWVHLLAAAERRTEDTVELHYADGSVDTEWLRVSDFWAAPAWFGELPAYRTPVMHYPYHVQPGVSAHLWAQRVPVPRRTELAGLRLPRNIAVHVFAATAqEPPPGTAGLPAPD-
537
+ >UniRef100_C7PY52_479433/ 147 0.267 2.151E-35 47 230 233 1074 1269 1548
538
+ -----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASFSAQALAsvgvTPGAPLVHDGLTFTWPDRQvGQSDNVVAAGQTIDI-SGSGSTLGLLGTSTWGaSSGSGTIAYTDGSTQPYTIAFGDWANGTPPTGGDVAIRAPYGNQP-GNQTGWAATIDYFPITLDATKTVQSITLPpgsaqphgGTPAMHIFAMSIKSDQLSVTAPTA--
539
+ >UniRef100_A0A1B4ZDD3_1213862/ 141 0.403 1.250E-33 45 228 233 26 210 211
540
+ ---------------------------------------------LDLTAFADNVGVTSPDRLSEGAFNIWGNTFPADELPK-GGPVDIHGIPFRFPAvGTGQPDNVRCAGQFIDVPVGRYDWIHVLAAAERRTEDFVRLHYTDGAVDPEWLRVSDfWPETASRFGESAAVSCTRLHYPRHIQRSMGPTLWRQRVAVPREQDLSAIRLPDNPAIHIFAMTLAPATQPETTQ----
541
+ >UniRef100_A0A4R0HF04_1124743/ 133 0.394 1.199E-30 25 218 233 35 228 237
542
+ -------------------------MPDQMPDQMPATSVQARCRAVNLAPHRNNVGSTPATDTRGGAFNIWGNSFPAEELPAPGQFV-VDQVAYDFPPTgRGTADNVRAAGQFIEVPSGRYDWLYVLGAAERRVEDELAFHFADGSVDFEQLRLSDFWAAPGWFGETQVRATRSMHYPFHVQAGVPAMLWSQRVPVTRRAALAAVRLPRNPAVHLFAATL--------------
543
+ >UniRef100_A0A2T6FTL9_189691/ 98 0.302 1.187E-18 42 219 233 3 182 196
544
+ ------------------------------------------FHAMDLRPYFNNRGFTYESRCGEGRLTMGSSSFPAESV-RFGRMYRFGGIPFRYQTTEDG-DNIETSGQTVALPwmPGTLDCVHALGVSANGDSfDRVSFVAGDRLLHTARLALSDFVSDRPAFGDRLAMTLPYMHMVLGRYAHVRPNLWICSIPyLGEAGAARALVFEDNPSMHIFAMTLE-------------
examples/7wux/msa/2/non_pairing.a3m ADDED
@@ -0,0 +1,436 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ >query
2
+ MGSSHHHHHHSQDPNSTTTAPPVELWTRDLGSCLHGTLATALIRDGHDPVTVLGAPWEFRRRPGAWSSEEYFFFAEPDSLAGRLALYHPFESTWHRSDGDGVDDLREALAAGVLPIAAVDNFHLPFRPAFHDVHAAHLLVVYRITETEVYVSDAQPPAFQGAIPLADFLASWGSLNPPDDADVFFSASPSGRRWLRTRMTGPVPEPDRHWVGRVIRENVARYRQEPPADTQTGLPGLRRYLDELCALTPGTNAASEALSELYVISWNIQAQSGLHAEFLRAHSVKWRIPELAEAAAGVDAVAHGWTGVRMTGAHSRVWQRHRPAELRGHATALVRRLEAALDLLELAADAVS
3
+ >B4XYC1 250 1.00 4.608E-69 19 351 352 4 336 337
4
+ -------------------APPVELWTRDLGSCLHGTLATALIRDGHDPVTVLGAPWEFRRRPGAWSSEEYFFFAEPDSLAGRLALYHPFESTWHRSDGDGVDDLREALAAGVLPIAAVDNFHLPFRPAFHDVHAAHLLVVYRITETEVYVSDAQPPAFQGAIPLADFLASWGSLNPPDDADVFFSASPSGRRWLRTRMTGPVPEPDRHWVGRVIRENVARYRQEPPADTQTGLPGLRRYLDELCALTPGTNAASEALSELYVISWNIQAQSGLHAEFLRAHSVKWRIPELAEAAAGVDAVAHGWTGVRMTGAHSRVWQRHRPAELRGHATALVRRLEAALDLLELAADAVS
5
+ >SRR5215472_11685424 196 0.515 1.144E-50 1 346 352 63 412 423
6
+ -GHSRAGRDAGDGAVTAVSIEPLELWTRDLGSCLQAVFATLLLREGQDPVTVLGSAWDFRYEPGDWRSEEFYYPCAGRPLAEALAPLHPLTSVWHRapaDGPDPLRPLRAQLAAGRLPIAAVDNFHLPFRPAFGDVHAAHLLVVAGIDDERgqVLVSDAQPPEFQGPIPVADFLRAWGSANPEDVQDAFFSNSPLDRRWLEVRFTGEFPKPTRAWLAEVLARNVACLRWPDAGQLWSGLPGVERYVDRVLSQ-ACGPGGSAVLEELYVLSWGLQAHADLHAEFLRRHGVAQDLPALREAAEVVDRVAHAWTGFRMTGAHGRLDRPDFGGDLARHGGVLVRAHHEAASALELA-----
7
+ >3300005562.a:Ga0058697_10000188_20 192 0.458 4.730E-49 22 350 352 9 336 338
8
+ ----------------------VRQWYRDPLSCLQSTLATVLLGAGVEPLPVLGLAWEFLFKPGDVRREEFYYPCRfEGDVARSLAPYHPIRSSWWSPAPgeDPLAELARRVEDGELPVAGVDNYHLPFRPAFRDVHAAHLVVVYAIDreRDEVGVSDAQPPAFRGSIPTEDFLAAWASPNRADAEDAFFSDSEIGRRCLSVEIEGPIAPLDPVRLEAALAENLARF----GAGGWSGLAGLRRYLDDLS--ARAFAGERRPLEELYPFGWGMQAQTYLHGELLREVGADWSFPELAEAGRAVQEVASAWTGLRMTGAHALAAPAAAAAELRRHGNRLRRRYETALERVAQAVESL-
9
+ >SRR6266536_566964 191 0.460 8.792E-49 21 346 352 170 497 519
10
+ ---------------------PVRLWYRDLVSCLQATFATVLLHAGRHPLAALGAAWGFYCWQGEVTTEEFYYGCRHGSLGASLAPHHPVASRWHAGPDGSdpLAPLRDQVGAARLPIAVGDNYHLPFRPAWHDVHAAHLLVVYGFGDGTVEVSDAMPPAFQGPIPVEDFLRSWGSTNPRDEQDDFFSNAAIGRRWLEVTLGEPWPVEDRPWLRSVLQSNLDDLAGGGDglAGGLLGLPGLRGFLDELLARAGRGDSSVLL--ELYVFGWGVQAQASLHGELLRERGRAWDLPALSEAGRAVESVAHAWTGLRITGAHGRRDPAAVAADLARHGRAVEHAYRHAADLVALA-----
11
+ >B5HTY9 190 0.432 1.199E-48 20 350 352 14 349 350
12
+ --------------------PEPELWYRDLISCLQSTFGSVLARAGADPLAVLGAGWRFLHLPGDVRSEEFYYPCPADesggtDLGAALAPHHELHARWWQpaDEDDLWREVRETLAEDRLVIAAVDNFYLPFRPAYQDVHAAHLVIVYGLDETRgvVHVSDAMPPAFRGAVPIEDFLRSWGSANPSDVQDAFFSDSGIGRRCLDVRLDAPAGPLTPELLGGFLRTDVDGFTTATPA--RTGLAGYDEFAAELLDRCRAQDAG--ALRELYPFGWGMQAQASLHGELLRRCGSRWDDPALAAAGRAVESVAHAWTGLRFTGAHGLADPRAAAPDIARHVTRLRGAYACAVDAVGAAAGRL-
13
+ >A0A1G8BDD9 184 0.361 1.702E-46 22 350 352 7 341 344
14
+ ----------------------LDFWFHDLCSCLQDCFGTLLLRHGQDPVAVMGAAWEFHHAPGPVATEEFYHPAPRPTLGDNLMPHHPVRATWreQEDVESSWQDIRASIIDGRPAIAAVDNFHMPIRPAYGDVHAAHLMVVWGFDDEagEVYVLESTPPQYSGPVSLADFQRARSSANdsRPDTRDYFFAGAGIRGRWIDVTVDGAFPAVEREWVADVVTANARGFAEPAPGPGWSGLTGLTTWLEGVCDRADDIEDAGVALAELYTAGWAAQSAAALHADFLRDMGNRFGCDPLVHAGRQVDRLANQWTPLRILGAHGSTTGHRHTDQLRDRVRRFTAGHRDAVARLEAAAAAL-
15
+ >GraSoiStandDraft_9_1057307.scaffolds.fasta_scaffold4049564_1 183 0.356 4.309E-46 20 350 352 5 338 339
16
+ --------------------PGIRPWRHDLTHCLHTTFGVLVGFYGLDPLHVLGAGWGFGYRLDDVRREEYYFPCLDGSLLAGLAPHHGLDSHWHEPEDAAhgWEQVRACVAAGQPVAVAADNFHLPFRPAFSDVHTNHLLAVYGFDDEegTVYVADPVPPRFDGPITVDALTNARDSNNPItHERDMFFTANPIANRWLTVELTGPQPTFDRDFVRRVLLSNLDGFARGKVEGPTMrGVAGLREF---LTACLPRIADEPQRIDEVFIVAGVTLAITGLHADFLADSGRRLGLPALVELGREVDRVAHHWTALRIGVANSRSAPIDDLPALLRRGRNLVADQERVLDRMAQTAAQL-
17
+ >SRR5581483_8883662 181 0.405 2.024E-45 21 319 352 13 314 353
18
+ ---------------------PMAHWHHDLCSCLQCCMASVMHYYGRDPILTLGAVWDFYYSPEDLRKEEFYSPCRWSGLAESLLPYHPVTSRWHQPDDPevGWLQVKEVVQNGSPVIVAVDNFYLPFRPAYQDIHAGHLILVYGFDEetDQVYVLDSMPPAFIGPIALTDLKASRSSLNPADERDYFFSNAPVANRWLELNIEAPFPQFTEKWVMDVINANLRRFTTPCDGSALSGMSGLARYF-DLLDQNMANPAGHHALEELYVLGWSIQAATALHADFLMQVGKQLNWPRLGSIGRQVAHLAHHWTALRMLGAHGRAHPR--------------------------------
19
+ >K0K7T6 179 0.411 6.974E-45 21 349 352 4 332 334
20
+ ---------------------QVEQWYRDPVSCVHATLAEVVRHAGAEPLEVLGLGWEFRHLPGDVRPEEYYWPCRvPGDLAGSVLPHHRVRSVWRTaPEPDPLTALTGPLAVGRLPVLAVDNYHLPFRPAYHDVHAAHLILVRDLDLDRgvALVSDAMPPAHRGELPVADLLRAWRSTMPPDEQDVFFSGRGGQARWLEVIVEAVPPPLTPEALRSALRANVDGFQSQGP--ERTGLAGFGEFLAEVVDRSAAGEAAAP--AEVYTFGWSMQAQSAVHGELLRTCGMRWAEPELAEAGRRVEQVAHHWTALRVTGAHGRTDPVAAAPGLHRHGERLRRSYEQAVESLALASAA--
21
+ >A0A1L7NQD0 179 0.423 1.294E-44 20 350 352 14 344 345
22
+ --------------------PEPELWYRDLISCLQATCGSVLAREGVDPLDVLGAGWQFLHVPGDVRPEEFYYPCSEGELGRALAPHHDLGARWWQPADENdvWREVREHLADDRLVIAAVDNFHLPFRPAYGDVHAAHLVVIYGLDETRgvVYVSDAMPPAFRGAVPIEEFLRSWGSVNPTDVQDAFFSNSKIGRRCLDVSLGTRRGPLTPELLGGYLRTDIEGFTTETSA--RTGLAGYDAFVGELMERCRAGDA--DALRELYPFGWAMQAQASLHGELLRRRGRDWDDAVLAGAGRAVETVAHAWTGLRFTGAHGHGDPRAAAPDIAHHATLLRAAYARAVDAVGAAAARL-
23
+ >SRR5919197_4477474 179 0.452 1.294E-44 21 350 352 224 552 553
24
+ ---------------------PPRQWYRDPISCLHSTLAAVVGYEGADPLEVLGLSFEFLYKPGDVRPEEFYFPCRfGDDLARSIAPYPPVRSEWWRaTGEDPLAELAARISNGELPIVAVDNYHLPFRPAYHDVHAAHLVVVYGIDAGRreVSVSDAMPPAFQGAIADGDFLRSWSSPNPRDDQDAFFSDARIDRRCLSVRFETPIPPLDGARLKDALESNSQRLSAG---EGWSGLPGLRRYLDELVGRADAGERRP--LEELYPFGWAMQAQAYVHGELLRVCGADWSVPELREAGRAVESVAYAWTGLWITGAHGLSAPAEATSGLRRHAGRLRRRYEEALEAVEQAVEAL-
25
+ >SRR5215211_2955382 175 0.442 1.534E-43 31 350 352 1 330 348
26
+ -------------------------------SCLQATFATLLLHRGHDPLEVLGAHWEFRYRPGDVRPEEFYFPERvAGDLGASLAPHHPVSSRWASGPAErPLDGIAAELRAGRLVIAAVDNFHLPFRPAFGDVHAAHLLVVYGIDERHGQVHlsDAMPPAFAGPIPVEDFMRSWSSQNPMDAQDAFFSDAGIGRRYLRTTVGEPSRDLDPRHVLACNRRLFDGGAQSAgaqpagaqPAGDWTGRSGRARYLDQLGRRARAAD--TRALEEVYPFGWGMQAQASLHGELLGRWGRQHAVPRLREAGRLVEAVAHGWTGLRMTAAHGRTDPVAAAPDLARHASRLRHAYDLAHEALAEAEAAL-
27
+ >A0A1M5CPU0 173 0.391 7.186E-43 24 343 352 7 328 364
28
+ ------------------------GWYRDPLSCLHVTLAVLLERAGRDPLDALSRDFGFRWIPGDVRGEEFYWPVdEPDDPVAGMAPSAGIRSRWRTAAADPLAALASALAAGRPPIVAVDNFHLPFRPAYHDVHAAHLVVVTALDvaGGRAFVVDHMPPAHAGWLPVEHLLAAWGSVNPPDAQDDFFSGEPIERRWLEVDLGPGLAPLTAADAAASAVENCARFgPLLGAPAPSSGRADLHALAA--AAVAAARDADGGALAEVYTFGWSMQAQAAVHGELLRRTGAAAGDLVLAAAGRKVEAVAHVWTGLRVTAAHGRLDPAAVAPAVGRHAARLVTAYETAFAAL--------
29
+ >GraSoi013_1_40cm_1032412.scaffolds.fasta_scaffold160357_1 172 0.404 1.814E-42 20 332 352 5 320 339
30
+ --------------------PGIGHWRHDLTHCLHTTMGVLLGFYGLDPLHALGAGWGFAYPAGDVRREEYYFPTVDRSLLGSLAPHHPLSSSWHEPAGaeQGWHDVREAVSGGVPVAVAVDNFHLPFRPAYSDVHTNHLLAVYGFDDDRheAYVADPVPPRFQGPIPLAALAAARDSANPiRHDRDLFFTANPLANRWLTIHLDGPQPALDLDFVRAAMLRNVAGLADGTAAGaVLRGLAGQRAF---LESALADLDAGEHRVDEVFVVAGAILAVTGLHADFLGAAARRLDRPALAELGREVDRVAHHWTALRIGVANARSRPGAEVPALQARAAAL-------------------
31
+ >SRR5919197_654015 172 0.393 2.471E-42 42 347 352 1 309 311
32
+ ------------------------------------------AHRQLDPLVVLGSGWTFAYSPGEWAPAEFFYPALDGSLAATLAPHQPLSIVWREpgSPEEAEQELVAALAGGRPSIVAVDNYWLPFRPAFRDVHAAHLVVVYGHDAERgsFAVLDAVPPAFRGWISRETLARARGSDNPAADDEAFFRGAPIANRWLDVDVDGAVSEVEPEWLGRVVAGNLAHMLGPSGETTWTGLAGVRRYVEWLAERAAAEDGEP-ALKEAYGFGWAPQASAALHGAFLHAAGLRLDAPGLREAARTVDLVAHAWTPLRVGAGHAVSDPPAAAGYLARRGRALVTAYEAAIDRLRELA----
33
+ >A0A1C4Y1U0 170 0.438 1.156E-41 25 350 352 39 368 369
34
+ -------------------------WYRDPLSCLQTTFAAVVDAAGGDPLEVLGAGWEFRHLPGDVRTEEFYHPSRhPEDLGRSIAPHHHVRSRWTTPaaDRDPLSELADEIAAGRLPIAAVDNFHLPFRPAYHDVHAAHLVVVYGVDlaRDLVLVSDAQPPAFTGPIRAEDFLAAWGSVNPADDQDAFFSATRIDRRYLRVEI-GDSRPLDRDGLRAALRANVDGFTAAPASDGgaWTGLAGLRRYLDEVVAAAAAGD--SATVRDVYPFGWSMQAQACLHGELLRTVGVDRDLPCLREAGRAVEAVGHTWSGLRVTAAHGWPAPQAAAESLEHHGARLQQRYQCAVEAVDRAVGRL-
35
+ >SRR5919197_910194 169 0.457 2.916E-41 22 346 352 151 470 475
36
+ ----------------------IRQWYRDPVSCLQSTLATVLLAADTEPLPVLGLRWEFLFVPVDVRPEEFYYPCRfADDLARSLAPYHPLRSSWWSPAPEedPLAELARRVEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDreKEEVGVSDAQPPAFRGSIPAKDFLAAWGSANPADTEDAFFSDSEIGRRCLSVEIEGPIPPLDAERLRTALEENLAGFVT--------GLADLGRYVDDLE--ARALADERRPLEELYAFGWGMQAQTYLHGELLRETGAGWSVPELAEAGRAVHEVASAWTGLRMTGAHALGARAAAAADLRRHGNRLRRRYEAALEHVAQA-----
37
+ >SRR5919204_1883335 168 0.472 3.969E-41 22 323 352 124 424 427
38
+ ----------------------VRQWYRDPLSCLQSTLATVLLAADAEPLPVLGLAWDFLFVPGDVRPEEFYYPCRfEGDVARSLAPYHPVRSSWWSPalDEDPLAELARRVEDGELPVAAVDNYHLPFRPAFHDVHAAHLVVVYAVDreRDEVHVSDATPPAFQGAIAAADFLASWSSANPADDEDAFFSDSRIGPRCLAVEIDGPLPPLDRERLRAALCENLARF----GAVGWSGLAGLRRYVDDLTARAQAGERRP--LEELYAFGWGMQAQTYLHGELLRETGAEWSAPELAEAGRAVQATASAWTGLRVTGAHGPAAPGDPGP----------------------------
39
+ >SRR5919202_365103 167 0.449 1.362E-40 22 350 352 157 480 481
40
+ ----------------------VRQWYRDPVSCLQSTLATVLLAAGAEPLPVLGLRWEFLFVPGDVRPEEFYYPCRfEGDVARSLAPYHPLRSSWWSPaaGEDPLAELARRVEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDRERgeVAVSDAQPPAFRGAIPAEDFLAASGSPNPADAEDAFFSDSEIGRRCLSVEIDGPIPSLDAERLRAALAENLT--------DFATSLAELGRYVDDLE--ARALDGERRPLEELYAFGWGMQAQTYLHGELLREMGAGGSVRELAEAGRAVQEVASAWTGLRMTGAHGLAAPAAAAADLRRHGNRLRRRYEAALEHVTRAVESL-
41
+ >A0A2A2D0D6 158 0.369 8.748E-38 22 348 352 88 416 422
42
+ ----------------------VRPWRHDLGGCLHGCLATLLEHRGVPALPVLGAAWTFRHFPGGVRREEYYYPcADGESLLTALAPHHPVRSVWHEPADAAtgWEQVRDAVAAGTPVAVAVDNYHLPFRPAYRDVHSNHLVVVHGYDEErgTVRVLDAVPPAFHGDITLAELTASRDSGNeLVHERDMFFTGVHIDNRWLSLEldaAPGDFPALDRAEVARVLALNLAHFAAPPDGDACTGLAGQQAF---LEGTVKRLTAGEDIRDELFVAAGAALACTAVHADWLALAGRTLDLPGLVEQARSVERVAHHWSAVRIMAALTRDGGLTP----RRLAGRVHALLKDHESALTALDD---
43
+ >688.fasta_scaffold2401073_1 156 0.357 7.519E-37 22 308 352 11 304 347
44
+ ----------------------VVSWRHDLVGCLWTSAATILAFHSAPVLPTLGAAWGFRHQPDDLRREEYYYPCKPGvSLYEAIAPYHPVRSRWHEptDAEQGWIQVRDMVLSGEPVVVAADNFYLPFRPAYQDVHTNHLIVLYGFDTERgsATVLDAVPPRFNGEITITEFTAARDSGNPqLHDRDMFFTANPLGNRWLEVEvETAAFPPFDLDMIKYVIRRNLDGFAASPDGGVRDGYSGMAGQAAYLDDLAGRLEVGENVRDELFLVAGAVLANTALHADWLALAAGVIRIPALAEAGRDVERVAHHWTAIR-------------------------------------------
45
+ >SRR5580658_171216 156 0.357 7.519E-37 22 308 352 56 349 392
46
+ ----------------------VVSWRHDLVGCLWTSAATILAFHSAPVLPTLGAAWGFRHQPDDLRREEYYYPCKPGvSLYEAIAPYHPVRSRWHEptDAEQGWIQVRDMVLSGEPVVVAADNFYLPFRPAYQDVHTNHLIVLYGFDTERgsATVLDAVPPRFNGEITITEFTAARDSGNPqLHDRDMFFTANPLGNRWLEVEvETAAFPPFDLDMIKYVIRRNLDGFAASPDGGVRDGYSGMAGQAAYLDDLAGRLEVGENVRDELFLVAGAVLANTALHADWLALAAGVIRIPALAEAGRDVERVAHHWTAIR-------------------------------------------
47
+ >SRR3569833_1969934 149 0.341 1.019E-34 22 308 352 167 457 480
48
+ ----------------------VRPWRHDLAGCLHACAGTLLDHQDIPPLDALGAHWGFYYPPGDFRQEEYYFPCRPGtSLLGSLAPYHPISSRWHlpADAEEGWSDVREQRVAGWPTAVGVDNFWLPLRPAHQDVHANHLVIVYGFDDenETVRVMDTVPPRFDGDLPLSVLTAARGSgTEAHHARDMFFADSAIAHRWLEISVNhARRRPPDRPTIAAYLCRNLAGFAAPDDENDHDGLAGLESFLRDMETRLSRGDEIAD-----ELFVVAGAALadTALHADWLAEAGRAVGIPSLTEAGRSVERIAHHWTALR-------------------------------------------
49
+ >SRR6266536_1184334 149 0.466 1.881E-34 38 286 352 2 252 493
50
+ --------------------------------------ATVLLHAGREPLAALGAAWGFYCRPGEVTTEEFYYGCRHGSLGASLAPHHPVASRWHAGPDGSdpLAPLRDQVGAARLPIAVVDNYHLPFRPAWHDVHAAHLLVVYGFGDGTVEVSDAMPPAFQGPIPVEDFLRSWGSTNPRDEQDDFFSNAAIGRRWLEVTLGEPWPVEDRPWLRSVLQSNLDDLAGGGDglAGGLLGLPGLRGFLDELLARAGRGDSSALL--ELYVFGWGVQAQASLHGELLRERGRAWG-----------------------------------------------------------------
51
+ >A0A022MLG6 147 0.352 4.714E-34 23 339 352 23 341 358
52
+ -----------------------PQWYEDPLSCLQTTLGSILLEHGLRPVEVLGRACEFAFAPDDVMCEEFYRPAqSSRGVAADLCPYH--DVESAWTSGKDADDLIGLIEEHAAVIVAVDNYHLPFRPAYHDVHAAHLVVVpawrrTGGGQLEFYVSDAQPPGFQGWLAAEHLVESWTSGNPTDTQDVFFSSREIGGRVLTVKVRPEPGPLTDDQVVRALRGNLDRWDTgtaGPAEGVWTGRSGLRRFVERLAEHSAD----PARLRPAYTFGWAMQAQAYLHGRFAQECALRTRPGHLAEVAASADRVVSAWSNVRLLSAHAAHTP-GAPELIRRRGEELSHAYEQL------------
53
+ >SRR6185503_6371475 140 0.318 1.158E-31 25 293 352 68 340 342
54
+ -------------------------WRHDLVGCLWTCAASILDFYDVPALETLGAAWTFRHCPQDVRREEYYYPCPEGtSLYEAIAPYHPIRSVWHvpADAEEGWQQVRDQVADGTPVVVAVDNFYLPFRPAYQDVHTNHLSIVYGYDEqaGTVRVLDAVPPRFDGDIRIDELTAARNSANPeLHERDMFFTNRPIANRWLEIELdAAAFPPFTLDTVRSTLRRNLQGFYAAASEVEYLGIQGEQEYLASQANWLDKGDDIRDGLFLAAGAAL---ANTALHAEWLALAGRQFYQPRLAEL----------------------------------------------------------
55
+ >4772|scaffold_11782_c1_3|+1184|00 137 0.296 1.329E-30 22 308 352 7 327 369
56
+ ----------------------VRPWRHDLAGCLHACLGTLLDHAGRRPLEVLGAGWRFYYRLGDLRAEEYYHPCPdGRSLVASVAPGQGISSRWHRpaDAEQGWQQVREQVLAGVPVAVAVDNFELPFRPAYRDVHSNHLVVVYGFDDErgTVRVLDAIPPFFDGDLPLGVLAAARDSGNRsSHERDMFFADNPIGNRWLELVADGAPSAEPRspagylAANLAALRAGASGNPdssagpnsgagpnssaqpnsdaepdRGAEPNSYAGRDGIARFLTDMSDRLADGHSIADELFVVSGVALAGTA---VHADWVGDTGRRLGLPGWPELARRLDRLAHHWSAVR-------------------------------------------
57
+ >SRR5258708_5638915 137 0.508 1.803E-30 104 347 352 13 257 264
58
+ --------------------------------------------------------------------------------------------------------VRQARGAGRRPTAAVDNSPLPFRPAFGDVHAAHLLVVNGVDDERGLVHvsDAQPPEFQGPIPVADFLRAWGSTNPEDTQDAFFSNSPLDRRWLEVRFTGDFPVADRGWLAGVMAENVRRLRRPEPGPLWSGLAGVERYVERVLEQ-AAGPGRSDALEELYVLSWGLQAQADLHAQFLRGFGVAEDLPAMREASCLVDGVAHAWTGFRMTAAHGRLDRPDFGQELRWHGRRLVRAHDEAVDAIELAL----
59
+ >GraSoi2013_115cm_1033766.scaffolds.fasta_scaffold685313_1 134 0.764 1.521E-29 115 305 352 0 190 191
60
+ -------------------------------------------------------------------------------------------------------------------IAAVDNFHLPFRPAFHDVHAAHLLVVYRITDTDVYVSDAQPPAFQGPIPIADFLRSWDSANPAHDADVFFSSSPSDRRWLHARMRGAGPRTDRGWLAQVIRDNVARFRAGSPDGIETGVEGLRRYLDELCTHEPGTQAADAALSELYVISWNIQAQTGLHAEFLRAQGLHWRIPELAEAAARVDAVAHGWT----------------------------------------------
61
+ >ERR671931_1062055 130 0.480 2.348E-28 21 243 352 108 326 335
62
+ ---------------------PPRQWYRDPISCLHSTLAAVVGYEGADPLEVLGLSFEFLYKPGDVRPEEFYFPCRfGDDLARSIAPYHPVRSEWWRaTGEDPLAELAARISNGELPIVAVDNYHLPFRPAYHDVHAAHLVVVYGIDreRDEVGVSDAQPPAFRGSVPAEDFLAAWGSANPADTEDAFFSDSEIGRRCLSVEIEGPIPPLDAERLRTALEENLAGFVT--------GLADLGRYVDDL------------------------------------------------------------------------------------------------------------
63
+ >SRR5439155_7605724 130 0.404 3.181E-28 24 201 352 9 191 192
64
+ ------------------------FWFHDLCSCLHDCIATVLIYQDQDPTLTLGASWEFYYSPADVRREEFYHPLPRPTLAESMMPFHPVRSSWHasDDPDAAWSDVKALVADGQPVIVAVDNFYVPFRPAYGDVHAAHLIVVFGFDDatDEVYVLDSTPPTKRGPMPMSEFLRARYSDNPVSGeRDFFFAGAPIANRWLQLEIGTP------------------------------------------------------------------------------------------------------------------------------------------------------
65
+ >SRR2546423_1207057 128 0.432 1.071E-27 22 266 352 174 413 417
66
+ ----------------------VSQWYRDPVSCLQSTLATVLLAAGAEPLPVLGLAWEFLFIPGDVRPEEFYYPCRfEGDLARSLAPHHPLRSSWWSPAPgeDPLAELARRVERGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDRERgeVGVSDAQPPAFLGSLPAEDFLAAWSSANPADAEDAFFSDAEIGRRCLAVEIEEPIPPLDGERLRAALAENLACFVT--------GLADLRRYVDDLE--ARALAGERPPLEGLYALGLG-------------------------------------------------------------------------------------
67
+ >SRR5215471_7738639 128 0.443 1.451E-27 21 193 352 168 343 344
68
+ ---------------------PTRAWRHPLGGCLFAAVGALLWHRGLDPLEALGAAWRFRYRLGDVRREEFYYPCGEDSLVEALAPHHPVRSRWHspSSPAAAWPEVRERLLAGVPVAVVVDNFHVPFRPAFGDVHSSHLVVVTGFDEGTGTVVDAVPPGFQGPLPLAQLALARGSDNRPrHARDMFFAGDPIGARW--------------------------------------------------------------------------------------------------------------------------------------------------------------
69
+ >SRR5919198_860872 128 0.456 1.966E-27 115 344 352 0 226 920
70
+ -------------------------------------------------------------------------------------------------------------------VAGGDTRLLPFRPAYHDVHAAHLVVVYGIDAGRreVSVSDAMPPAFQSAIADGDLLRSWSSPNPRDDQDAFFSDARIDRRCLSVRFETPIPPLDGARLKDALESNAQRLSAG---EGWSGLPGLRRYLDEVVGRAGAGERRP--LEELYPFGWAMQAQAYVHGELLRVCGADWSVPELREAGRAVESVAYAWTGLRMTGAHGLSAPAEATSGLRRHAGRLRRRYEEALEAVE-------
71
+ >ERR1700691_1894743 126 0.322 8.949E-27 4 235 352 55 293 308
72
+ ----HRHRTSGMSRPATALIEEVVSWRHDLVGCLWTSAGTVLGFHGAPVLETLGAAWGFHHFPDDLRREEYYYPCPPGmSLYEAIAPYHPVSSRWHEPADAAqgWAQVRDIVLSGSPVVVAADNFYLPFRPAYQDIHTNHMVVVYGFDEasGTARVLDAVPPRFDGDITMEELTAARDSTNPvLHERDMFFTNRPIANRWLEITVdTAAFPPFNLEPARPTLRRNLPGSAAPAPAEAYAGTAG--------------------------------------------------------------------------------------------------------------------
73
+ >DeetaT_8_FD_contig_21_3401058_length_202_multi_3_in_0_out_0_1 122 0.400 1.364E-25 24 203 352 9 193 195
74
+ ------------------------FWFHDLCSCLHDCIATVLIYQDQDPTLTLGASWEFYYSPADVRREEFYHPLPRPTLAESMMPFHPVRSSWHasDDPDAAWSDVKALVADGQPVIVAVDNFYVPFRPAYGDVHAAHLIVVFGFDDatDEVYVLDSTPPTKRGPMPISEFLRAIYSDNPVSGeRDFFFADAPSASRSRHLEIGTPSP----------------------------------------------------------------------------------------------------------------------------------------------------
75
+ >SRR6266704_4620549 119 0.447 1.527E-24 22 189 352 1 172 175
76
+ ----------------------VRMWRHDLGHCLHATMGVLLGCYGADPLSVLGAAWGFGYRSGDLRREEYYYPLGSESLLGVMAPHHPVSSRWHRPPDaeTAWTEVREAVASGRAVAVAVDNFHLPFRPAYRDVHTNHLLTVYGFDDerDEVLLADPVPPRFQGAITRAEFAAARRSVNPEDHDRDLFFTRNP------------------------------------------------------------------------------------------------------------------------------------------------------------------
77
+ >EndMetStandDraft_4_1072995.scaffolds.fasta_scaffold2302776_1 118 0.370 3.774E-24 108 327 352 0 209 217
78
+ ------------------------------------------------------------------------------------------------------------LASGVPVAVAADNFFLPFRPAYRDVHSNHLLLLYGFDDDagHVYVVDPVPPSYQGPIPLETLSLARGSVNPvRHDRDMFFTANPIANRWLTVRVGPVQPVMDRAFVARAIEANVA----------GFGLPGLRAFlAAALAKLPDDGT----VIDEIFLVAGPLLAVTGLHAEFLDRAGQAFGVSALRELGRRVDRIAHHLSALRIAVASARHDRAAAVPGLRR------------------------
79
+ >ERR687887_509227 117 0.470 6.896E-24 22 233 352 115 331 332
80
+ ----------------------LRQWYRDPISCLQSTLATVLLAADEEPLPVLGLAWEFLFVPGDVRPEEFYYPCRfEGDVARSLAPYHPLRSSWWSPAPEedPLAELARRAEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDrrREEVHVSDATPPAFQGAIAAADFLASWSSTNPADDEDAFFSDSRIGQRCLAVEIDEPLPPFDAERLRTALETNLARFAAEEDDAGRGGL----------------------------------------------------------------------------------------------------------------------
81
+ >SoiMetStandDraft_5_1073268.scaffolds.fasta_scaffold824170_1 116 0.332 1.702E-23 78 328 352 3 256 278
82
+ ------------------------------------------------------------------------------SLLGSLAPYHPISSRWHlpAGAEQGWSDVREQLVEGRPTAVGVDNFWLPFRPAHQDVHANHLVIVYGFDDEnkTVRVMDTVPPRFDGDLPLSVLTAARGSGNeAHHDRDMFFADSAIAHRWLEISVNhARLRPPDRPTIAAYLCRNLAGFAAPDDENDHDGLVGLESFLRDMETRLSRGDEIAD-----ELFVVAGAALadTALHADWLAEAGRAVGIPPLTEAGRSVERIAHHWTALRIIAALTRKGDVSTARLRRRH-----------------------
83
+ >SRR5262245_57182801 115 0.435 2.300E-23 31 219 352 2 194 228
84
+ -------------------------------SCLQATVGSVLAYWGHDPLEILGAGWDFTYIPGEVPFEEFYFPSPPGiDLGQSLAPHHPLSVRWATGVrDDPLGELAGAIEAGQLPIVALDKYHLPFRPAYHDVHAAYLLVVYGVDKRRglVLVSDSTPPHFTGPIAAEPFLAAWQSRNPADEQDAFFSESEIGMRYLVVEPRDRIPRLTQRQLGTALRANAA------------------------------------------------------------------------------------------------------------------------------------
85
+ >SRR3954469_16970431 113 0.377 1.034E-22 22 243 352 47 268 283
86
+ ----------------------VRQWYRDPVSCLQSTIATLLIEAGWDPVETLGLGWQFPYLPGDVRAEDFYWPCRvPGDPVASMLAHHQVSSSWrQETGSDPLAALEAALDRGQLPVVAVDNFYLPFRPAYHDVHSAHLIVVFDVDrDGTVGISDAMPPAFQGELAVEDLLRSWMSAN-PRENEAFFSGAHIGGRWLDVRLGPAPPRLDPSTLAGAL--LADAVAFRKTDVAGAGLAGLADLSAAI------------------------------------------------------------------------------------------------------------
87
+ >SRR5262249_26821897 112 0.471 2.547E-22 21 158 352 6 147 149
88
+ ---------------------PIDHWRHDLGHCLHTTAAVLLAYHRLDPVEVLGAGWGFRYPPGDVRREEYYFPGHPDDLFSGLAPSHGVSSRWrpREGAEDGWRQVRAAHADGAPVAVAVDNFHLPFRPAYRDVHTNHLLAVYGFDDDTgqVFVADPVPPSF-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
89
+ >SRR5688572_1544331 111 0.420 4.641E-22 56 264 352 0 211 212
90
+ --------------------------------------------------------WEFLYKPDDVTAEEFYYPCRVDgDIARSVAPYHEITSRWWraESGGSALDELAGCLNAGALPIAVVDNFHLPFRPAYRDVHAAHLLVVYGVDQQRgeVWVSDAMPPAFSCPIATEEFLASWSSENPLHAGDAFFSGSRIDRRCLLMDVRSQRPPLNRHVAHAALRSNLARFARADDPAGWNGRAGLGRFLQQLLERARAAD--PEALAETYRFG---------------------------------------------------------------------------------------
91
+ >MGYP000725067659 108 0.373 5.102E-21 115 341 352 6 228 245
92
+ -------------------------------------------------------------------------------------------------------------------IVAVNNYHLPFRPAYHDVHAAHLFVLnrLGTPGGVARVHDPQPPAYRGPLSREVLDIARASLRVGDESDPFIAGANPNWRWLEVRVEGPQPSPSLTWVHAIMVENLDALLRQ-----TQGPAALATFLDSLPNRVR--EHGPRALREIYVLGWPAQAEAGLHARFLASVAARHRKPRLAETARAVDHVANGWTGLRVSAAHASTLSAPtAPADVFRQGATLLRRWRRCMD----------
93
+ >SRR5262249_47284095 106 0.432 3.067E-20 25 167 352 10 157 159
94
+ -------------------------WRHELAGCLHACAGALLGFHGIEPLEALGAAWGFHYAPGDARREEYYFPCRPGWsLLRSLAPYHPVGSTWHHPAGdaEGWAEVRSAVASGRPAAVAVDNFHLPFRPAYRDVHSNHLVLVYGFDDDRrlAWVLDAVPPRFDGPIGLHQL----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
95
+ >SRR5690242_2363108 104 0.414 1.364E-19 22 147 352 18 145 148
96
+ ----------------------VDHWRHDLTGCLHQSMALLLAHEGLDPLEVLGAAWGFYYDPRDLRREEYYFPTPYESLLSGLAPFHPVRSRWHlpADAAAGWAEVRAAVADGRPVVVAVDNYELPFRPAYQDVHTNHLVTVVGFDDER------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
97
+ >SRR5262245_28891133 103 0.492 3.334E-19 21 144 352 23 148 156
98
+ ---------------------PVTVWRHDLAGCLHACAATLLAHHGVDPLDAVGAAWGFTYVPGDVRREEYYFPQAGRSLLESLAPYHPVRSEWHRpgSAEEGRRQLLSQLAEGRLVAVAVDNFHLPFRPAFGDVHTNHMVVVYGYD---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
99
+ >SRR5215468_4727865 99 0.449 4.843E-18 21 164 352 181 329 330
100
+ ---------------------PVNAWRHDLVGCLWTSAATMLACHGAPVVETLGAAWGFRHLPGDLRREEYYYPTPPGvSLYQAIAPYHPVRSVWHEpaDAGEGWTQVRDEVLAGRPTVVAADNFYLPFRPAYRDVHTNHLVVVHRLDEaaGTVGVLDAVPPHFDGDITL-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
101
+ >SRR5215467_9451402 98 0.413 1.179E-17 161 337 352 0 176 179
102
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------PITVADFLRAWSSANPPDPQDVFFSDTRIGRRFMTVTAGEPFPRLDAPMLRRILAANLAGFSGEDASDqaGWTGLPGLKLYLMSLLDACAR--PDPAQLASAYPLGWGMQAQASLHAELLRRWGTQAGLPELREAARLVETAAHAWTGLRMTAAHGRGDPRACAGELARHAAGLRRSYE--------------
103
+ >ERR1700761_2897183 97 0.376 2.869E-17 96 294 352 0 202 203
104
+ ------------------------------------------------------------------------------------------------TSGKDADDLIGLIEEHAAVIVAVDNYHLPFRPAYHDVHAAHLVVVpawrrTGGGQLEFYVSDAQPPGFQGWLAAEHLVESWTSGNPTDTQDVFFSSREIGGRVLTVKVRPEPGPLTDDQVVRALRGNLDRWDTgtaGPADRVWTGRSGVRRVAERLDEHSAD----PARLRPAYTFGWAMQAQAYLHGRFAQECALRTRPGHLAEVA---------------------------------------------------------
105
+ >SRR3954453_9001413 95 0.398 9.369E-17 25 153 352 6 138 139
106
+ -------------------------WRHDLGHCLQTTMGVLLQHHGLDPLQGLGAAWGFHHIPGEVRREEYYFPLHRGGLLESMAPYHPVRSRWHrpEDAGSGWQQVRAAVLAGSPVAVGADNFLLPVRPAYRDVHTNPLLTVYGFDDERdlVLIADP------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
107
+ >SRR5213080_3547182 95 0.373 9.369E-17 133 295 352 0 164 167
108
+ -------------------------------------------------------------------------------------------------------------------------------------HAAHLIVVFGFDDatDEVYVLDSTPPTKRGPMPMSEFLRARYSDNPVSGeRDFFFAGAPIANRWLQLEIGTPSPELTREWVTEVIATNLRRFREPDASPGWAGMAGLTHYLHSICART-LGADAGDALQELYTVGWTAQGSTALHADFLMLAGRRLGWDRLVEVGR--------------------------------------------------------
109
+ >SRR6266536_4448340 93 0.430 7.400E-16 95 227 352 3 139 147
110
+ -----------------------------------------------------------------------------------------------RDAAEVWAQVRERLAAGVPVAVAVDNFHLPFRPAYQDVHTNHLVVVHGCDErrGTVRVLDAVPPGFDGAIGLEQLTAARSSANPvRHDRDLFFTDSPIANRWLEVEVArERLPRFGPAGVSSGIQGNLAGFATPPPP----------------------------------------------------------------------------------------------------------------------------
111
+ >SRR5215216_3959313 92 0.600 9.936E-16 20 139 352 6 123 124
112
+ --------------------PPVQLWCRDLVSCLQATFATVLLHHGHDPVEVLGAPWQFRYRPGDVRTEEFYFPERAGDLGAAIAPYHPVSSRW--CTGRSLDELDAQLRAGRLVIAAVDNFHLPFRPAFCDVHAAHLLV--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
113
+ >SRR5947209_19054265 92 0.427 9.936E-16 106 229 352 0 123 137
114
+ ----------------------------------------------------------------------------------------------------------AAIDAGRPPIVAVDNYHLPFRPAYHDVHAAHLVVVRGYDGDTVQVLDPMPPAFDGPLAGSVLATSRAAATVTDPTDPFFAGSSLRRRWLEVYPTGRQPAWSWAWVQQTLRDNLAALAGPARDDS--------------------------------------------------------------------------------------------------------------------------
115
+ >ERR671931_2548281 89 0.317 7.793E-15 136 349 352 2 215 219
116
+ ----------------------------------------------------------------------------------------------------------------------------------------HLLTVYGFDDEarHALVADAVPPRYQGPLALSDFVAARGSSNPiRHDRDLFFTANPIAHRWLEVDVPARMPALDAEFLRFVVESNLRGFGARGADDAYEGIEGAHRFLVDALARLGRGEAVVDEV--FIVSGTALAA-AGVHADYLLLAADRFGSFALLEAARAVERVASHWAALRIALAELRDVSAPRLEGIRRRGEALVRDHRSALARLEHALEA--
117
+ >SRR5205809_3347963 88 0.503 1.880E-14 69 216 352 3 154 156
118
+ ---------------------------------------------------------------------EFYFPCSDDDLGRVLAPHHTLEIvtrRAPDDDARALSMLGERLEAGALPIAAVDNLHLPFRPAFGDLHAAHLLVVFGVdtDADAVWVSDAIPPAFQGPIPSATFLRSWGSANPPDGQDPFFSSSPLERRWYDVRLVD-APADNADWIARAIAD---------------------------------------------------------------------------------------------------------------------------------------
119
+ >MGYP000871191451 88 0.370 2.521E-14 21 186 352 11 179 182
120
+ ---------------------PLPGWYDDLLSCLQTTIGVSVQAHGWDPVQALAAGWRFSLPAAPVEPVEFYHPA-GDQIGERLCLHHPVQLRWHHPASRAEADagIAAATATGSHSIVAVNNYHLPFRPAYHDVHAAHLFVLnrLGTPGGVARVHDPQPPAYRGPLSREVLDIARASLRVGDESDPFFAG---------------------------------------------------------------------------------------------------------------------------------------------------------------------
121
+ >ERR1051325_7439640 87 0.568 3.380E-14 110 239 352 0 128 129
122
+ --------------------------------------------------------------------------------------------------------------AGELPILAVDNYHLPFRPAYHDVHAAHLLVVYGIDAGRgeVFVSDAMPPAFQGAIACDDLLRSWSSPNPRDDQDAFFSDARIDRRGLSVRFEAAAPPLDRDGLERGLRGNAERL---AAEDEWSGLPGLRRY----------------------------------------------------------------------------------------------------------------
123
+ >SRR5438874_1021933 87 0.443 4.531E-14 126 274 352 1 149 150
124
+ ------------------------------------------------------------------------------------------------------------------------------RPAYRDVHAAHLVVVYGveRDRDEVLVSDAVPPAFRGAIPAETFLRAWSSANPRRDEDGFFTDARIDRRCLSVEVDGPFPRLDPDRACEALRANARGFRDRDDGVGWSGLAGIARYLDWLLERARTGGA--RALEELYPLGWAMQAQAYLH-----------------------------------------------------------------------------
125
+ >SRR2546423_1612834 86 0.731 8.139E-14 192 351 352 0 159 163
126
+ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------RWLRAAMRGPVPDTGLDWLGHVIRENVARYRQGSAGGTQTGVLGLRSYLDELYRAVPGTDAAAELLSELYVISWNIQAQAGLHAEFLRREGVRWRIHELAEAAAGVAAVAHGWTGVRMTGAHSRVWGRHRFDELRRHAVELVRRLDNALDQLELAAEVVS
127
+ >SRR5581483_10036454 86 0.401 1.091E-13 2 131 352 11 141 143
128
+ --CSRSRPSPTRRPAMRVELQPVRYWRHELGHCLHTAAAVLLAHRGLDPVGVLGAAWGFHY-PGDLRREEYYLPGAAGSLFTALAPYHGIRSVWHRpaDAEPGWQQVRAQVIAGNLVAVAADNYHLPFRPAYRD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
129
+ >CryGeyStandDraft_13_1057135.scaffolds.fasta_scaffold438374_1 86 0.343 1.091E-13 183 345 352 9 169 184
130
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------FFAGTEIGGRWIDVEIGEPFPKPTEEWVGTVVAENLRGFVEPPIGDGWAGLAGLRRYLADVGDRADDRRD--PVLGELYTVGWASQSVTALHADFLRSTATALGWPEGLEAARRVEHLASLWTPLRILTAHGSTNGIELGDRLRRRMRALIRHYELAIEEVER------
131
+ >GraSoiStandDraft_48_1057284.scaffolds.fasta_scaffold433089_1 83 0.408 1.128E-12 82 217 352 7 148 149
132
+ ----------------------------------------------------------------------------------DLAPFHPVRAAWVgpSHPDEAWEQMRAALRDGRPLIAAVDNFHMPNRPAYGDVHAAHLVVVSGFDDDAgmVDVVESTPPAYRGPVPRECFLLAIGSRNDARSATqaVFFAGTEIGGRWIDVEIGEPFPKPTEEWVGTVVAEN--------------------------------------------------------------------------------------------------------------------------------------
133
+ >SRR5919197_3773559 81 0.555 2.704E-12 115 220 352 4 111 119
134
+ -------------------------------------------------------------------------------------------------------------------IVAVDNYHLPFRPAYHDVHAAHLVVVYGIDGSRreVSVSDAIPPAFQGAIAADDLLRSWASANPRDDQDAFFSDARIDRRSLSVRFDAPIPPLDLDRLEGALRSNAER-----------------------------------------------------------------------------------------------------------------------------------
135
+ >SRR5688572_26547208 80 0.366 8.655E-12 138 288 352 1 150 151
136
+ ------------------------------------------------------------------------------------------------------------------------------------------LVVYDVDagAGTVGVSDAMPPAYAGPLASDDLLRAWWSANPQDAQDVFFSGEGIGGRWLDVRLGTPFGALTVERLRDAVRANLRELTEPSDPD-TAGLAGLARFAREVAGRARRGEAG--RLAEVYTFGWSMQAQSALHGELLRRCGSAWSLP---------------------------------------------------------------
137
+ >SRR5207248_1268395 79 0.514 1.157E-11 22 156 352 61 200 201
138
+ ----------------------VRQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWELRFQPGRVGREEFAYPCRfEGDVARSLAPYHPIRSSWWSpaDDDDPLVELARRIEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDwqRDEVGVSDAQPP---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
139
+ >SRR5882724_12307322 79 0.377 1.547E-11 121 308 352 1 192 205
140
+ -------------------------------------------------------------------------------------------------------------------------YHLPFRPAFHDVHAAHLVVVPGWRRTThgaveFYVSDAQPPQFQGWLSAEHLMNSWTSGNTSDTQDVFFSSREIGGRVLTTKVRSPPDELTTGQVAQALQGNLDRWatgVAGPSDRVWTGRTGLCRFIERLQE----SCDDPEGLRSAYTFGWAMQAQAFLHGRFAQEFAHRSKQTVLLGVAASADRVVSAWSNVR-------------------------------------------
141
+ >SRR5687768_15959820 78 0.362 4.936E-11 148 296 352 0 146 147
142
+ ----------------------------------------------------------------------------------------------------------------------------------------------------VYLADAMPPVFHGHMPTENFLRAWTSVNPNDEQDAFFSDTAIDRRYLTVELGRRFPKFDPGFLHRVLDANLAGFAVEDDPAGWGGLRGLDRFLDDLVTRSRAGDAAG--VAAAYPFGWGMQAQASLHGELLRGWGARYEVPEVSEAGRL-------------------------------------------------------
143
+ >SRR4051794_15511058 76 0.471 1.177E-10 114 245 352 0 139 143
144
+ ------------------------------------------------------------------------------------------------------------------VIVAVDNYYLPFRPAYQDVHAAHLVVVPAWRRTpgggvEFYVSDAQPPAFQGWLSAEHLVASWTSGNPSDTQDVFFSSREIGGRVLTTKVRQRPEEPTTEQVVRALRGNLDRWDNgvaGPTDRLWTGRSGLRRFVDRLHE----------------------------------------------------------------------------------------------------------
145
+ >SRR3712207_6013657 75 0.379 2.801E-10 141 295 352 0 156 162
146
+ ---------------------------------------------------------------------------------------------------------------------------------------------YGFDEeaDEVYVLDSTPPLHKGPITMQDFLAARNSSNPVSGeRDFFFAGAPIANRWLHLEVQSSFPELTRDWVAEVIATNLRRFEASSDGPTFSGMDGLARYLRGVCER-AAGAEGGRALDEMYTVSWVSQAAAGLHADFLMEAGRRLHWYELAEVGR--------------------------------------------------------
147
+ >SRR5919199_5445869 75 0.276 3.740E-10 90 221 352 4 123 149
148
+ ------------------------------------------------------------------------------------------RWRHEADPGTAWREVRARIAGGRPVALAVDEFHLPFRPGYRHAHARRALVAYGFDDEAelVFVADPTPPRFQGSLALSALAAAREAEDGQP--------------WLELELAGPQRAFGPTVVGEALERNLRRF----------------------------------------------------------------------------------------------------------------------------------
149
+ >SRR3982074_821911 74 0.432 4.992E-10 24 131 352 26 136 138
150
+ ------------------------PWRHDLATCLQSCMATLLDANGVAALDVLGANWSFYHRPAALRRAEYYFPCREGvSLLASLAPYHPVRSQWHEpaDAEQGWSQVRAAVAVGRPVAVAVDNYELPFRPAYHD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
151
+ >SRR5436305_665040 74 0.325 4.992E-10 150 348 352 6 204 208
152
+ ------------------------------------------------------------------------------------------------------------------------------------------------------VSGPVPARSVGCIPRDQLTPARDSGNRsKHERDMFFADQPIGNRWLEAEIdAEHYPAFDRGSIRYAIRRNIDGFTEPGPlgAHRYAGLAGLRAFLDD---SAAKLAAGGQVADEVFVAAGAVLAATALHADWLARAARMLGTIELAEAARQVARVAHHWTAVRIMAALSRDGQVTAPRLARRAANLLADQ-ERALTALGDVLD---
153
+ >SRR5437870_817573 74 0.401 6.662E-10 53 175 352 1 127 131
154
+ -----------------------------------------------------GASWEFFYAAEDVRSEEFYHPAPRPTLGASMMPFHPVSTAWHESTDgeAALAEIEAVIAGGRPVIAAVDNFYMPIRPAFGDVHAAHLVVVTGFDEGTgeVVVLEETPPLYHGPIAVSDFLSARGSGN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
155
+ >9576|scaffold_589529_c1_1|-3|11 74 0.299 6.662E-10 102 225 352 3 129 140
156
+ ------------------------------------------------------------------------------------------------------DDAFDCVAQGHAVVVAVDSFELPYRPAWRRVNSGRSLIVTAIDRTTGVaqVIDGWMPHYTGSVALADLARARASSVPQDlRREPLYAGTPLRRRWWRIALASEIPHGGRDNVADALAQLTAQATEAP------------------------------------------------------------------------------------------------------------------------------
157
+ >SRR5919199_1533857 74 0.376 6.662E-10 196 341 352 9 148 191
158
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------VGGGAPTPPLDSERLEAALAENLDGFAGT----GWSGLAGLARYVDALET--RALAGERLPLEELYAFGWGMQAQTYLHGELLRETGAAWSVAELAEAGRAVQEVASAWTGLRMTGAHALGAPAAAAADLRRHGNRLRRRYEAALE----------
159
+ >SRR5581483_6019569 73 0.452 1.582E-09 25 127 352 2 107 108
160
+ -------------------------WRHELSNCLHSCIGVALTRHGFDALVVLGSRWQFYYRPGHLRREEYFWPCPPGvSLAETLMPYHPVSSRWHqpEDAGEGWQQVRDRILAGEPALVAVDNFWLPFRP--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
161
+ >SRR4029453_12411764 73 0.453 1.582E-09 20 136 352 87 205 207
162
+ --------------------PEPELWSRALISCLQATFGSLLVRVGADPLVVLGAAWRFLHLPGDVRFDEFYYPCHDGDLGAALAPHHELRSRWWQPADEDdvWREVRESLADDRLVIAAVDNFPLPFRPAYGDVHAAH-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
163
+ >SRR5688572_10121371 72 0.428 2.110E-09 126 239 352 1 126 131
164
+ ------------------------------------------------------------------------------------------------------------------------------RPAYHDVHAAHLIVVGAIDPAAGTadVSDAMPPAYAGPLALDDLLRAWRSTNPQDGQDVFFSGADIGGRWLDVRFDLPFPDLTADRLADMLRANVRGFVAASGADTapdgtaaetcWSGLAGLRRF----------------------------------------------------------------------------------------------------------------
165
+ >SRR5690348_7358098 72 0.400 2.815E-09 25 123 352 41 140 152
166
+ -------------------------WRHELTGCLHVAMGVLLKHRGHDPLEVLGASWGFYHDPADLRREEYYFPARRGSLLADLAPYHPVSSRWHaATGAEAWSRVRDTIAAGQPVAVAADNYYL------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
167
+ >SRR5258708_3518620 72 0.299 3.753E-09 52 194 352 71 215 216
168
+ ----------------------------------------------------LGARWGFAPPRQEAWGElcEYSLPLGSNPYAATLAARTGLQIAQHSGIAAA--DLSGHLASGAPAIVAVDSFYLPYRPAFGRVHSQRTILVRqGQGRTDLRVEDPWPPTYHGSLGAAHLERARRSTVPLDRRaEPVFAGRPIDFEWW-------------------------------------------------------------------------------------------------------------------------------------------------------------
169
+ >SRR5437870_3049837 71 0.420 6.672E-09 20 124 352 36 142 144
170
+ --------------------PEPELWYRDLISCLQATFGSLLLRVGADPLAVLGAGWRFLHLPGDVRSDEFYYPCHDADLGAALAPHHDLRSRWWQPADEDdvWREVRESLGDDRPVIAAVDNFHLP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
171
+ >DeetaT_8_FD_contig_31_513410_length_211_multi_4_in_0_out_0_1 71 0.352 6.672E-09 115 239 352 28 180 181
172
+ -------------------------------------------------------------------------------------------------------------------VIQADVFHLPFRPAYHDVHAAHTVIVHGFDDEagTVSVLDSMPPAFDGTVSQEdlanargsgnpseeadpffggtisqrDLANARGSGNPFEEADAFFGGTPIAGRWLRFEVVGEMPSLDREWVGRVVTGNLERFAAATDGPALSGLAGVDRF----------------------------------------------------------------------------------------------------------------
173
+ >SRR5438105_1867171 70 0.453 1.185E-08 210 350 352 1 140 141
174
+ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LRHVLRENVRALRAPVTGSEWFGLAGLERFVDRLLTR-AAGPDRRTALEEQYVLSWGLQAQADLHAAFLLAAGGRWGLPCLSEAGCLVDRVAHAWTGLRMTGAHGRLDRPEFGLELRWHAGCLVRAHEEAVDAIELALDEL-
175
+ >ERR1700687_5501718 70 0.429 1.580E-08 40 156 352 8 127 128
176
+ ----------------------------------------VLQRHGWEPARALGAGWRFVAAKNPVEPVEFYHPA-GEMLADHLCLHHPVLLRWHQPAHDAAAhcDIGESLAHGTAPIVAVNNFHLPFRPAYHDVHAAHLVVVTGYDEhhDNYQILDLMPP---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
177
+ >SRR5262249_51622373 70 0.418 1.580E-08 20 115 352 51 148 149
178
+ --------------------LPVEGWYHDLCSCLQVDAACVLSAFGQEPLAVLGAGWGFQFRPGDWEPVEFYCPAPDGDLARALAPHHRLCCRWHHPDGPeaALAALVEALRAGLPAI--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
179
+ >12671|scaffold1085376_1|-94|01 69 0.310 2.105E-08 169 350 352 0 184 190
180
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------AARGSDNPvRHGRDLFFTDNPIGNRWLTIETADVMPAFDQDFVAAAVGRNLADFgdtsTSGASTVSYAGAEGQRRFLGDIADRLAAGDEG--AVDEAFVVAGPVLAQTALHASWLSRAARDFAEPGLAEAARRVERVAHHWSAIRIVVASSRAAPAAAAGPLRRRATALADDHWRALDDIARIIGAV-
181
+ >SRR4051794_8300831 69 0.350 2.804E-08 4 116 352 6 119 120
182
+ ----HCRQEYGERAPMNVALSPMTFWYHDLCSCLHNTIATVLHYHHQEPTQTLGAVWDFYYAPAQFHKEEYFFPSRHPTLAENLLPYHPISADWRdSSAADSWPAVREAVARGTPAIV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
183
+ >SRR5277367_2659287 68 0.500 6.627E-08 21 123 352 29 136 137
184
+ ---------------------PVQHWCRDLVSCLLATLATVLLRAGHAPLEVLGAHWEFRYRPGDVRTEEFYYPcAVPGDLAASLAPHHPLSSSWREPgagaGTEALAEISDELAAGRLVIAAVDNFYL------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
185
+ >SRR5579872_5282090 67 0.350 1.175E-07 136 246 352 1 114 115
186
+ ----------------------------------------------------------------------------------------------------------------------------------------HIILVYGVDQHRglVWLCDPTPPGFNGPLPIGDFQQARNSANPADVQDAFFSASEIRGRYLRIVADPGLAPLSPLELGAALKANLAAFGADSPADgPWTGNIGLRRYLDSVTSH---------------------------------------------------------------------------------------------------------
187
+ >SRR5205809_7151332 66 0.289 2.082E-07 104 209 352 0 106 107
188
+ --------------------------------------------------------------------------------------------------------LREYLHAGRTAIVAVDSYYLPYRPAFGHVHSSRTILVRRGDSQAAEVEDVWSPGYSGPLGWDHLERARYSCMPYAAlTEPVFSGCPLSGEWFAVDLEPVMLPDPTVW----------------------------------------------------------------------------------------------------------------------------------------------
189
+ >SRR5919204_5113640 66 0.452 2.082E-07 21 124 352 194 299 301
190
+ ---------------------PVRLWCRDTISCLHATLATVADHHGWSPLSAVGPLWEFRYFPGDDRGEEFYYPLCGRPPGEALMPYHPvsIEWRAAKSARQGLAELRVALAEDVLPIVAVDNYHLP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
191
+ >SRR5690242_5973043 66 0.305 2.771E-07 69 200 352 4 135 139
192
+ ---------------------------------------------------------------------EYSLPGGVLSFSERIASRTGVKIVQQDGAHSS--SLYSFLATGQTAIAVVDSFHLPYRPAFGRVHSHRTILVRqGLDPTDVLVEDEWPPAYHGPVPVRSLEAARYSAVPLDPvREPVFAGGKIRGEWFHLEMDG-------------------------------------------------------------------------------------------------------------------------------------------------------
193
+ >SRR5882724_5907471 66 0.394 2.771E-07 20 121 352 93 196 197
194
+ --------------------PEPELWCRDLISCLQATFGTLLARLGADPLAVLGAGWRFLHLPGDVRFDEFYYPCPDADLGAALAPHHELRARWWQPADEDdvWREVRESLVDDRPVIAAVDNF--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
195
+ >SRR5919198_2188180 65 0.556 3.688E-07 115 193 352 5 83 99
196
+ -------------------------------------------------------------------------------------------------------------------IVAVDNYHLPFRPAFRDVHAGHLVVLCAVEDDAVVVSDALSPAFQGRVALADFARSWESANGARGESGFFSGSPIGGAL--------------------------------------------------------------------------------------------------------------------------------------------------------------
197
+ >SRR4051794_17814528 65 0.418 3.688E-07 103 226 352 0 128 136
198
+ -------------------------------------------------------------------------------------------------------ELIELIEAHASVIVAVDNYHLPFRPAYHDVHAAHLIVVpawrRTVDGDvEFYVSDAQPPRFQGWLSEEHVMNACTSGNPADHQDVFFSSQAIGGRVLTVQVRSQPAELTVDRFTEAIAGNVQRWITSEG-----------------------------------------------------------------------------------------------------------------------------
199
+ >ERR1700744_2712043 65 0.342 4.908E-07 21 124 352 5 108 109
200
+ ---------------------PVP-WYRPAINCLAAGVGTVLATAGHDPLDILGSGWDFTHVPGQLPFEEFYWSVTEAsDLGSRLAPHHRVTVEWSDDDAGDLSGLMSELEQDRIPVAAVDKYFLP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
201
+ >ERR1051325_11437074 64 0.327 1.156E-06 62 179 352 4 120 127
202
+ --------------------------------------------------------------PLRSQLVEYSLPFGPSSFIEALLFRTGLRICSHTGPDP--NNLRRYLAEGGEAIIAVDSFLLPYRPAFGRVHSHRTIIVRkGQNGDEVWVEDAWPPSYEGSLPLAVIENAQHSQVSLDP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
203
+ >SRR6266404_4313130 63 0.313 1.537E-06 100 197 352 2 100 112
204
+ ----------------------------------------------------------------------------------------------------SESELRQHLALSEPAIVAVDSFYLPYRPAFGKVHSHRTLLVVSSREDLVWVEDDWPPCYQGPLQWSCLQRARYGDVPLQPRFePVFAGGTLSGIWFSVR----------------------------------------------------------------------------------------------------------------------------------------------------------
205
+ >MGYP000377277604 63 0.341 2.044E-06 100 175 352 8 89 135
206
+ ----------------------------------------------------------------------------------------------------SVDDLRAMLDAGKPVIIGLDSDDLygaGDAPFTDDVVAGHAVVITGIDDETglVYINDPGFPDGAGVaIPLETFEDAWQDAD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
207
+ >SRR5690242_18701050 63 0.616 2.044E-06 1 105 352 51 157 158
208
+ -GDRHRRAPRGRRAGMIETAPQVQLRPRDLRRCLHGSLATALLHDGHDPVTVLGAPWEFRRRAGAWTTEEYYYLPEPDSLAKRLAPYHPFESTWHrgDGPGDPLDELR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
209
+ >DeetaT_20_FD_contig_41_1501015_length_280_multi_3_in_0_out_0_1 63 0.335 2.044E-06 188 348 352 0 159 163
210
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PIGNRWLEAEIdAEHYPAFDRGSIRYAIRRNIDGFTEPGPlgAHSYAGLAGLRAFLDD---SAAKLAAGGQVADEVFVAAGAVLAATALHADWLARAARVLGTIELAEAARQVERVAHHWTAVRIMAALSRDGQVTAPRLARRAANLLADQ-ERALTALGDVLD---
211
+ >SRR5256714_1899555 63 0.418 2.044E-06 232 348 352 2 116 218
212
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GPAALRRYVDDLE--ARALAGERRPLEELYAFGWGMQAQTYLHGELLRETGAGWSVAELAEAGRGVQEVASAWTGLRMTGAHGLGAPAVAAADLRPHGNPLRRRHQEALQRVGRAGG---
213
+ >SRR4051794_37091874 63 0.333 2.718E-06 189 323 352 1 133 134
214
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------IDRRCLLMHVQSVQPPLQRDAVGAALRSNLARFARSDAPEGWNGRAGIDRFLEQLVERARAGD--RQALSDAYRFGWSMQAQSYLHGELLRTKGREWQVAALSEAGRVVESVASAWSGLRVTAAHGRNAPAGVAA----------------------------
215
+ >SRR5215218_1112994 62 0.319 3.614E-06 204 344 352 0 139 145
216
+ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PLTREWLTEVIATNLRRFREPDAGAGWAGMAGLTHYLQSICER-SLGPDAGDALQELYTVGWTAQGSTALHADFLMLAGRQLGWDRLVEVGRHVDRLANEWTALRMFGAHGFTRPAEIAERLRRRTIIFLSNYEQTLQLLE-------
217
+ >SRR5215216_6411204 61 0.281 8.489E-06 101 171 352 27 93 106
218
+ -----------------------------------------------------------------------------------------------------LEQLIAHLETGLPAIAFVNTAHLSHW----NHETGHAVVVIGMDEQSVFIHDPAIDEPAKAIPIPEFEAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
219
+ >MGYP000919097685 60 0.303 1.499E-05 100 171 352 5 83 138
220
+ ----------------------------------------------------------------------------------------------------TVDDLRAALDAGEPVIVGIDSadvYSGGGGPFDPGMESGHAVVVTGIDngpPGVLYINDPGFPdGAGVEIPLELFEDAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
221
+ >SRR5205814_1734864 60 0.336 1.992E-05 136 243 352 0 112 114
222
+ ----------------------------------------------------------------------------------------------------------------------------------------HLVTVVGFDDERgeAFVIDPVPPRFSGAIPVEVLTAARSSGNPiVGERDLFFTGNPIGNRWLDIEVEADVPAHDLDFVREVVACNRDAFEATSESGSPvrTGLSGERRFLDSI------------------------------------------------------------------------------------------------------------
223
+ >ERR1700722_17685023 59 0.402 2.647E-05 25 118 352 15 111 112
224
+ -------------------------WCRDLISCLQSTFAAVLDRAGEDPLAVLGAGWQFRQIPGDVRPEEFYYPCSgEGDLGAALAPRHALHSRWWQpaDPDDPWQEIRAELDEDRLVIAAV-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
225
+ >SRR5690348_3972261 59 0.380 2.647E-05 19 115 352 22 121 122
226
+ -------------------APPLPRWYRDYVSCLQSTMATLLLAAGEDPVDTLGLGWEFLHIEGDVRAEEFYWPCRkPGDVAGSLLPHHAATSRWLQaDADDPLAPLEAALAEGRlPVL--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
227
+ >MGYP000822340748 59 0.274 2.647E-05 113 200 352 49 137 210
228
+ -----------------------------------------------------------------------------------------------------------------PIIVGADTYFLPYSANHKKRHAKHTLILCGFDltRNVVYVIDWYsPWFYKGEIDIEIFLQARNSKNEKD--DSFYSGTPIKNNWAYIEKIP-------------------------------------------------------------------------------------------------------------------------------------------------------
229
+ >SRR5215831_14536786 59 0.487 3.516E-05 230 350 352 10 129 138
230
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------WSGLRGVERYVDHVLSKAP-GPDGLAVLEELYVLSWGLQAQADLHAEFLRRHGVAQDLPALREAAAVVDRVAHAWTGFRMTGAHGRLDRPDFGGELAWHGSVLVRAHHEAADALELALEEL-
231
+ >OM-RGC.v1.007693089 59 0.259 4.671E-05 101 171 352 0 76 148
232
+ -----------------------------------------------------------------------------------------------------LDELRTLLDADTPVIIGLDAddlYGTGDSPFADDLVSGHAVVITGIDDEAglVYINDPGFPDGAGVaISISEFEDAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
233
+ >SRR6185436_5620841 58 0.392 6.204E-05 136 244 352 2 113 117
234
+ ----------------------------------------------------------------------------------------------------------------------------------------HLVVTYGFDEeaDEVYVLDSTPPVFQGAIAVRDFLAARGSTVPREGeQDFFFSGTPIANRWLHLEVGADFPELTREWVSEVVAAHVRGFRATRETASGAALSGLAGLGRYLR-----------------------------------------------------------------------------------------------------------
235
+ >SRR5262245_38567583 58 0.340 8.239E-05 158 290 352 1 132 133
236
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------FQGPIPLAALTAARDSANPvRHERDLFFTANPLANRWLTIHLHGPQPVLDLDLVRRALLGNATGLLDGAADgDRLRGLAGQRWF---LQSLLADLAAGAHRVDEVFIVAGALLATTGLHADFLAAAASRLDRPEL-------------------------------------------------------------
237
+ >3300027819.a:Ga0209514_10000001_1266 58 0.188 8.239E-05 94 171 352 222 311 431
238
+ ----------------------------------------------------------------------------------------------FSSVSEAIRQLEIVINSGRPIMVHLDSYYVKeefaktsefWKNNVGDSHSSHFMVVTGYNESHVYINDPTEPNlsiKNVEVPIEIFKEAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
239
+ >SRR5688500_5242141 57 0.301 1.094E-04 100 181 352 29 110 113
240
+ ----------------------------------------------------------------------------------------------------SLGKLRQALTSGPVLVGPLDMSQLTYMPNHERlVGADHFVVLYGIDHEHVFLHDPAGFAY-VALPLNDFRRAWRAENIEYRPD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
241
+ >SRR4051794_22847671 57 0.238 1.453E-04 100 175 352 10 96 106
242
+ ----------------------------------------------------------------------------------------------------SLDGIKSEIAQGRPVIVLVDNSRyirseggkqVPY-PSGQGFEAPHIVVVTGYDADNVYLNDPlaiTVSGKDFRVPTASFQAAAGAKG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
243
+ >SRR5512142_3069760 56 0.355 3.400E-04 180 286 352 2 106 108
244
+ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------RDYFFAGSQIGCRWIDVKLGDPFPKPTTAWVGEVIAENLRGFVEPSVPDRRIGLAGLRWYLTDLSERVDSED--GNVLGELYTVGWAAQSVTALHADFLRQTASAFG-----------------------------------------------------------------
245
+ >SRR5215510_68064 55 0.326 4.513E-04 148 245 352 4 107 115
246
+ ----------------------------------------------------------------------------------------------------------------------------------------------------IYVCDSMPPRFRGPIPITDFINAWSSINQRTENDEVFSGAGIDRRWLSVELGAAFPAVDAAFLCRTLAANREQWHRTTPPqsaserDSWTGAQGLWQFLDALLE----------------------------------------------------------------------------------------------------------
247
+ >SRR5215472_923646 55 0.569 4.513E-04 21 99 352 76 154 157
248
+ ---------------------PVQLWTRDLGSCLQAVFATLLLRERQDPVTVLGAAWDFRYEPGDWRSEEFYYPCAGRPLAEALAPLHPLTSTWHRAPTD------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
249
+ >SRR5262249_44885629 55 0.361 5.989E-04 26 116 352 65 158 160
250
+ --------------------------RHDLAGCLPACAASLLGARGLDPLETLGASWSSLHARGEVRRVEYSSPSPPGaSLLGALAPYHPTRSRWHEpaDAEQGWAQVRAAVAAGTPVAV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
251
+ >MGYP000859423510 55 0.312 7.949E-04 95 171 352 82 161 324
252
+ -----------------------------------------------------------------------------------------------PSGDNGLYDVLQIVNDGVPCIVNVDGYFLSYHPLFKTEHENHAAVLYGYNMNlhTIYISDYMPPYfFNGEISISEFLEAR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
253
+ >SRR5580693_9259363 53 0.469 1.857E-03 21 100 352 23 103 111
254
+ ---------------------PIQHWCRDLVSCLQATFATVLAHAGHDPLDALGAHWEFRYRPGDVRTEEFYYPcAVPGDLAASLAPHHPLSSSWRQPAEGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
255
+ >SRR5690349_8663926 53 0.305 1.857E-03 92 173 352 44 126 150
256
+ --------------------------------------------------------------------------------------------TSFPDFRQASDYLKELLARRKLVFVWGDEYYLPYRkEAFHAIHSTHSFVVTDYDGENkaYYVEDWD--GLYGYLPAAHVEAAFDS----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
257
+ >SRR6266851_2378427 53 0.386 2.464E-03 127 224 352 0 100 101
258
+ -------------------------------------------------------------------------------------------------------------------------------PAFGDVHTNHLLSVFGLDEERgcAYVVDDIPPAFRGWIPIAMLDAARGSANrAEHGRDRFFTNEAIRWRWLEIRCSAQLPRLSDEAIDAVLGQNLAGFRQP-------------------------------------------------------------------------------------------------------------------------------
259
+ >SaaInlV_150m_DNA_4_1039716.scaffolds.fasta_scaffold159517_1 53 0.262 2.464E-03 85 175 352 0 99 177
260
+ -------------------------------------------------------------------------------------EHYGVD--THQGAGAGIESLMSELVQGHAVIVAVDADDM-WNPGslfrglFGEDGADHAVVVTGLdlsdpDHPQVYINDPgDPNGAGKAYPLEQFLAAWSDSG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
261
+ >SRR5919198_383436 53 0.382 2.464E-03 22 113 352 254 347 351
262
+ ----------------------VEPWYDDLASCLHSTLASVVAQRQLDPLVVLGSGWTFAYSPGEWAPAEFFYPALDGSLAATLAPHqpLSIAWREPGSPEEAEQELVAALAGGGP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
263
+ >SRR5436853_446621 53 0.283 3.268E-03 136 238 352 0 97 123
264
+ ----------------------------------------------------------------------------------------------------------------------------------------HLLTVYGFDDDagTALLADPVPPSFAGPVPRSLLAAARDSDNPvLHERDMFFTANPIANRWLDVRVGPVQPAFGPAFVRDAVAAAR--------PDPPSAAASLRR-----------------------------------------------------------------------------------------------------------------
265
+ >SRR5215204_5524451 52 0.283 4.335E-03 103 162 352 1 59 117
266
+ -------------------------------------------------------------------------------------------------------HLVDKLDKGMPVCLPVDIYFLPFTSHFQRLHMAHYVNVFGYNDTQYYIISPYY-RYQGWV---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
267
+ >SRR3954452_13515641 52 0.349 4.335E-03 219 341 352 0 119 120
268
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGMLAISTETAWTGMNGLDRYLTRLT--VRAEEKDADALADVYPFGWAMQAQAYMHAELLRACAVRWRRPVLGEAARRLAAVSHAWTGVPISAGHGGAD-TRVAETVERHCRRLRRCHEQAVE----------
269
+ >SRR5271163_123213 52 0.265 5.750E-03 100 175 352 2 79 120
270
+ ----------------------------------------------------------------------------------------------------SVDDLQQYLDQGRSVIVSVDPDPI-WYPGQPDQGEGHAVMITAIDETTgmVTLDDTgSPQGNEEQVPISEFQQAWAEHD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
271
+ >GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold8820481_1 52 0.270 5.750E-03 99 171 352 122 191 210
272
+ ---------------------------------------------------------------------------------------------------PTLKTIRDFLRKGYIAIVNVNYYPLYRQPGY----SGHFVIVLSIDKRSVRLHDPGLPRKdNMRIPLRQFLAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
273
+ >SRR3990172_4137688 52 0.270 5.750E-03 99 171 352 128 197 216
274
+ ---------------------------------------------------------------------------------------------------PTLKTIRDFLRKGYIAIVNVNYYPLYRQPGY----SGHFVIVLSIDKRSVRLHDPGLPRKdNMRIPLRQFLAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
275
+ >SRR5215467_2164624 51 0.316 7.626E-03 152 289 352 0 137 138
276
+ --------------------------------------------------------------------------------------------------------------------------------------------------------DASPPSFKGSIPIHALQAARGSENPAiHNRDLFFTNVPIANRWLEVEIGNTFPCITKQWVKEVLSTNTQRFAAPTSEVALSGTEGMVRYFGDF-GLDQVRERGTPAMDELVLVSVAFQEATALHADFLRAMGQRLDWRE--------------------------------------------------------------
277
+ >MGYP000895684420 51 0.301 7.626E-03 100 171 352 80 151 152
278
+ ----------------------------------------------------------------------------------------------------SLARLRGLVARGPVVVGPLDMSKLTYIPGHEHlVGADHFVLVHDVTDEEVFMHDPGGSPY-VSLPIADFLAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
279
+ >SRR5919197_1036140 51 0.415 7.626E-03 22 119 352 166 266 267
280
+ ----------------------VRQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWEFRFQRGRVGREEFAYPCRfEGDVARSLAPYHPLRSSWWSPalDEDPLGELARRVEDGELPVAAVD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
281
+ >2286|Ga0268252_1295275_1|+2|10 51 0.373 1.011E-02 274 348 352 2 76 87
282
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HGELLRWCGRRWIDAALAEAGRRVERVAHLWTGVRVVCAHLRDDPAPHGRTLARHTNRLQHAYEDAVEAVARAAG---
283
+ >SRR5947208_11376747 51 0.406 1.011E-02 226 343 352 2 118 122
284
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PARGRAGLAGLACYLDGICER-AAGPHGRAALDEIYTVGWQMQAATALHADFLRLAGRSLDWDELAEAGRQVDRLANAWTPLRIMGAHAGTRDLDVTGELRRRATRLVADHELTLELL--------
285
+ >ERR671937_651557 51 0.397 1.011E-02 22 111 352 66 158 160
286
+ ----------------------VRQWYRDPVSCLQSTLATVVLAAGAEPLPVLGLGWEFLFEPGDVRPEEFYYPCRfEGDLARSLAPYHPIRSRWWspEPDEDPLAELARRVEAG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
287
+ >SRR5215472_1733542 51 0.384 1.341E-02 156 269 352 0 114 118
288
+ ------------------------------------------------------------------------------------------------------------------------------------------------------------PGFDGPMASRDFLLAWGSANPADTRDAFFSDSGIGRRCLITRVTGPgRGPVGRDGLAAALRANLNDLTAPlaTSQLPWTGLAGLHRYLDDLRQRAAAGD--QEVLADAYAFGWPMQA----------------------------------------------------------------------------------
289
+ >SRR4051812_10767532 51 0.393 1.341E-02 20 106 352 55 143 145
290
+ --------------------PEPELWYRDLISCLQATFGSLLVRDGADPLVVLGAGWRFLHLPGDVRFDEFYYPCRDGDLGAALAPHHDLHSRWWQPGDEGdvWREIRE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
291
+ >MGYP000929587384 50 0.285 1.778E-02 115 172 352 1 63 84
292
+ -------------------------------------------------------------------------------------------------------------------IVPVDTALLPYWLTRSDvpeteRGTDHAVVVVAVDEQHVYVNDPDFAQAPQVVELGWFLDAWR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
293
+ >SRR5713101_4123152 50 0.337 1.778E-02 52 140 352 9 92 103
294
+ ----------------------------------------------------LGARWSFGFPRDDRELQEYTL---PDYLAAKLRTRTGIVIRSHSDPAGL--ELRRHLADEQPAAIAVDSFYLPYRPAFQRVHSSRTILV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
295
+ >SRR5206468_3369128 50 0.358 1.778E-02 224 343 352 7 123 124
296
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GEPAPAYRGLAGLRGF---LTAVADALAEGRPASDEGFVVAGPILAITGLHADWLARAARMFDDPRLLEAARVVERVAHHWTAFRIAVATARADPPGAAESLRARGRALVDDHERALALL--------
297
+ >SRR5437879_4950898 50 0.301 2.358E-02 141 239 352 1 106 113
298
+ ---------------------------------------------------------------------------------------------------------------------------------------------YGVDDGHklVWVLDAIPPAFQGPISRATFDAGRDSSNRsRQERDMFFADNPIANRWLELDIEPDRNADgalgDRDRLRQILEGNLRGFTQERKSGPFEGVRGVDRF----------------------------------------------------------------------------------------------------------------
299
+ >GraSoiStandDraft_27_1057306.scaffolds.fasta_scaffold00573_2 50 0.252 2.358E-02 101 171 352 0 77 154
300
+ -----------------------------------------------------------------------------------------------------LDDLKKFIDNGQPPIALVKYANLPDRVDKGST-GGQYVVVVGYDDaaQNIFINDPDMfpwnhaAGFQKAYPYQTWLSAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
301
+ >A0A2H2XNW3 50 0.271 2.358E-02 101 180 352 125 201 208
302
+ -----------------------------------------------------------------------------------------------------IEDIRQLLACGYLIICLVNWCKLNAHIGYE----GHYVLVYDISETHIYLHDPGLPTYiSQALTITEFEQAWAAPTQRDRN---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
303
+ >SRR3954466_6934608 49 0.329 3.126E-02 88 175 352 0 87 132
304
+ ----------------------------------------------------------------------------------------GIPVSIENSD---VSGLIDALDSGKGIIVAVDSGEYWTGEATEDNAPDHAVVVAAIDEENgiVYLSDTGtPDGNMLAVPLDAFLDAWGDSG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
305
+ >ERR1700752_922602 49 0.304 3.126E-02 100 175 352 3 84 182
306
+ ----------------------------------------------------------------------------------------------------TLTDLETYLDEGRSIILGVDSsdiWNEPEDPAEPVDQADHALVITAIDKEQGLVvlSDPgDPDGNQSIVPLSDFTEAWSDSG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
307
+ >SRR2546423_3336499 49 0.373 3.126E-02 24 111 352 176 266 268
308
+ ------------------------QWYRDPISCLQSTLATVLLAAGVEPLPVLGLAWEFLFRPGRIGREEFAYPCRfEGDVARSLAPYHPLRSHWWSPAPEedALAELARRVERG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
309
+ >SRR5947209_18303409 49 0.482 4.144E-02 20 75 352 68 123 125
310
+ --------------------PGVEFWRHDLSSCLQDCLATLLVQRRHDPVPALGAAWDFYYPPGDLRPEEYYYPCR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
311
+ >SRR6185503_18933790 49 0.450 4.144E-02 20 99 352 52 131 136
312
+ --------------------PEPELWYRDLISCLQATFGSLLLRAGADPLTVLGAGWRFLHVPGDVRSEEFYYPAEDADLGAALAPHHELHSRWWQPADE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
313
+ >ERR1700687_2983525 49 0.379 5.494E-02 232 318 352 22 106 109
314
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GLAALADLLADLASRLEREGAWP--LRDIYVLGWSAQAEASLHSRFLAGAARDLQRPDVAEAARWVDAVAHAWTGFRVAAAHGAAAP---------------------------------
315
+ >SRR5579862_9683268 49 0.357 5.494E-02 213 337 352 4 127 133
316
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ALKANLAAFSADSPADaPWTGNIGLRRYLDHVISHAEA--GHADAVRQVYPFAWSAQASAAMHGELLRTRGGQWRIPELTEAGRAVEAVASAWTPVRVLAAHGWQSPASIASSLRSYGIDLARQYR--------------
317
+ >SRR5215471_6261074 49 0.308 5.494E-02 197 327 352 0 128 147
318
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------EVTGPARALDRVFLRRVLADNGRRFRSGSGAGPeLTGLAGLERF----CEWaAAALAAAQPVVDEVFVVAGVVLADTALHADYLAHAAARLGAARLLEAARRVERLAHHWTAVRIAVATARGAEPTAAPRLGR------------------------
319
+ >SRR5213076_2182558 48 0.551 7.282E-02 115 161 352 0 48 116
320
+ -------------------------------------------------------------------------------------------------------------------IVAVDNYHLPFRPAYGDVHTNHLVVVYGFDDEagEVHVLDSKPPRYRGP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
321
+ >SRR5207237_9007299 48 0.259 7.282E-02 100 171 352 27 106 120
322
+ ----------------------------------------------------------------------------------------------------GVDRLKAEVAAGNPVVVWITAGkYVQRTPvvasyngeTFKLVAGEHAVVVYGYDSGGVYIMDVSNGSFTYT-EWSSFLTRW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
323
+ >4958|scaffold10375_5|+5204|00 48 0.285 7.282E-02 95 175 352 87 169 196
324
+ -----------------------------------------------------------------------------------------------SSPDKAWEANKTAIKT-KPIIVLTDIYHLKFRNEYLKQHGAHFIILFYYDElsNSVGVLDWYePHFYKGEILLNDFLSARYSEN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
325
+ >EndMetStandDraft_8_1072994.scaffolds.fasta_scaffold5616288_1 48 0.285 7.282E-02 95 175 352 87 169 196
326
+ -----------------------------------------------------------------------------------------------SSPDKAWEANKTAIKT-KPIIVLTDIYHLKFRNEYLKQHGAHFIILFYYDElsNSVGVLDWYePHFYKGEILLNDFLSARYSEN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
327
+ >SRR5262249_50530649 48 0.330 7.282E-02 1 105 352 93 204 205
328
+ -GGGRGMTSDVTARTYMIIGPEPELWYRDLVSCLRATFGSLLARACADPLTVLGAGWRFLHLPGDVRSEEFYYPCPADesggaDLGAALAPHHGLHSRWWQpaDEDDLWREVR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
329
+ >SRR5437667_7529899 48 0.381 9.652E-02 235 344 352 0 108 114
330
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GLRDYLAGVAARARDARD-PGVMDELYVAGWAAQQSAGLHADFLAAAGCRLGRTRLAEAGREVDRLAHHWSDLRMLGAHCRAEPAAAASRIRRRAAQLVTDTERVLAQLE-------
331
+ >SRR2546430_2482901 48 0.346 9.652E-02 146 269 352 4 127 134
332
+ --------------------------------------------------------------------------------------------------------------------------------------------------GKVRVLDEVPPRFDGEIELAELAAARHSVNPiVHGRDKFFTANPIAGRWLSVRATASPPytVDDRERISALLRANIERFDQ-SQADVYSGLSGQARFLAEGARRLDTDPAAKDEL--FVVAGTALAA----------------------------------------------------------------------------------
333
+ >SRR4030042_1077736 48 0.232 9.652E-02 117 172 352 7 59 215
334
+ ---------------------------------------------------------------------------------------------------------------------WRDRTDVPWTETWDD---GHYMVLLGMDAANLYFEDPSLLGARGVIPRAEFVDRWH-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
335
+ >MGYP000240722769 48 0.333 1.279E-01 113 145 352 42 74 91
336
+ -----------------------------------------------------------------------------------------------------------------PIIVLVDVYYLPYRKEYHKYHASHAVLLVGYDE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
337
+ >SRR5205814_4332124 48 0.359 1.279E-01 194 296 352 0 101 104
338
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LQVSIGEDFPAPAPGWVDGVIRQNVERFRAGGDGGSFSGLAGLRDYLAGVAARARDARD-PGVMDELYVAGWAAQQSAGLHADFLAAAGCRLGRTRLAEAGRE-------------------------------------------------------
339
+ >SRR6266542_2363188 47 0.509 1.695E-01 21 73 352 49 101 105
340
+ ---------------------PVRLWCRDLVSCLQATFATVLLHAGRDPLAALGAAWGFYYLPGDVTSEEFYYP--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
341
+ >SRR5688572_6867596 47 0.298 1.695E-01 49 150 352 41 139 141
342
+ -------------------------------------------------PELLGARWTFAlpHDERRAELTEYSLPFHPLGFVAAIDRRTGLRLQEGR-----AEELDGTVAGGTPVIAAVDSFHLPYRPAFGRVHSHRTVIVRRVLRDAVEV---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
343
+ >SRR3989344_2091767 47 0.222 2.247E-01 125 175 352 7 60 146
344
+ -----------------------------------------------------------------------------------------------------------------------------WVKSWEKVHSSHFMTVTGYDDDYVYINDPtdhDLTIKNMKVPNNNFLSAWSNGN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
345
+ >SRR5216683_1865760 47 0.315 2.247E-01 101 172 352 126 194 216
346
+ -----------------------------------------------------------------------------------------------------LDDLKRELSLGSLAIVNVNYFRL----LEKVGYAGHFVVVEAVDEEEVLLQNPGLPPIPnQRVAMATFLAAWH-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
347
+ >SRR3569832_2248283 46 0.377 2.977E-01 134 178 352 21 65 133
348
+ --------------------------------------------------------------------------------------------------------------------------------------GDHWFSVYGIDESGVRIYDPIPSKFTGTIPLEDFKRAWGGNALVA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
349
+ >SRR4051812_39536360 46 0.300 2.977E-01 104 172 352 70 137 138
350
+ --------------------------------------------------------------------------------------------------------VAAQLAAGLPPIALIpDWRALPAEQTY-ATGNAHAVVITGVTDSDVTFIDPW-PGKSFAMSTTRFEAAWS-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
351
+ >SRR5688572_24881951 46 0.294 2.977E-01 104 171 352 6 62 140
352
+ --------------------------------------------------------------------------------------------------------LEEALGAGLPPIVLIEVAPARW----------HYVVVVGVTDNAVHVHDPARAP-DLRIERAEFLSRW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
353
+ >SRR2546423_2718336 46 0.417 2.977E-01 22 99 352 74 152 162
354
+ ----------------------IRQWYRDPISCLQSTLAAVLLAAGAEPLPVLGLAWEFRFQPGRVGREEFAYPCRfEGDLAGSLAPYHPLRSSWWSPAPE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
355
+ >SRR5688500_13065661 46 0.325 3.944E-01 271 313 352 2 44 130
356
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------RGLYADFLTEAAALLGDPRVAEAASGWRAAADLWEDLSDAVIP--------------------------------------
357
+ >SRR4051812_2527260 46 0.279 3.944E-01 178 313 352 4 132 135
358
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HDRDRFFTNQPIAHRWLDVGLESPRPRPSTASI---VRGNLAAFRKVSDGHIFRGSSGLAQFLDTAAGAIAGGKPPDELF---VVAGTALAAS-SLHAEWLSREGMRTGTPVWCELGRAVDRISHHWTALRIAAAR--------------------------------------
359
+ >SRR4051812_40660863 46 0.407 3.944E-01 1 76 352 78 153 154
360
+ -GGGRGMTSVVMARTYMITGPEPELWYRDLISCLQATFGSLLARAGADPLTVLGAGWRFLHLPGDVRSEEFYYPCPP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
361
+ >SRR5207244_1963219 46 0.405 3.944E-01 22 99 352 88 166 171
362
+ ----------------------IRQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWEFLFVPGDVRPEEFYYPCRfKGDPARSLAPYHPIRSHWWRPAEE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
363
+ >A0A2B0WVF8 46 0.236 3.944E-01 93 171 352 79 171 362
364
+ ---------------------------------------------------------------------------------------------VYSDFGNGLQFIKECLNRQEVFIALGSTFFLPYSNDYlnpkfikshidihtDKYVTDHYLAINKLTEDSVFVQDPVPNKYMGEISLEEFHSFW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
365
+ >MGYP001339385609 46 0.295 5.226E-01 129 172 352 3 46 60
366
+ ---------------------------------------------------------------------------------------------------------------------------------YWDQVTSHVVVVIGCDKQGVVINDPSLPDGGRAISWDVFLAAWA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
367
+ >SRR6185295_5817841 46 0.388 5.226E-01 252 318 352 41 107 118
368
+ ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------DSAGVRQELYVLGWWMQAMTAVHARFLAEAGRALGRPELVEIARRVELVAHEWTGLRITAVHGPADD---------------------------------
369
+ >SRR4051812_38390578 46 0.280 5.226E-01 104 153 352 45 94 119
370
+ --------------------------------------------------------------------------------------------------------IRAELAASGTVIVVANTYHLPWSPAYQQSATPHWLLVTDTHDGLWHLVDP------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
371
+ >UPI000845F276 46 0.256 5.226E-01 100 169 352 85 156 167
372
+ ----------------------------------------------------------------------------------------------------TFQDIASALRNGEVPIVLISTYRL------HRVRAPHWVVVTGFDRHYIYFHDPYegfyernkRQAQHISIPINEFLR--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
373
+ >MGYP001050378313 46 0.526 5.226E-01 87 160 352 109 178 179
374
+ ---------------------------------------------------------------------------------------HGLRRESIDADDGAL------VARGELPIAAVDNFHLPFRPAFHDVHAAHLVVVYGVDRARGLVNvsDAMPPAFSG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
375
+ >ERR671931_602696 45 0.323 6.924E-01 203 304 352 0 98 99
376
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PALTRKWLHEVVEQNRERLL--ADDDPFCGLVALRAYMEDLLAYCGTPFGGRALVGA-YRLGWPMQAQADLHGELLRLAGARFDDAALCEAAAEVRGVGHAW-----------------------------------------------
377
+ >SRR5882724_1744776 45 0.295 6.924E-01 94 154 352 41 95 101
378
+ ----------------------------------------------------------------------------------------------YRIGARGLDELRGDLADGAVPVVLVSMDYI------HKDPTAHWVVVTGVDDENVTVNDPW-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
379
+ >SRR6478752_1188215 45 0.400 6.924E-01 20 99 352 50 134 137
380
+ --------------------PEPELWYRDLISCLQATFGSLLARLGADPLAVLGAGWRFLHLPGDVRSEEFYYPCPADesggtDLGAALAPHHQLHSLWWQPADE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
381
+ >SRR6185437_13975043 45 0.282 6.924E-01 135 173 352 14 51 246
382
+ ---------------------------------------------------------------------------------------------------------------------------------------DHYVVVTGVEDGIVRFHDPHGHPYA-TLPADDFIAAWAS----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
383
+ >SRR5215467_3611681 45 0.365 9.172E-01 255 347 352 9 101 110
384
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HMLGELYTVGWAAQSVAALHADFLRRHAMAFGWSEGLEAARSVDQLANLWTPLRIFAAHASTNGIEVSDRLRQRMRVVVDHYELALEELDRAA----
385
+ >SRR6185312_7446242 45 0.341 9.172E-01 268 308 352 11 51 138
386
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGRLLYADFLDVASVVLDEPALERAAAGFREAGDRWSKLA-------------------------------------------
387
+ >SRR5205085_3472310 45 0.430 9.172E-01 22 99 352 58 136 150
388
+ ----------------------VSQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWELRFQPGRVGREEFAYTCRfEGDLARSLAPYHPISSRWWSPAED------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
389
+ >SaaInl7_100m_RNA_FD_contig_21_4047958_length_241_multi_3_in_0_out_0_1 45 0.217 9.172E-01 85 171 352 72 172 360
390
+ -------------------------------------------------------------------------------------KIYTMERKFYRSLEEGLYGIHEHLSQNDFFISLGTTYYLPYSRDYKNPKyieshvksnsnkyvTDHYLSVYGLHNENIFINDPVPNKYIGAISLKDFSDFW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
391
+ >SRR6185312_10924508 44 0.395 1.215E+00 232 327 352 5 95 127
392
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GMSGLARYFDLLRERTAA-PDGHQALEELYVLGWSLQASTALHADFLMKAGKQLDWYQLTTVGRQVARIAHHWTPLRMLGAHG----RARPDEIRE------------------------
393
+ >MGYP001029765960 44 0.279 1.215E+00 110 152 352 91 133 258
394
+ --------------------------------------------------------------------------------------------------------------QKQPIMAFVDAFDCDWLPFYRKHHIKHTIIVYSINKDDIYFYD-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
395
+ >18595|scaffold4953588_1|+69|01 44 0.323 1.609E+00 86 152 352 25 95 99
396
+ --------------------------------------------------------------------------------------RAGATVTLHETSGrvAAVKHLQEALAAGTPPLAWVDQAHMPYlqLPEVLKGHIGHLVAIAGQTGDSYLIDD-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
397
+ >SRR5215208_421555 44 0.275 1.609E+00 188 295 352 1 106 107
398
+ --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PIRNRWFEIEWSGAVPVLPSlGRVRCVVNRNLETFDRPPVDGWTFGQAGLRLMFDVLAG---SAAPGVRESDEMFVVAGAALAKTALHAEYLRQAGRRWGVPSLSEASR--------------------------------------------------------
399
+ >SRR5215213_5558053 44 0.298 1.609E+00 101 175 352 49 114 117
400
+ -----------------------------------------------------------------------------------------------------LDDLKEELARGLYPIVYLElvSGQLRYV---------HSVVVVEITDDQVQVLDPEIGERAFNI--EDFNRAWSAKN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
401
+ >SRR5919202_3634501 44 0.400 1.609E+00 22 90 352 119 188 191
402
+ ----------------------VAHWYRDPLSCLQSTLASVLLHAGAEPLPVLGLAWEFLFKPGEVPREEFAYPCRfEGDVARSLEPYHPIR---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
403
+ >MGYP000886274396 44 0.291 2.132E+00 101 148 352 21 68 69
404
+ -----------------------------------------------------------------------------------------------------WKQNLKKLQEGIPLIILVDLFYLPYTIYLGKEHAAHAIIVYEYCDNKI-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
405
+ >SRR5581483_1739624 44 0.382 2.132E+00 25 92 352 42 109 110
406
+ -------------------------WYRDLISCLQATFGTLVARLGADPLAVLGAGWRFLHLPGDVRFDEFYYPCPDGDLGAALAPHHELRAR-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
407
+ >ERR1700741_1246718 44 0.491 2.132E+00 20 78 352 52 110 120
408
+ --------------------PEPELWYRDLISCLQATFGSIVARAGADPLTVLGAGWRFLHLPGDVRSEEFYYPCPPDD---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
409
+ >SRR4051794_34888363 44 0.322 2.132E+00 4 62 352 75 133 134
410
+ ----HAAAGRPGRRIAAMRLPRMEPWRHDLVGCLHTAIGSVMANHGFDPLVTLGASWGFYYRP-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
411
+ >5940|scaffold5088856_1|+3|11 43 0.366 2.824E+00 22 81 352 21 80 81
412
+ ----------------------LKRWYADPMSCLQACLGTVLIFPGADPLETLGTSWEFRYVPGDVRREEFYYPCRFKDDPA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
413
+ >ERR1700729_2025172 43 0.463 3.739E+00 23 63 352 65 105 111
414
+ -----------------------PMWYRDTVSCLPATIGSVLAYHGYDPLEVLGAGWDFTFIPG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
415
+ >SRR5206468_2692918 42 0.328 4.952E+00 1 64 352 41 104 110
416
+ -GRAQPAGRRGRGARVKVELTPLSYWFHDLCSCLHDCLGTVLTYHGQSPIEVMGASWEFFHSPDD-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
417
+ >SRR3954452_12164045 42 0.481 4.952E+00 20 73 352 70 123 125
418
+ --------------------PEPQWWYRDLVSCLQATFGTVLARAGIDPLSVLGAGWRFLHLPGDVRSEEFYYP--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
419
+ >SRR5579862_4067062 42 0.341 4.952E+00 268 308 352 13 53 130
420
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGRPLYGDFLDVAASVLREPSLERAAAGFRDAGRRWTGLA-------------------------------------------
421
+ >ERR1051325_11233650 42 0.454 4.952E+00 21 75 352 72 126 131
422
+ ---------------------PVRQWYRDPVSCLQSTLSTVLLDVGADPLAVLGLGFEFRYLPGEVRPEEFYYPGR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
423
+ >ERR1700712_5093054 42 0.252 4.952E+00 98 174 352 75 169 193
424
+ --------------------------------------------------------------------------------------------------DQALNDLKAALAAGKATMVGVNNTALykdfPHQDQPgQTPAANHQVVVIGYDtaTDTVYIDDGGWPPDPedagrpeggqkMPVKLDTFLAAWKAD---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
425
+ >SRR3712207_5935252 42 0.292 6.558E+00 268 308 352 0 40 104
426
+ ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GNLRALYADFLEQAAARLHAPELVPVAGLFRAAAEAWQSVA-------------------------------------------
427
+ >SRR5579872_692970 42 0.280 6.558E+00 76 146 352 32 106 111
428
+ ----------------------------------------------------------------------------PDVGIGKALELLGFAVaeRAHDDGADApFDDLRHALASGPVVLGPLDMGHLTYIPYHEALGGaDHFVLAYAMDEE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
429
+ >SRR2546430_4736840 42 0.270 6.558E+00 106 167 352 7 80 145
430
+ ----------------------------------------------------------------------------------------------------------EALGGGDPVLVVGDAFHLPWVPYHGRQHLDHGFVIEGLEPGAmpvvAHVVDAYDNatqwghavPLATTLPLADL----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
431
+ >SRR5256885_6193825 42 0.359 8.684E+00 1 98 352 12 114 119
432
+ -GRSPDMTVDVPARTYMITGPEPEWWYRDLVSCLQATFGSLLIREGADPLSVLGAGWRFLHLPGEVRSEEFYYPCPTDesgaiDLGAALAPHHQLHSRRLPPAG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
433
+ >SRR3954469_21777195 42 0.431 8.684E+00 20 77 352 32 89 121
434
+ --------------------PEPEWWYRDLISCLQATFGSVLANAGADPLAVLGAGWRFLHLPGEVRSEEFYYPCPAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
435
+ >SRR3546814_4326061 42 0.311 8.684E+00 100 160 352 66 120 151
436
+ ----------------------------------------------------------------------------------------------------GVAELEERFRAGWLPMVLVSTFYVHG------DHVAHWVVVTGFGPDAVYINDPWVDRKGG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
examples/7wux/msa/2/pairing.a3m ADDED
The diff for this file is too large to render. See raw diff
 
examples/dimer.fasta ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ >dimer
2
+ FPTIPLSRLFDNAMLRAHRLHQLAFDTYQEFEEAYIPKEQKYSFLQNPQTSLCFSESIPTPSNREETQQKSNLELLRISLLLIQSWLEPVQFLRSVFANSLVYGASDSNVYDLLKDLEERIQTLMGRLEDGSPRTGQIFKQTYSKFDTNSHNDDALLKNYGLLYCFRKDMDKVETFLRIVQCRSVEGSCGF:FSGSEATAAILSRAPWSLQSVNPGLKTNSSKEPKFTKCRSPERETFSCHWTDEVHHGTKNLGPIQLFYTRRNTQEWTQEWKECPDYVSAGENSCYFNSSFTSIWIPYCIKLTSNGGTVDEKCFSVDEIVQPDPPIALNWTLLNVSLTGIHADIQVRWEAPRNADIQKGWMVLEYELQYKEVNETKWKMMDPILTTSVPVYSLKVDKEYEVRVRSKQRNSGNYGEFSEVLYVTLPQMSQ
examples/example.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc30e4fc1073854069611a138dcec5e0aeefacbad5889784d4e97cf6f0bc1d9e
3
+ size 836
examples/example_with_msa.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de916e3746b17638c3e7f2409f4e0242f2e54dadeab39ae226aa582b1378ae4b
3
+ size 3367
examples/finetune_subset.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ 6hvq
2
+ 5mqc
3
+ 5zin
4
+ 3ew0
5
+ 5akv
examples/ligands/7wux_smiles.smi ADDED
@@ -0,0 +1 @@
 
 
1
+ Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O
examples/ligands/compounds-3d-R.sdf ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mol_R
2
+ MOE2022 3D
3
+
4
+ 55 59 0 0 1 0 0 0 0 0999 V2000
5
+ 1.8430 3.9110 0.0060 O 0 0 0 0 0 0 0 0 0 0 0 0
6
+ 1.0400 3.0050 0.1630 C 0 0 0 0 0 0 0 0 0 0 0 0
7
+ 1.3790 1.8370 0.7780 N 0 0 0 0 0 0 0 0 0 0 0 0
8
+ 2.6800 1.7040 1.4280 C 0 0 0 0 0 0 0 0 0 0 0 0
9
+ 3.2700 2.6180 1.3520 H 0 0 0 0 0 0 0 0 0 0 0 0
10
+ 2.5860 1.4190 2.4760 H 0 0 0 0 0 0 0 0 0 0 0 0
11
+ 3.2540 0.5870 0.5860 C 0 0 0 0 0 0 0 0 0 0 0 0
12
+ 3.7280 0.8360 -0.7040 C 0 0 0 0 0 0 0 0 0 0 0 0
13
+ 3.6890 1.8370 -1.1060 H 0 0 0 0 0 0 0 0 0 0 0 0
14
+ 4.2500 -0.1980 -1.4740 C 0 0 0 0 0 0 0 0 0 0 0 0
15
+ 4.6150 -0.0020 -2.4720 H 0 0 0 0 0 0 0 0 0 0 0 0
16
+ 4.2990 -1.4870 -0.9510 C 0 0 0 0 0 0 0 0 0 0 0 0
17
+ 4.8160 -2.4870 -1.7410 O 0 0 0 0 0 0 0 0 0 0 0 0
18
+ 4.8150 -3.8560 -1.2980 C 0 0 0 0 0 0 0 0 0 0 0 0
19
+ 5.2650 -4.4870 -2.0640 H 0 0 0 0 0 0 0 0 0 0 0 0
20
+ 3.7890 -4.1790 -1.1180 H 0 0 0 0 0 0 0 0 0 0 0 0
21
+ 5.3890 -3.9390 -0.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
22
+ 3.8390 -1.7460 0.3410 C 0 0 0 0 0 0 0 0 0 0 0 0
23
+ 3.8840 -2.7470 0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
24
+ 3.3220 -0.7060 1.1050 C 0 0 0 0 0 0 0 0 0 0 0 0
25
+ 2.9690 -0.9010 2.1070 H 0 0 0 0 0 0 0 0 0 0 0 0
26
+ 0.1730 0.9630 0.9500 C 0 0 2 0 0 0 0 0 0 0 0 0
27
+ -0.0990 0.7160 2.4440 C 0 0 0 0 0 0 0 0 0 0 0 0
28
+ 0.8190 0.5160 2.9970 H 0 0 0 0 0 0 0 0 0 0 0 0
29
+ -0.6260 1.5510 2.9050 H 0 0 0 0 0 0 0 0 0 0 0 0
30
+ -0.9990 -0.5510 2.3340 C 0 0 0 0 0 0 0 0 0 0 0 0
31
+ -2.0270 -0.2950 2.0750 H 0 0 0 0 0 0 0 0 0 0 0 0
32
+ -0.9910 -1.1350 3.2540 H 0 0 0 0 0 0 0 0 0 0 0 0
33
+ -0.3240 -1.2490 1.2430 N 0 0 0 0 0 0 0 0 0 0 0 0
34
+ -0.4900 -2.6690 0.9440 C 0 0 0 0 0 0 0 0 0 0 0 0
35
+ 0.2320 -3.0190 0.2070 H 0 0 0 0 0 0 0 0 0 0 0 0
36
+ -0.4230 -3.2860 1.8400 H 0 0 0 0 0 0 0 0 0 0 0 0
37
+ -1.9000 -2.6260 0.3860 C 0 0 0 0 0 0 0 0 0 0 0 0
38
+ -2.1050 -2.4640 -0.9870 C 0 0 0 0 0 0 0 0 0 0 0 0
39
+ -1.2570 -2.3610 -1.6480 H 0 0 0 0 0 0 0 0 0 0 0 0
40
+ -3.3950 -2.4360 -1.5060 C 0 0 0 0 0 0 0 0 0 0 0 0
41
+ -3.5580 -2.3090 -2.5660 H 0 0 0 0 0 0 0 0 0 0 0 0
42
+ -4.4710 -2.5730 -0.6380 C 0 0 0 0 0 0 0 0 0 0 0 0
43
+ -5.7290 -2.5430 -1.1380 F 0 0 0 0 0 0 0 0 0 0 0 0
44
+ -4.2930 -2.7400 0.7280 C 0 0 0 0 0 0 0 0 0 0 0 0
45
+ -5.1440 -2.8450 1.3840 H 0 0 0 0 0 0 0 0 0 0 0 0
46
+ -2.9990 -2.7680 1.2370 C 0 0 0 0 0 0 0 0 0 0 0 0
47
+ -2.8430 -2.9000 2.2980 H 0 0 0 0 0 0 0 0 0 0 0 0
48
+ 0.2400 -0.4310 0.3720 C 0 0 0 0 0 0 0 0 0 0 0 0
49
+ 0.6660 -0.7810 -0.7260 O 0 0 0 0 0 0 0 0 0 0 0 0
50
+ -0.9170 1.7860 0.2880 C 0 0 0 0 0 0 0 0 0 0 0 0
51
+ -2.2660 1.5010 0.0860 C 0 0 0 0 0 0 0 0 0 0 0 0
52
+ -2.6860 0.5710 0.4390 H 0 0 0 0 0 0 0 0 0 0 0 0
53
+ -3.0670 2.4300 -0.5770 C 0 0 0 0 0 0 0 0 0 0 0 0
54
+ -4.1150 2.2290 -0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
55
+ -2.4960 3.6230 -1.0220 C 0 0 0 0 0 0 0 0 0 0 0 0
56
+ -3.2780 4.5220 -1.6600 F 0 0 0 0 0 0 0 0 0 0 0 0
57
+ -1.1510 3.9170 -0.8290 C 0 0 0 0 0 0 0 0 0 0 0 0
58
+ -0.7280 4.8460 -1.1810 H 0 0 0 0 0 0 0 0 0 0 0 0
59
+ -0.3650 2.9730 -0.1680 C 0 0 0 0 0 0 0 0 0 0 0 0
60
+ 1 2 2 0 0 0 0
61
+ 2 3 1 0 0 0 0
62
+ 2 55 1 0 0 0 0
63
+ 3 4 1 0 0 0 0
64
+ 3 22 1 0 0 0 0
65
+ 4 5 1 0 0 0 0
66
+ 4 6 1 0 0 0 0
67
+ 4 7 1 0 0 0 0
68
+ 7 8 1 0 0 0 0
69
+ 7 20 2 0 0 0 0
70
+ 8 9 1 0 0 0 0
71
+ 8 10 2 0 0 0 0
72
+ 10 11 1 0 0 0 0
73
+ 10 12 1 0 0 0 0
74
+ 12 13 1 0 0 0 0
75
+ 12 18 2 0 0 0 0
76
+ 13 14 1 0 0 0 0
77
+ 14 15 1 0 0 0 0
78
+ 14 16 1 0 0 0 0
79
+ 14 17 1 0 0 0 0
80
+ 18 19 1 0 0 0 0
81
+ 18 20 1 0 0 0 0
82
+ 20 21 1 0 0 0 0
83
+ 22 23 1 0 0 0 0
84
+ 22 44 1 0 0 0 0
85
+ 22 46 1 0 0 0 0
86
+ 23 24 1 0 0 0 0
87
+ 23 25 1 0 0 0 0
88
+ 23 26 1 0 0 0 0
89
+ 26 27 1 0 0 0 0
90
+ 26 28 1 0 0 0 0
91
+ 26 29 1 0 0 0 0
92
+ 29 30 1 0 0 0 0
93
+ 29 44 1 0 0 0 0
94
+ 30 31 1 0 0 0 0
95
+ 30 32 1 0 0 0 0
96
+ 30 33 1 0 0 0 0
97
+ 33 34 1 0 0 0 0
98
+ 33 42 2 0 0 0 0
99
+ 34 35 1 0 0 0 0
100
+ 34 36 2 0 0 0 0
101
+ 36 37 1 0 0 0 0
102
+ 36 38 1 0 0 0 0
103
+ 38 39 1 0 0 0 0
104
+ 38 40 2 0 0 0 0
105
+ 40 41 1 0 0 0 0
106
+ 40 42 1 0 0 0 0
107
+ 42 43 1 0 0 0 0
108
+ 44 45 2 0 0 0 0
109
+ 46 47 1 0 0 0 0
110
+ 46 55 2 0 0 0 0
111
+ 47 48 1 0 0 0 0
112
+ 47 49 2 0 0 0 0
113
+ 49 50 1 0 0 0 0
114
+ 49 51 1 0 0 0 0
115
+ 51 52 1 0 0 0 0
116
+ 51 53 2 0 0 0 0
117
+ 53 54 1 0 0 0 0
118
+ 53 55 1 0 0 0 0
119
+ M END
120
+ > <Name>
121
+ S
122
+
123
+ $$$$
examples/ligands/compounds-3d-RS.sdf ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ mol_R
2
+ MOE2022 3D
3
+
4
+ 55 59 0 0 1 0 0 0 0 0999 V2000
5
+ 1.8430 3.9110 0.0060 O 0 0 0 0 0 0 0 0 0 0 0 0
6
+ 1.0400 3.0050 0.1630 C 0 0 0 0 0 0 0 0 0 0 0 0
7
+ 1.3790 1.8370 0.7780 N 0 0 0 0 0 0 0 0 0 0 0 0
8
+ 2.6800 1.7040 1.4280 C 0 0 0 0 0 0 0 0 0 0 0 0
9
+ 3.2700 2.6180 1.3520 H 0 0 0 0 0 0 0 0 0 0 0 0
10
+ 2.5860 1.4190 2.4760 H 0 0 0 0 0 0 0 0 0 0 0 0
11
+ 3.2540 0.5870 0.5860 C 0 0 0 0 0 0 0 0 0 0 0 0
12
+ 3.7280 0.8360 -0.7040 C 0 0 0 0 0 0 0 0 0 0 0 0
13
+ 3.6890 1.8370 -1.1060 H 0 0 0 0 0 0 0 0 0 0 0 0
14
+ 4.2500 -0.1980 -1.4740 C 0 0 0 0 0 0 0 0 0 0 0 0
15
+ 4.6150 -0.0020 -2.4720 H 0 0 0 0 0 0 0 0 0 0 0 0
16
+ 4.2990 -1.4870 -0.9510 C 0 0 0 0 0 0 0 0 0 0 0 0
17
+ 4.8160 -2.4870 -1.7410 O 0 0 0 0 0 0 0 0 0 0 0 0
18
+ 4.8150 -3.8560 -1.2980 C 0 0 0 0 0 0 0 0 0 0 0 0
19
+ 5.2650 -4.4870 -2.0640 H 0 0 0 0 0 0 0 0 0 0 0 0
20
+ 3.7890 -4.1790 -1.1180 H 0 0 0 0 0 0 0 0 0 0 0 0
21
+ 5.3890 -3.9390 -0.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
22
+ 3.8390 -1.7460 0.3410 C 0 0 0 0 0 0 0 0 0 0 0 0
23
+ 3.8840 -2.7470 0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
24
+ 3.3220 -0.7060 1.1050 C 0 0 0 0 0 0 0 0 0 0 0 0
25
+ 2.9690 -0.9010 2.1070 H 0 0 0 0 0 0 0 0 0 0 0 0
26
+ 0.1730 0.9630 0.9500 C 0 0 2 0 0 0 0 0 0 0 0 0
27
+ -0.0990 0.7160 2.4440 C 0 0 0 0 0 0 0 0 0 0 0 0
28
+ 0.8190 0.5160 2.9970 H 0 0 0 0 0 0 0 0 0 0 0 0
29
+ -0.6260 1.5510 2.9050 H 0 0 0 0 0 0 0 0 0 0 0 0
30
+ -0.9990 -0.5510 2.3340 C 0 0 0 0 0 0 0 0 0 0 0 0
31
+ -2.0270 -0.2950 2.0750 H 0 0 0 0 0 0 0 0 0 0 0 0
32
+ -0.9910 -1.1350 3.2540 H 0 0 0 0 0 0 0 0 0 0 0 0
33
+ -0.3240 -1.2490 1.2430 N 0 0 0 0 0 0 0 0 0 0 0 0
34
+ -0.4900 -2.6690 0.9440 C 0 0 0 0 0 0 0 0 0 0 0 0
35
+ 0.2320 -3.0190 0.2070 H 0 0 0 0 0 0 0 0 0 0 0 0
36
+ -0.4230 -3.2860 1.8400 H 0 0 0 0 0 0 0 0 0 0 0 0
37
+ -1.9000 -2.6260 0.3860 C 0 0 0 0 0 0 0 0 0 0 0 0
38
+ -2.1050 -2.4640 -0.9870 C 0 0 0 0 0 0 0 0 0 0 0 0
39
+ -1.2570 -2.3610 -1.6480 H 0 0 0 0 0 0 0 0 0 0 0 0
40
+ -3.3950 -2.4360 -1.5060 C 0 0 0 0 0 0 0 0 0 0 0 0
41
+ -3.5580 -2.3090 -2.5660 H 0 0 0 0 0 0 0 0 0 0 0 0
42
+ -4.4710 -2.5730 -0.6380 C 0 0 0 0 0 0 0 0 0 0 0 0
43
+ -5.7290 -2.5430 -1.1380 F 0 0 0 0 0 0 0 0 0 0 0 0
44
+ -4.2930 -2.7400 0.7280 C 0 0 0 0 0 0 0 0 0 0 0 0
45
+ -5.1440 -2.8450 1.3840 H 0 0 0 0 0 0 0 0 0 0 0 0
46
+ -2.9990 -2.7680 1.2370 C 0 0 0 0 0 0 0 0 0 0 0 0
47
+ -2.8430 -2.9000 2.2980 H 0 0 0 0 0 0 0 0 0 0 0 0
48
+ 0.2400 -0.4310 0.3720 C 0 0 0 0 0 0 0 0 0 0 0 0
49
+ 0.6660 -0.7810 -0.7260 O 0 0 0 0 0 0 0 0 0 0 0 0
50
+ -0.9170 1.7860 0.2880 C 0 0 0 0 0 0 0 0 0 0 0 0
51
+ -2.2660 1.5010 0.0860 C 0 0 0 0 0 0 0 0 0 0 0 0
52
+ -2.6860 0.5710 0.4390 H 0 0 0 0 0 0 0 0 0 0 0 0
53
+ -3.0670 2.4300 -0.5770 C 0 0 0 0 0 0 0 0 0 0 0 0
54
+ -4.1150 2.2290 -0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
55
+ -2.4960 3.6230 -1.0220 C 0 0 0 0 0 0 0 0 0 0 0 0
56
+ -3.2780 4.5220 -1.6600 F 0 0 0 0 0 0 0 0 0 0 0 0
57
+ -1.1510 3.9170 -0.8290 C 0 0 0 0 0 0 0 0 0 0 0 0
58
+ -0.7280 4.8460 -1.1810 H 0 0 0 0 0 0 0 0 0 0 0 0
59
+ -0.3650 2.9730 -0.1680 C 0 0 0 0 0 0 0 0 0 0 0 0
60
+ 1 2 2 0 0 0 0
61
+ 2 3 1 0 0 0 0
62
+ 2 55 1 0 0 0 0
63
+ 3 4 1 0 0 0 0
64
+ 3 22 1 0 0 0 0
65
+ 4 5 1 0 0 0 0
66
+ 4 6 1 0 0 0 0
67
+ 4 7 1 0 0 0 0
68
+ 7 8 1 0 0 0 0
69
+ 7 20 2 0 0 0 0
70
+ 8 9 1 0 0 0 0
71
+ 8 10 2 0 0 0 0
72
+ 10 11 1 0 0 0 0
73
+ 10 12 1 0 0 0 0
74
+ 12 13 1 0 0 0 0
75
+ 12 18 2 0 0 0 0
76
+ 13 14 1 0 0 0 0
77
+ 14 15 1 0 0 0 0
78
+ 14 16 1 0 0 0 0
79
+ 14 17 1 0 0 0 0
80
+ 18 19 1 0 0 0 0
81
+ 18 20 1 0 0 0 0
82
+ 20 21 1 0 0 0 0
83
+ 22 23 1 0 0 0 0
84
+ 22 44 1 0 0 0 0
85
+ 22 46 1 0 0 0 0
86
+ 23 24 1 0 0 0 0
87
+ 23 25 1 0 0 0 0
88
+ 23 26 1 0 0 0 0
89
+ 26 27 1 0 0 0 0
90
+ 26 28 1 0 0 0 0
91
+ 26 29 1 0 0 0 0
92
+ 29 30 1 0 0 0 0
93
+ 29 44 1 0 0 0 0
94
+ 30 31 1 0 0 0 0
95
+ 30 32 1 0 0 0 0
96
+ 30 33 1 0 0 0 0
97
+ 33 34 1 0 0 0 0
98
+ 33 42 2 0 0 0 0
99
+ 34 35 1 0 0 0 0
100
+ 34 36 2 0 0 0 0
101
+ 36 37 1 0 0 0 0
102
+ 36 38 1 0 0 0 0
103
+ 38 39 1 0 0 0 0
104
+ 38 40 2 0 0 0 0
105
+ 40 41 1 0 0 0 0
106
+ 40 42 1 0 0 0 0
107
+ 42 43 1 0 0 0 0
108
+ 44 45 2 0 0 0 0
109
+ 46 47 1 0 0 0 0
110
+ 46 55 2 0 0 0 0
111
+ 47 48 1 0 0 0 0
112
+ 47 49 2 0 0 0 0
113
+ 49 50 1 0 0 0 0
114
+ 49 51 1 0 0 0 0
115
+ 51 52 1 0 0 0 0
116
+ 51 53 2 0 0 0 0
117
+ 53 54 1 0 0 0 0
118
+ 53 55 1 0 0 0 0
119
+ M END
120
+ > <Name>
121
+ S
122
+
123
+ $$$$
124
+ mol_S
125
+ MOE2022 3D
126
+
127
+ 55 59 0 0 1 0 0 0 0 0999 V2000
128
+ 1.5220 4.2070 -0.8770 O 0 0 0 0 0 0 0 0 0 0 0 0
129
+ 0.8070 3.3300 -0.4130 C 0 0 0 0 0 0 0 0 0 0 0 0
130
+ 1.3080 2.1770 0.1270 N 0 0 0 0 0 0 0 0 0 0 0 0
131
+ 2.7120 2.0740 0.5360 C 0 0 0 0 0 0 0 0 0 0 0 0
132
+ 3.2960 2.8760 0.1030 H 0 0 0 0 0 0 0 0 0 0 0 0
133
+ 2.7910 2.2270 1.6050 H 0 0 0 0 0 0 0 0 0 0 0 0
134
+ 3.3560 0.7600 0.1460 C 0 0 0 0 0 0 0 0 0 0 0 0
135
+ 3.4390 0.3940 -1.1990 C 0 0 0 0 0 0 0 0 0 0 0 0
136
+ 3.0560 1.0430 -1.9460 H 0 0 0 0 0 0 0 0 0 0 0 0
137
+ 4.0160 -0.8170 -1.5650 C 0 0 0 0 0 0 0 0 0 0 0 0
138
+ 4.0770 -1.1020 -2.5830 H 0 0 0 0 0 0 0 0 0 0 0 0
139
+ 4.5220 -1.6610 -0.5810 C 0 0 0 0 0 0 0 0 0 0 0 0
140
+ 5.0790 -2.8500 -0.9960 O 0 0 0 0 0 0 0 0 0 0 0 0
141
+ 5.7550 -3.7890 -0.1290 C 0 0 0 0 0 0 0 0 0 0 0 0
142
+ 6.1420 -4.6210 -0.7170 H 0 0 0 0 0 0 0 0 0 0 0 0
143
+ 5.0650 -4.1830 0.6170 H 0 0 0 0 0 0 0 0 0 0 0 0
144
+ 6.5910 -3.3030 0.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
145
+ 4.4620 -1.3010 0.7650 C 0 0 0 0 0 0 0 0 0 0 0 0
146
+ 4.8540 -1.9350 1.5140 H 0 0 0 0 0 0 0 0 0 0 0 0
147
+ 3.8780 -0.0900 1.1220 C 0 0 0 0 0 0 0 0 0 0 0 0
148
+ 3.8290 0.1840 2.1450 H 0 0 0 0 0 0 0 0 0 0 0 0
149
+ 0.1910 1.3310 0.6670 C 0 0 1 0 0 0 0 0 0 0 0 0
150
+ 0.1330 -0.0450 -0.0200 C 0 0 0 0 0 0 0 0 0 0 0 0
151
+ -0.8850 -0.2520 -0.3480 H 0 0 0 0 0 0 0 0 0 0 0 0
152
+ 0.8030 -0.1100 -0.8750 H 0 0 0 0 0 0 0 0 0 0 0 0
153
+ 0.5050 -1.0520 1.0690 C 0 0 0 0 0 0 0 0 0 0 0 0
154
+ -0.1300 -1.9280 1.0300 H 0 0 0 0 0 0 0 0 0 0 0 0
155
+ 1.5230 -1.3960 0.9350 H 0 0 0 0 0 0 0 0 0 0 0 0
156
+ 0.3750 -0.3420 2.3320 N 0 0 0 0 0 0 0 0 0 0 0 0
157
+ 0.5330 -0.9630 3.6460 C 0 0 0 0 0 0 0 0 0 0 0 0
158
+ 0.0500 -0.3600 4.4060 H 0 0 0 0 0 0 0 0 0 0 0 0
159
+ 0.0060 -1.9070 3.6730 H 0 0 0 0 0 0 0 0 0 0 0 0
160
+ 1.9830 -1.1770 4.0270 C 0 0 0 0 0 0 0 0 0 0 0 0
161
+ 2.7010 -0.1350 4.6230 C 0 0 0 0 0 0 0 0 0 0 0 0
162
+ 2.2280 0.8000 4.8010 H 0 0 0 0 0 0 0 0 0 0 0 0
163
+ 4.0350 -0.3160 4.9780 C 0 0 0 0 0 0 0 0 0 0 0 0
164
+ 4.5900 0.4650 5.4300 H 0 0 0 0 0 0 0 0 0 0 0 0
165
+ 4.6300 -1.5450 4.7290 C 0 0 0 0 0 0 0 0 0 0 0 0
166
+ 5.9270 -1.7250 5.0670 F 0 0 0 0 0 0 0 0 0 0 0 0
167
+ 3.9400 -2.5950 4.1400 C 0 0 0 0 0 0 0 0 0 0 0 0
168
+ 4.4240 -3.5210 3.9680 H 0 0 0 0 0 0 0 0 0 0 0 0
169
+ 2.6070 -2.4050 3.7830 C 0 0 0 0 0 0 0 0 0 0 0 0
170
+ 2.0670 -3.1950 3.3280 H 0 0 0 0 0 0 0 0 0 0 0 0
171
+ 0.2890 0.9840 2.1620 C 0 0 0 0 0 0 0 0 0 0 0 0
172
+ 0.2840 1.7830 3.0980 O 0 0 0 0 0 0 0 0 0 0 0 0
173
+ -1.0390 2.1600 0.3390 C 0 0 0 0 0 0 0 0 0 0 0 0
174
+ -2.3890 1.9160 0.5940 C 0 0 0 0 0 0 0 0 0 0 0 0
175
+ -2.6960 1.0370 1.1020 H 0 0 0 0 0 0 0 0 0 0 0 0
176
+ -3.3350 2.8540 0.1730 C 0 0 0 0 0 0 0 0 0 0 0 0
177
+ -4.3690 2.7000 0.3510 H 0 0 0 0 0 0 0 0 0 0 0 0
178
+ -2.9040 4.0060 -0.4840 C 0 0 0 0 0 0 0 0 0 0 0 0
179
+ -3.8270 4.9050 -0.8900 F 0 0 0 0 0 0 0 0 0 0 0 0
180
+ -1.5630 4.2630 -0.7360 C 0 0 0 0 0 0 0 0 0 0 0 0
181
+ -1.2550 5.1470 -1.2310 H 0 0 0 0 0 0 0 0 0 0 0 0
182
+ -0.6330 3.3150 -0.3080 C 0 0 0 0 0 0 0 0 0 0 0 0
183
+ 1 2 2 0 0 0 0
184
+ 2 3 1 0 0 0 0
185
+ 2 55 1 0 0 0 0
186
+ 3 4 1 0 0 0 0
187
+ 3 22 1 0 0 0 0
188
+ 4 5 1 0 0 0 0
189
+ 4 6 1 0 0 0 0
190
+ 4 7 1 0 0 0 0
191
+ 7 8 1 0 0 0 0
192
+ 7 20 2 0 0 0 0
193
+ 8 9 1 0 0 0 0
194
+ 8 10 2 0 0 0 0
195
+ 10 11 1 0 0 0 0
196
+ 10 12 1 0 0 0 0
197
+ 12 13 1 0 0 0 0
198
+ 12 18 2 0 0 0 0
199
+ 13 14 1 0 0 0 0
200
+ 14 15 1 0 0 0 0
201
+ 14 16 1 0 0 0 0
202
+ 14 17 1 0 0 0 0
203
+ 18 19 1 0 0 0 0
204
+ 18 20 1 0 0 0 0
205
+ 20 21 1 0 0 0 0
206
+ 22 23 1 0 0 0 0
207
+ 22 44 1 0 0 0 0
208
+ 22 46 1 0 0 0 0
209
+ 23 24 1 0 0 0 0
210
+ 23 25 1 0 0 0 0
211
+ 23 26 1 0 0 0 0
212
+ 26 27 1 0 0 0 0
213
+ 26 28 1 0 0 0 0
214
+ 26 29 1 0 0 0 0
215
+ 29 30 1 0 0 0 0
216
+ 29 44 1 0 0 0 0
217
+ 30 31 1 0 0 0 0
218
+ 30 32 1 0 0 0 0
219
+ 30 33 1 0 0 0 0
220
+ 33 34 1 0 0 0 0
221
+ 33 42 2 0 0 0 0
222
+ 34 35 1 0 0 0 0
223
+ 34 36 2 0 0 0 0
224
+ 36 37 1 0 0 0 0
225
+ 36 38 1 0 0 0 0
226
+ 38 39 1 0 0 0 0
227
+ 38 40 2 0 0 0 0
228
+ 40 41 1 0 0 0 0
229
+ 40 42 1 0 0 0 0
230
+ 42 43 1 0 0 0 0
231
+ 44 45 2 0 0 0 0
232
+ 46 47 1 0 0 0 0
233
+ 46 55 2 0 0 0 0
234
+ 47 48 1 0 0 0 0
235
+ 47 49 2 0 0 0 0
236
+ 49 50 1 0 0 0 0
237
+ 49 51 1 0 0 0 0
238
+ 51 52 1 0 0 0 0
239
+ 51 53 2 0 0 0 0
240
+ 53 54 1 0 0 0 0
241
+ 53 55 1 0 0 0 0
242
+ M END
243
+ > <Name>
244
+ R
245
+
246
+ $$$$
examples/prot.fasta ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ >A|protein
2
+ MASWSHPQFEKGGTHVAETSAPTRSEPDTRVLTLPGTASAPEFRLIDIDGLLNNRATTDVRDLGSGRLNAWGNSFPAAELPAPGSLITVAGIPFTWANAHARGDNIRCEGQVVDIPPGQYDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSRMHYPHHVQEGLPTTMWLTRVGMPRHGVARSLRLPRSVAMHVFALTLRTAAAVRLAEGATT
3
+ >B|protein
4
+ MGSSHHHHHHSQDPNSTTTAPPVELWTRDLGSCLHGTLATALIRDGHDPVTVLGAPWEFRRRPGAWSSEEYFFFAEPDSLAGRLALYHPFESTWHRSDGDGVDDLREALAAGVLPIAAVDNFHLPFRPAFHDVHAAHLLVVYRITETEVYVSDAQPPAFQGAIPLADFLASWGSLNPPDDADVFFSASPSGRRWLRTRMTGPVPEPDRHWVGRVIRENVARYRQEPPADTQTGLPGLRRYLDELCALTPGTNAASEALSELYVISWNIQAQSGLHAEFLRAHSVKWRIPELAEAAAGVDAVAHGWTGVRMTGAHSRVWQRHRPAELRGHATALVRRLEAALDLLELAADAVS