Add large file
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -34
- .gitignore +141 -0
- README.md +0 -12
- app.py +506 -4
- assets/foldmark.png +0 -0
- assets/foldmark_head.png +0 -0
- configs/__init__.py +0 -0
- configs/configs_base.py +409 -0
- configs/configs_data.py +216 -0
- configs/configs_inference.py +31 -0
- cutlass +1 -0
- dataset/7pzb.pkl.gz +0 -0
- dataset/7pzb_unwatermarked.cif +0 -0
- dataset/output.csv +23 -0
- docs/colabfold_compatiable_msa.md +33 -0
- docs/docker_installation.md +30 -0
- docs/infer_json_format.md +243 -0
- docs/kernels.md +24 -0
- docs/model_performance.md +51 -0
- docs/msa_pipeline.md +101 -0
- docs/prepare_training_data.md +119 -0
- docs/training.md +88 -0
- examples/7dc6.pdb +0 -0
- examples/7dc6_watermarked.pdb +0 -0
- examples/7pzb/msa/1/non_pairing.a3m +0 -0
- examples/7pzb/msa/1/pairing.a3m +0 -0
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/0.a3m +0 -0
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/non_pairing.a3m +0 -0
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/pairing.a3m +0 -0
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/msa.sh +33 -0
- Protenix_new.zip → examples/7pzb_need_search_msa/msa_resmsa_seq_0/out.tar.gz +2 -2
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/pdb70_220313_db.m8 +135 -0
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/tmp_5561987135da4188987956d9f05d1af2.fasta +2 -0
- examples/7pzb_need_search_msa/msa_resmsa_seq_0/uniref_tax.m8 +0 -0
- examples/7pzb_unwatermarked.cif +0 -0
- examples/7r6r/msa/1/non_pairing.a3m +0 -0
- examples/7r6r/msa/1/pairing.a3m +0 -0
- examples/7r6r_watermarked.cif +0 -0
- examples/7wux/msa/1/non_pairing.a3m +0 -0
- examples/7wux/msa/1/pairing.a3m +544 -0
- examples/7wux/msa/2/non_pairing.a3m +436 -0
- examples/7wux/msa/2/pairing.a3m +0 -0
- examples/dimer.fasta +2 -0
- examples/example.json +3 -0
- examples/example_with_msa.json +3 -0
- examples/finetune_subset.txt +5 -0
- examples/ligands/7wux_smiles.smi +1 -0
- examples/ligands/compounds-3d-R.sdf +123 -0
- examples/ligands/compounds-3d-RS.sdf +246 -0
- examples/prot.fasta +4 -0
.gitattributes
CHANGED
@@ -1,35 +1,3 @@
|
|
1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.
|
24 |
-
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
3 |
+
examples/7pzb_need_search_msa/msa_resmsa_seq_0/out.tar.gz filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
*__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
*.o
|
9 |
+
*.obj
|
10 |
+
*.d
|
11 |
+
# ninja related
|
12 |
+
*ninja*
|
13 |
+
lock
|
14 |
+
# Distribution / packaging
|
15 |
+
.Python
|
16 |
+
.vscode
|
17 |
+
build/
|
18 |
+
develop-eggs/
|
19 |
+
dist/
|
20 |
+
downloads/
|
21 |
+
eggs/
|
22 |
+
.eggs/
|
23 |
+
lib/
|
24 |
+
lib64/
|
25 |
+
parts/
|
26 |
+
sdist/
|
27 |
+
var/
|
28 |
+
wheels/
|
29 |
+
share/python-wheels/
|
30 |
+
*.egg-info/
|
31 |
+
.installed.cfg
|
32 |
+
*.egg
|
33 |
+
MANIFEST
|
34 |
+
|
35 |
+
# PyInstaller
|
36 |
+
# Usually these files are written by a python script from a template
|
37 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
38 |
+
*.manifest
|
39 |
+
*.spec
|
40 |
+
|
41 |
+
# Installer logs
|
42 |
+
pip-log.txt
|
43 |
+
pip-delete-this-directory.txt
|
44 |
+
|
45 |
+
# Unit test / coverage reports
|
46 |
+
.tox/
|
47 |
+
.nox/
|
48 |
+
.coverage
|
49 |
+
.coverage.*
|
50 |
+
.cache
|
51 |
+
*.cover
|
52 |
+
*.py,cover
|
53 |
+
.hypothesis/
|
54 |
+
.pytest_cache/
|
55 |
+
|
56 |
+
# Translations
|
57 |
+
*.mo
|
58 |
+
*.pot
|
59 |
+
|
60 |
+
# Django stuff:
|
61 |
+
*.log
|
62 |
+
local_settings.py
|
63 |
+
db.sqlite3
|
64 |
+
db.sqlite3-journal
|
65 |
+
|
66 |
+
# Flask stuff:
|
67 |
+
instance/
|
68 |
+
.webassets-cache
|
69 |
+
|
70 |
+
# Scrapy stuff:
|
71 |
+
.scrapy
|
72 |
+
|
73 |
+
# Sphinx documentation
|
74 |
+
docs/_build/
|
75 |
+
|
76 |
+
# PyBuilder
|
77 |
+
.pybuilder/
|
78 |
+
target/
|
79 |
+
|
80 |
+
# Jupyter Notebook
|
81 |
+
.ipynb_checkpoints
|
82 |
+
|
83 |
+
# IPython
|
84 |
+
profile_default/
|
85 |
+
ipython_config.py
|
86 |
+
|
87 |
+
# data cache and checkpoints
|
88 |
+
data_cache/
|
89 |
+
checkpoints/
|
90 |
+
|
91 |
+
.pdm.toml
|
92 |
+
|
93 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
94 |
+
__pypackages__/
|
95 |
+
|
96 |
+
# Celery stuff
|
97 |
+
celerybeat-schedule
|
98 |
+
celerybeat.pid
|
99 |
+
|
100 |
+
# SageMath parsed files
|
101 |
+
*.sage.py
|
102 |
+
|
103 |
+
# Environments
|
104 |
+
.env
|
105 |
+
.venv
|
106 |
+
env/
|
107 |
+
venv/
|
108 |
+
ENV/
|
109 |
+
env.bak/
|
110 |
+
venv.bak/
|
111 |
+
|
112 |
+
# Spyder project settings
|
113 |
+
.spyderproject
|
114 |
+
.spyproject
|
115 |
+
|
116 |
+
# Rope project settings
|
117 |
+
.ropeproject
|
118 |
+
|
119 |
+
# mkdocs documentation
|
120 |
+
/site
|
121 |
+
|
122 |
+
# mypy
|
123 |
+
.mypy_cache/
|
124 |
+
.dmypy.json
|
125 |
+
dmypy.json
|
126 |
+
|
127 |
+
# Pyre type checker
|
128 |
+
.pyre/
|
129 |
+
|
130 |
+
# pytype static type analyzer
|
131 |
+
.pytype/
|
132 |
+
|
133 |
+
# Cython debug symbols
|
134 |
+
cython_debug/
|
135 |
+
|
136 |
+
wandb/
|
137 |
+
run_logs/
|
138 |
+
.gradio/
|
139 |
+
flagged/
|
140 |
+
release_data/
|
141 |
+
.pt
|
README.md
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
---
|
2 |
-
title: Protenix New
|
3 |
-
emoji: 💻
|
4 |
-
colorFrom: gray
|
5 |
-
colorTo: pink
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.15.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,7 +1,509 @@
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
5 |
|
6 |
-
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
import gradio as gr
|
3 |
+
import os
|
4 |
+
import uuid
|
5 |
+
from datetime import datetime
|
6 |
+
import numpy as np
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
from configs.configs_base import configs as configs_base
|
9 |
+
from configs.configs_data import data_configs
|
10 |
+
from configs.configs_inference import inference_configs
|
11 |
+
from runner.inference import download_infercence_cache, update_inference_configs, infer_predict, infer_detect, InferenceRunner
|
12 |
+
from protenix.config import parse_configs, parse_sys_args
|
13 |
+
from runner.msa_search import update_infer_json
|
14 |
+
from protenix.web_service.prediction_visualization import plot_best_confidence_measure, PredictionLoader
|
15 |
+
from process_data import process_data
|
16 |
+
import json
|
17 |
+
from typing import Dict, List
|
18 |
+
from Bio.PDB import MMCIFParser, PDBIO
|
19 |
+
import tempfile
|
20 |
+
import shutil
|
21 |
+
from Bio import PDB
|
22 |
+
from gradio_molecule3d import Molecule3D
|
23 |
|
24 |
+
# Path of the bundled example input offered in the "JSON Upload" tab.
EXAMPLE_PATH = './examples/example.json'
# In-memory copy of the example request; predict_structure compares incoming
# input against it to recognise the bundled example.
example_json=[{'sequences': [{'proteinChain': {'sequence': 'MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH', 'count': 2}}, {'dnaSequence': {'sequence': 'CTAGGTAACATTACTCGCG', 'count': 2}}, {'dnaSequence': {'sequence': 'GCGAGTAATGTTAC', 'count': 2}}, {'ligand': {'ligand': 'CCD_PCG', 'count': 2}}], 'name': '7pzb_need_search_msa'}]

# Custom CSS for styling (passed to gr.Blocks; #logo targets the header image)
custom_css = """
#logo {
width: 50%;
}
.title {
font-size: 32px;
font-weight: bold;
color: #4CAF50;
display: flex;
align-items: center; /* Vertically center the logo and text */
}
"""


# Runtime switches read from the environment; they are set here, at import
# time, before the inference runner is constructed further down.
os.environ["LAYERNORM_TYPE"] = "fast_layernorm"
os.environ["USE_DEEPSPEED_EVO_ATTTENTION"] = "true"
# Set environment variable in the script
os.environ['CUTLASS_PATH'] = './cutlass'

# Representation settings handed to every Molecule3D viewer: one entry per
# loaded model index, both drawn as cartoons with different colour/opacity.
# NOTE(review): which model index holds the watermarked structure depends on
# the order of the list returned by predict_structure — confirm against the
# colour legend shown in the UI.
reps = [
    {
        "model": 0,
        "chain": "",
        "resname": "",
        "style": "cartoon",
        "color": "whiteCarbon",
        "residue_range": "",
        "around": 0,
        "byres": False,
        "opacity": 0.2,
    },
    {
        "model": 1,
        "chain": "",
        "resname": "",
        "style": "cartoon",
        "color": "cyanCarbon",
        "residue_range": "",
        "around": 0,
        "byres": False,
        "opacity": 0.8,
    }
]
|
85 |
+
|
86 |
+
|
87 |
+
def align_pdb_files(pdb_file_1, pdb_file_2):
    """Superimpose the structure in ``pdb_file_2`` onto ``pdb_file_1``.

    Both files are parsed, the atoms named ``CA`` of the first model of each
    structure are paired in iteration order, and the resulting transformation
    is applied to the first model of the second structure. The transformed
    second structure is then written back to ``pdb_file_2``, overwriting it.

    Args:
        pdb_file_1: Path to the reference PDB file (left untouched).
        pdb_file_2: Path to the PDB file that is aligned and overwritten.

    Raises:
        Whatever ``Superimposer.set_atoms`` raises when the two CA atom lists
        cannot be paired (e.g. they differ in length).
    """
    # Load the structures
    structure_1 = PDB.PDBParser(QUIET=True).get_structure('Structure_1', pdb_file_1)
    structure_2 = PDB.PDBParser(QUIET=True).get_structure('Structure_2', pdb_file_2)

    # Only the first model of each structure takes part in the alignment
    model_1 = structure_1[0]
    model_2 = structure_2[0]

    # Use CA atoms as the alignment anchors
    atoms_1 = [atom for atom in model_1.get_atoms() if atom.get_name() == "CA"]
    atoms_2 = [atom for atom in model_2.get_atoms() if atom.get_name() == "CA"]

    # Fit atoms_2 onto atoms_1, then move all of model_2 with that transform
    super_imposer = PDB.Superimposer()
    super_imposer.set_atoms(atoms_1, atoms_2)
    super_imposer.apply(model_2)

    # Save the aligned structure back to the second (original) file
    io = PDB.PDBIO()
    io.set_structure(structure_2)
    io.save(pdb_file_2)
|
113 |
+
|
114 |
+
# Function to convert .cif to .pdb and save as a temporary file
|
115 |
+
def convert_cif_to_pdb(cif_path):
    """Write a PDB copy of an mmCIF file to a fresh temporary file.

    Args:
        cif_path (str): Path to the input CIF file.

    Returns:
        str: Path to the temporary ``.pdb`` file. The file is created with
        ``delete=False``, so it is not removed by this function.
    """
    structure = MMCIFParser().get_structure("protein", cif_path)

    # Reserve a unique *.pdb path; the handle itself is not needed for writing
    tmp_handle = tempfile.NamedTemporaryFile(suffix=".pdb", delete=False)
    tmp_handle.close()
    pdb_out_path = tmp_handle.name

    writer = PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_out_path)
    return pdb_out_path
|
139 |
+
|
140 |
+
def plot_3d(pred_loader):
    """Pick the first CIF of a prediction (in sorted order) and convert it to PDB.

    Args:
        pred_loader: Object exposing ``cif_paths``, an iterable of CIF paths.

    Returns:
        tuple: ``(temporary PDB path, selected CIF path)``.
    """
    first_cif = sorted(pred_loader.cif_paths)[0]
    return convert_cif_to_pdb(first_cif), first_cif
|
148 |
+
|
149 |
+
|
150 |
+
def parse_json_input(json_data: List[Dict]) -> Dict:
    """Convert Protenix JSON format to UI-friendly structure.

    Args:
        json_data: List of Protenix input entries. Each entry may carry a
            ``"name"`` and a ``"sequences"`` list whose items are keyed by
            ``"proteinChain"``, ``"dnaSequence"`` or ``"ligand"``.

    Returns:
        Dict with the keys ``"protein_chains"``, ``"dna_sequences"``,
        ``"ligands"`` (lists accumulated over all entries) and
        ``"complex_name"`` (the name of the last entry, ``""`` if absent).
        Sequence items of any other kind are ignored.
    """
    components = {
        "protein_chains": [],
        "dna_sequences": [],
        "ligands": [],
        "complex_name": ""
    }

    for entry in json_data:
        components["complex_name"] = entry.get("name", "")
        # A missing or null "sequences" is treated like an empty list, in line
        # with the tolerant lookup already used for "name".
        for seq in entry.get("sequences") or []:
            if "proteinChain" in seq:
                chain = seq["proteinChain"]
                components["protein_chains"].append({
                    "sequence": chain["sequence"],
                    "count": chain["count"]
                })
            elif "dnaSequence" in seq:
                dna = seq["dnaSequence"]
                components["dna_sequences"].append({
                    "sequence": dna["sequence"],
                    "count": dna["count"]
                })
            elif "ligand" in seq:
                ligand = seq["ligand"]
                components["ligands"].append({
                    "type": ligand["ligand"],
                    "count": ligand["count"]
                })
    return components
|
178 |
+
|
179 |
+
def create_protenix_json(input_data: Dict) -> List[Dict]:
    """Build a single-entry Protenix JSON document from collected UI inputs.

    Args:
        input_data: Mapping with ``"protein_chains"`` and ``"dna_sequences"``
            (items holding ``"sequence"``/``"count"``), ``"ligands"`` (items
            holding ``"type"``/``"count"``) and ``"complex_name"``.

    Returns:
        A one-element list whose entry has ``"sequences"`` (proteins first,
        then DNA, then ligands) and ``"name"``.
    """
    protein_items = [
        {"proteinChain": {"sequence": row["sequence"], "count": row["count"]}}
        for row in input_data["protein_chains"]
    ]
    dna_items = [
        {"dnaSequence": {"sequence": row["sequence"], "count": row["count"]}}
        for row in input_data["dna_sequences"]
    ]
    ligand_items = [
        {"ligand": {"ligand": row["type"], "count": row["count"]}}
        for row in input_data["ligands"]
    ]

    entry = {
        "sequences": protein_items + dna_items + ligand_items,
        "name": input_data["complex_name"],
    }
    return [entry]
|
211 |
+
|
212 |
+
|
213 |
+
|
214 |
+
def predict_structure(input_collector: dict):
    """Run (or look up) a structure prediction for a UI request.

    Handles both input types: an uploaded/example JSON (``"json"`` key) and
    manually entered components (``"data"`` key).

    Args:
        input_collector: Request dict built by the UI. Always contains
            ``"watermark"``; contains either ``"json"`` (a file path string,
            an object with a ``.name`` path, or already-parsed JSON data) or
            ``"data"`` (input for ``create_protenix_json``).

    Returns:
        tuple: ``(view3d, confidence_img_path, cif_path)`` where ``view3d`` is
        a single PDB path, or a two-element list of PDB paths when
        watermarking is enabled.

    Note:
        The module-level ``configs`` object is mutated on every call.
    """
    os.makedirs("./output", exist_ok=True)

    # Generate random filename with timestamp
    random_name = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    save_path = os.path.join("./output", f"{random_name}.json")

    print(input_collector)

    # The manual-input tab sends no "json" key at all, so look it up tolerantly.
    json_input = input_collector.get("json")
    if json_input:
        if isinstance(json_input, str):  # Example JSON case (file path)
            json_path = json_input
        else:  # File upload case exposes the path as .name
            json_path = getattr(json_input, "name", None)

        if json_path is not None:
            with open(json_path) as f:
                input_data = json.load(f)
        else:  # Direct JSON data case
            input_data = json_input
    else:  # Manual input case
        input_data = create_protenix_json(input_collector["data"])

    with open(save_path, "w") as f:
        json.dump(input_data, f, indent=2)

    # Record the requested watermark mode up front so the visualisation step
    # below never acts on a value left over from a previous request.
    watermark = input_collector['watermark']
    configs.watermark = watermark

    if input_data == example_json and watermark == True:  # noqa: E712 - keep exact comparison
        # Bundled example: reuse the stored outputs instead of running inference
        configs.saved_path = './output/example_output/'
    else:
        # run msa
        json_file = update_infer_json(save_path, './output', True)

        # Run prediction
        configs.input_json_path = json_file
        configs.saved_path = os.path.join("./output/", random_name)
        infer_predict(runner, configs)

    # Generate visualizations
    predictions_dir = os.path.join(configs.saved_path, 'predictions')
    view3d, cif_path = plot_3d(pred_loader=PredictionLoader(predictions_dir))
    if configs.watermark:
        orig_loader = PredictionLoader(os.path.join(configs.saved_path, 'predictions_orig'))
        view3d_orig, _ = plot_3d(pred_loader=orig_loader)
        # Overlay: the 'predictions_orig' structure is fitted onto the first one
        align_pdb_files(view3d, view3d_orig)
        view3d = [view3d, view3d_orig]
    plot_best_confidence_measure(predictions_dir)
    confidence_img_path = os.path.join(predictions_dir, "best_sample_confidence.png")

    return view3d, confidence_img_path, cif_path
|
264 |
+
|
265 |
+
|
266 |
+
# ---- Module-level setup: logging, configuration, model runner -------------
# Everything below runs at import time, before the UI is built.
logger = logging.getLogger(__name__)
LOG_FORMAT = "%(asctime)s,%(msecs)-3d %(levelname)-8s [%(filename)s:%(lineno)s %(funcName)s] %(message)s"
# NOTE(review): no `filename` is given, so `filemode` should have no effect
# here — confirm whether logging to a file was intended.
logging.basicConfig(
    format=LOG_FORMAT,
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    filemode="w",
)
# Enabled only when the environment variable is exactly "true".
configs_base["use_deepspeed_evo_attention"] = (
    os.environ.get("USE_DEEPSPEED_EVO_ATTTENTION", False) == "true"
)
# CLI-style argument string parsed by parse_configs in place of real argv.
arg_str = "--seeds 101 --dump_dir ./output --input_json_path ./examples/example.json --model.N_cycle 10 --sample_diffusion.N_sample 5 --sample_diffusion.N_step 200 "
# Merge base, data and inference settings; later dicts win on key clashes.
configs = {**configs_base, **{"data": data_configs}, **inference_configs}
configs = parse_configs(
    configs=configs,
    arg_str=arg_str,
    fill_required_with_null=True,
)
configs.load_checkpoint_path='./checkpoint.pt'
download_infercence_cache(configs, model_version="v0.2.0")
# Shared runner used by both prediction and watermark detection handlers.
runner = InferenceRunner(configs)
# Checkboxes are created outside the Blocks context and placed later with
# .render(), one per prediction tab.
add_watermark = gr.Checkbox(label="Add Watermark", value=True)
add_watermark1 = gr.Checkbox(label="Add Watermark", value=True)
|
289 |
+
|
290 |
+
# Gradio UI: two prediction tabs (JSON upload / manual input) and one
# watermark-detection tab, all sharing the module-level runner and configs.
with gr.Blocks(title="FoldMark", css=custom_css) as demo:
    with gr.Row():
        # Header logo; sized through the #logo rule in custom_css
        gr.Image(value="./assets/foldmark_head.png", elem_id="logo", label="Logo", height=150, show_label=False)

    def _uploaded_path(file):
        """Return the path of an upload given as a path string or an object with ``.name``."""
        return getattr(file, "name", file)

    def _parse_uploaded_json(file):
        """Load an uploaded JSON file and summarise its components for display."""
        with open(_uploaded_path(file)) as handle:
            return parse_json_input(json.load(handle))

    with gr.Tab("Structure Predictor (JSON Upload)"):
        # First create the upload component
        json_upload = gr.File(label="Upload JSON", file_types=[".json"])

        # Then create the example component that references it
        gr.Examples(
            examples=[[EXAMPLE_PATH]],
            inputs=[json_upload],
            label="Click to use example JSON:",
            examples_per_page=1
        )

        # Rest of the components
        upload_name = gr.Textbox(label="Complex Name (optional)")
        upload_output = gr.JSON(label="Parsed Components")

        json_upload.upload(
            fn=_parse_uploaded_json,
            inputs=json_upload,
            outputs=upload_output
        )

        # Shared prediction components
        with gr.Row():
            add_watermark.render()
            submit_btn = gr.Button("Predict Structure", variant="primary")

        with gr.Row():
            view3d = Molecule3D(label="3D Visualization", reps=reps)
            legend = gr.Markdown("""
**Color Legend:**

- <span style="color:grey">Unwatermarked Structure</span>
- <span style="color:cyan">Watermarked Structure</span>
""")
        with gr.Row():
            cif_file = gr.File(label="Download CIF File")
        with gr.Row():
            confidence_plot_image = gr.Image(label="Confidence Measures")

        # Hidden carrier for the request dict passed between the two steps
        input_collector = gr.JSON(visible=False)

        # Map inputs to a dictionary, then run the prediction on it
        submit_btn.click(
            fn=lambda j, w: {"json": j, "watermark": w},
            inputs=[json_upload, add_watermark],
            outputs=input_collector
        ).then(
            fn=predict_structure,
            inputs=input_collector,
            outputs=[view3d, confidence_plot_image, cif_file]
        )

        gr.Markdown("""
The example of the uploaded json file for structure prediction.
<pre>
[{
"sequences": [
{
"proteinChain": {
"sequence": "MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH",
"count": 2
}
},
{
"dnaSequence": {
"sequence": "CTAGGTAACATTACTCGCG",
"count": 2
}
},
{
"dnaSequence": {
"sequence": "GCGAGTAATGTTAC",
"count": 2
}
},
{
"ligand": {
"ligand": "CCD_PCG",
"count": 2
}
}
],
"name": "7pzb"
}]
</pre>
""")

    with gr.Tab("Structure Predictor (Manual Input)"):
        with gr.Row():
            complex_name = gr.Textbox(label="Complex Name")

        with gr.Accordion(label="Protein Chains", open=True):
            protein_chains = gr.Dataframe(
                headers=["Sequence", "Count"],
                datatype=["str", "number"],
                row_count=1,
                col_count=(2, "fixed")
            )

        with gr.Accordion(label="DNA Sequences", open=True):
            dna_sequences = gr.Dataframe(
                headers=["Sequence", "Count"],
                datatype=["str", "number"],
                row_count=1
            )

        with gr.Accordion(label="Ligands", open=True):
            ligands = gr.Dataframe(
                headers=["Ligand Type", "Count"],
                datatype=["str", "number"],
                row_count=1
            )

        manual_output = gr.JSON(label="Generated JSON")

        complex_name.change(
            fn=lambda x: {"complex_name": x},
            inputs=complex_name,
            outputs=manual_output
        )

        # Shared prediction components
        with gr.Row():
            add_watermark1.render()
            submit_btn = gr.Button("Predict Structure", variant="primary")

        with gr.Row():
            view3d = Molecule3D(label="3D Visualization", reps=reps)
            legend = gr.Markdown("""
**Color Legend:**

- <span style="color:grey">Unwatermarked Structure</span>
- <span style="color:cyan">Watermarked Structure</span>
""")
        with gr.Row():
            cif_file = gr.File(label="Download CIF File")
        with gr.Row():
            confidence_plot_image = gr.Image(label="Confidence Measures")

        input_collector = gr.JSON(visible=False)

        # Map inputs to a dictionary, then run the prediction on it.
        # NOTE(review): the Dataframe values are forwarded as delivered by
        # Gradio; confirm they match what create_protenix_json iterates over.
        submit_btn.click(
            fn=lambda c, p, d, l, w: {"data": {"complex_name": c, "protein_chains": p, "dna_sequences": d, "ligands": l}, "watermark": w},
            inputs=[complex_name, protein_chains, dna_sequences, ligands, add_watermark1],
            outputs=input_collector
        ).then(
            fn=predict_structure,
            inputs=input_collector,
            outputs=[view3d, confidence_plot_image, cif_file]
        )

    def is_watermarked(file):
        """Run watermark detection on an uploaded CIF file.

        Args:
            file: The upload, as a path string or an object exposing the path
                as ``.name``; ``None`` when the upload was cleared.

        Returns:
            tuple: ``(label, pdb_path)`` where ``label`` is ``"Watermarked"``
            or ``"Not Watermarked"`` and ``pdb_path`` is a temporary PDB
            conversion of the upload for the 3D viewer. ``("", None)`` when
            there is no file.
        """
        # The change event also fires when the upload is cleared
        if file is None:
            return "", None

        # Generate a unique subdirectory and filename
        unique_id = str(uuid.uuid4())
        subdir = os.path.join('./output', unique_id)
        os.makedirs(subdir, exist_ok=True)
        file_path = os.path.join(subdir, f"{unique_id}.cif")

        # Save the uploaded file to the new location
        shutil.copy(_uploaded_path(file), file_path)

        # Preprocess the directory, then run detection with the shared runner
        configs.process_success = process_data(subdir)
        configs.subdir = subdir
        result = infer_detect(runner, configs)

        temp_pdb_path = convert_cif_to_pdb(file_path)
        # Only an explicit False result is reported as "Not Watermarked"
        if result == False:  # noqa: E712 - keep exact comparison
            return "Not Watermarked", temp_pdb_path
        return "Watermarked", temp_pdb_path

    with gr.Tab("Watermark Detector"):
        # First create the upload component
        cif_upload = gr.File(label="Upload .cif", file_types=[".cif"])

        with gr.Row():
            cif_3d_view = Molecule3D(label="3D Visualization of Input", reps=reps)

        # Prediction output
        prediction_output = gr.Textbox(label="Prediction")

        # Define the interaction
        cif_upload.change(is_watermarked, inputs=cif_upload, outputs=[prediction_output, cif_3d_view])

        # Example files
        example_files = [
            "./examples/7r6r_watermarked.cif",
            "./examples/7pzb_unwatermarked.cif"
        ]

        gr.Examples(examples=example_files, inputs=cif_upload)
|
496 |
+
|
497 |
+
|
498 |
+
|
499 |
+
|
500 |
+
|
501 |
+
|
502 |
+
# Start the web server only when the file is executed as a script.
if __name__ == "__main__":

    # NOTE(review): binds on all interfaces and additionally requests a
    # public share link — confirm both are wanted in deployment.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_api=False,
        share=True
    )
|
assets/foldmark.png
ADDED
![]() |
assets/foldmark_head.png
ADDED
![]() |
configs/__init__.py
ADDED
File without changes
|
configs/configs_base.py
ADDED
@@ -0,0 +1,409 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2024 ByteDance and/or its affiliates.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
# pylint: disable=C0114,C0301
|
16 |
+
from protenix.config.extend_types import (
|
17 |
+
GlobalConfigValue,
|
18 |
+
ListValue,
|
19 |
+
RequiredValue,
|
20 |
+
ValueMaybeNone,
|
21 |
+
)
|
22 |
+
|
23 |
+
basic_configs = {
|
24 |
+
"project": RequiredValue(str),
|
25 |
+
"run_name": RequiredValue(str),
|
26 |
+
"base_dir": RequiredValue(str),
|
27 |
+
# training
|
28 |
+
"eval_interval": RequiredValue(int),
|
29 |
+
"log_interval": RequiredValue(int),
|
30 |
+
"checkpoint_interval": -1,
|
31 |
+
"eval_first": False, # run evaluate() before training steps
|
32 |
+
"iters_to_accumulate": 1,
|
33 |
+
"eval_only": False,
|
34 |
+
"load_checkpoint_path": "",
|
35 |
+
"load_ema_checkpoint_path": "",
|
36 |
+
"load_strict": False,
|
37 |
+
"load_params_only": True,
|
38 |
+
"skip_load_step": False,
|
39 |
+
"skip_load_optimizer": False,
|
40 |
+
"skip_load_scheduler": False,
|
41 |
+
"train_confidence_only": False,
|
42 |
+
"use_wandb": True,
|
43 |
+
"wandb_id": "",
|
44 |
+
"seed": 66,
|
45 |
+
"deterministic": False,
|
46 |
+
"ema_decay": -1.0,
|
47 |
+
"eval_ema_only": False, # whether wandb tracks only the EMA checkpoint metrics
|
48 |
+
"ema_mutable_param_keywords": [""],
|
49 |
+
}
|
50 |
+
data_configs = {
|
51 |
+
# Data
|
52 |
+
"train_crop_size": 256,
|
53 |
+
"test_max_n_token": -1,
|
54 |
+
"train_lig_atom_rename": False,
|
55 |
+
"train_shuffle_mols": False,
|
56 |
+
"train_shuffle_sym_ids": False,
|
57 |
+
"test_lig_atom_rename": False,
|
58 |
+
"test_shuffle_mols": False,
|
59 |
+
"test_shuffle_sym_ids": False,
|
60 |
+
}
|
61 |
+
optim_configs = {
|
62 |
+
# Optim
|
63 |
+
"lr": 0.0018,
|
64 |
+
"lr_scheduler": "af3",
|
65 |
+
"warmup_steps": 10,
|
66 |
+
"max_steps": RequiredValue(int),
|
67 |
+
"min_lr_ratio": 0.1,
|
68 |
+
"decay_every_n_steps": 50000,
|
69 |
+
"grad_clip_norm": 10,
|
70 |
+
# Optim - Adam
|
71 |
+
"adam": {
|
72 |
+
"beta1": 0.9,
|
73 |
+
"beta2": 0.95,
|
74 |
+
"weight_decay": 1e-8,
|
75 |
+
"lr": GlobalConfigValue("lr"),
|
76 |
+
"use_adamw": False,
|
77 |
+
},
|
78 |
+
# Optim - LRScheduler
|
79 |
+
"af3_lr_scheduler": {
|
80 |
+
"warmup_steps": GlobalConfigValue("warmup_steps"),
|
81 |
+
"decay_every_n_steps": GlobalConfigValue("decay_every_n_steps"),
|
82 |
+
"decay_factor": 0.95,
|
83 |
+
"lr": GlobalConfigValue("lr"),
|
84 |
+
},
|
85 |
+
}
|
86 |
+
model_configs = {
|
87 |
+
# Model
|
88 |
+
"c_s": 384,
|
89 |
+
"c_z": 128,
|
90 |
+
"c_s_inputs": 449, # c_s_inputs == c_token + 32 + 32 + 1
|
91 |
+
"watermark": 32,
|
92 |
+
"c_atom": 128,
|
93 |
+
"c_atompair": 16,
|
94 |
+
"c_token": 384,
|
95 |
+
"n_blocks": 48,
|
96 |
+
"max_atoms_per_token": 24, # DNA G max_atoms = 23
|
97 |
+
"no_bins": 64,
|
98 |
+
"sigma_data": 16.0,
|
99 |
+
"diffusion_batch_size": 48,
|
100 |
+
"diffusion_chunk_size": ValueMaybeNone(4), # chunksize of diffusion_batch_size
|
101 |
+
"blocks_per_ckpt": ValueMaybeNone(
|
102 |
+
1
|
103 |
+
), # NOTE: Number of blocks in each activation checkpoint, if None, no checkpointing is performed.
|
104 |
+
# switch of kernels
|
105 |
+
"use_memory_efficient_kernel": False,
|
106 |
+
"use_deepspeed_evo_attention": True,
|
107 |
+
"use_flash": False,
|
108 |
+
"use_lma": False,
|
109 |
+
"use_xformer": False,
|
110 |
+
"find_unused_parameters": False,
|
111 |
+
"dtype": "bf16", # default training dtype: bf16
|
112 |
+
"loss_metrics_sparse_enable": True, # the switch for both sparse lddt metrics and sparse bond/smooth lddt loss
|
113 |
+
"skip_amp": {
|
114 |
+
"sample_diffusion": True,
|
115 |
+
"confidence_head": True,
|
116 |
+
"sample_diffusion_training": True,
|
117 |
+
"loss": True,
|
118 |
+
},
|
119 |
+
"infer_setting": {
|
120 |
+
"chunk_size": ValueMaybeNone(
|
121 |
+
64
|
122 |
+
), # should be set to null for normal training and small dataset eval [for efficiency]
|
123 |
+
"sample_diffusion_chunk_size": ValueMaybeNone(
|
124 |
+
1
|
125 |
+
), # should be set to null for normal training and small dataset eval [for efficiency]
|
126 |
+
"lddt_metrics_sparse_enable": GlobalConfigValue("loss_metrics_sparse_enable"),
|
127 |
+
"lddt_metrics_chunk_size": ValueMaybeNone(
|
128 |
+
1
|
129 |
+
), # only works if loss_metrics_sparse_enable, can set as default 1
|
130 |
+
},
|
131 |
+
"train_noise_sampler": {
|
132 |
+
"p_mean": -1.2,
|
133 |
+
"p_std": 1.5,
|
134 |
+
"sigma_data": 16.0, # NOTE: in EDM, this is 1.0
|
135 |
+
},
|
136 |
+
"inference_noise_scheduler": {
|
137 |
+
"s_max": 160.0,
|
138 |
+
"s_min": 4e-4,
|
139 |
+
"rho": 7,
|
140 |
+
"sigma_data": 16.0, # NOTE: in EDM, this is 1.0
|
141 |
+
},
|
142 |
+
"sample_diffusion": {
|
143 |
+
"gamma0": 0.8,
|
144 |
+
"gamma_min": 1.0,
|
145 |
+
"noise_scale_lambda": 1.003,
|
146 |
+
"step_scale_eta": 1.5,
|
147 |
+
"N_step": 200,
|
148 |
+
"N_sample": 5,
|
149 |
+
"N_step_mini_rollout": 20,
|
150 |
+
"N_sample_mini_rollout": 1,
|
151 |
+
},
|
152 |
+
"model": {
|
153 |
+
"N_model_seed": 1, # for inference
|
154 |
+
"N_cycle": 4,
|
155 |
+
"input_embedder": {
|
156 |
+
"c_atom": GlobalConfigValue("c_atom"),
|
157 |
+
"c_atompair": GlobalConfigValue("c_atompair"),
|
158 |
+
"c_token": GlobalConfigValue("c_token"),
|
159 |
+
},
|
160 |
+
"relative_position_encoding": {
|
161 |
+
"r_max": 32,
|
162 |
+
"s_max": 2,
|
163 |
+
"c_z": GlobalConfigValue("c_z"),
|
164 |
+
},
|
165 |
+
"template_embedder": {
|
166 |
+
"c": 64,
|
167 |
+
"c_z": GlobalConfigValue("c_z"),
|
168 |
+
"n_blocks": 0,
|
169 |
+
"dropout": 0.25,
|
170 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
171 |
+
},
|
172 |
+
"msa_module": {
|
173 |
+
"c_m": 64,
|
174 |
+
"c_z": GlobalConfigValue("c_z"),
|
175 |
+
"c_s_inputs": GlobalConfigValue("c_s_inputs"),
|
176 |
+
"n_blocks": 4,
|
177 |
+
"msa_dropout": 0.15,
|
178 |
+
"pair_dropout": 0.25,
|
179 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
180 |
+
},
|
181 |
+
"pairformer": {
|
182 |
+
"n_blocks": GlobalConfigValue("n_blocks"),
|
183 |
+
"c_z": GlobalConfigValue("c_z"),
|
184 |
+
"c_s": GlobalConfigValue("c_s"),
|
185 |
+
"n_heads": 16,
|
186 |
+
"dropout": 0.25,
|
187 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
188 |
+
},
|
189 |
+
"pairformer_encoder": {
|
190 |
+
"n_blocks": 6,
|
191 |
+
"c_z": GlobalConfigValue("c_z"),
|
192 |
+
"c_s": GlobalConfigValue("c_s"),
|
193 |
+
"n_heads": 16,
|
194 |
+
"dropout": 0.25,
|
195 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
196 |
+
},
|
197 |
+
"pairformer_decoder": {
|
198 |
+
"n_blocks": 6,
|
199 |
+
"c_z": GlobalConfigValue("c_z"),
|
200 |
+
"c_s": GlobalConfigValue("c_s"),
|
201 |
+
"n_heads": 16,
|
202 |
+
"dropout": 0.25,
|
203 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
204 |
+
},
|
205 |
+
"diffusion_module": {
|
206 |
+
"use_fine_grained_checkpoint": True,
|
207 |
+
"sigma_data": GlobalConfigValue("sigma_data"),
|
208 |
+
"c_token": 768,
|
209 |
+
"c_atom": GlobalConfigValue("c_atom"),
|
210 |
+
"c_atompair": GlobalConfigValue("c_atompair"),
|
211 |
+
"c_z": GlobalConfigValue("c_z"),
|
212 |
+
"c_s": GlobalConfigValue("c_s"),
|
213 |
+
"c_s_inputs": GlobalConfigValue("c_s_inputs"),
|
214 |
+
"initialization": {
|
215 |
+
"zero_init_condition_transition": False,
|
216 |
+
"zero_init_atom_encoder_residual_linear": False,
|
217 |
+
"he_normal_init_atom_encoder_small_mlp": False,
|
218 |
+
"he_normal_init_atom_encoder_output": False,
|
219 |
+
"glorot_init_self_attention": False,
|
220 |
+
"zero_init_adaln": True,
|
221 |
+
"zero_init_residual_condition_transition": False,
|
222 |
+
"zero_init_dit_output": True,
|
223 |
+
"zero_init_atom_decoder_linear": False,
|
224 |
+
},
|
225 |
+
"atom_encoder": {
|
226 |
+
"n_blocks": 3,
|
227 |
+
"n_heads": 4,
|
228 |
+
},
|
229 |
+
"transformer": {
|
230 |
+
"n_blocks": 24,
|
231 |
+
"n_heads": 16,
|
232 |
+
},
|
233 |
+
"atom_decoder": {
|
234 |
+
"n_blocks": 3,
|
235 |
+
"n_heads": 4,
|
236 |
+
},
|
237 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
238 |
+
},
|
239 |
+
"diffusion_module_encoder_decoder": {
|
240 |
+
"use_fine_grained_checkpoint": True,
|
241 |
+
"sigma_data": GlobalConfigValue("sigma_data"),
|
242 |
+
"c_token": 768,
|
243 |
+
"c_atom": GlobalConfigValue("c_atom"),
|
244 |
+
"c_atompair": GlobalConfigValue("c_atompair"),
|
245 |
+
"c_z": GlobalConfigValue("c_z"),
|
246 |
+
"c_s": GlobalConfigValue("c_s"),
|
247 |
+
"c_s_inputs": GlobalConfigValue("c_s_inputs"),
|
248 |
+
"watermark": GlobalConfigValue("watermark"),
|
249 |
+
"initialization": {
|
250 |
+
"zero_init_condition_transition": False,
|
251 |
+
"zero_init_atom_encoder_residual_linear": False,
|
252 |
+
"he_normal_init_atom_encoder_small_mlp": False,
|
253 |
+
"he_normal_init_atom_encoder_output": False,
|
254 |
+
"glorot_init_self_attention": False,
|
255 |
+
"zero_init_adaln": True,
|
256 |
+
"zero_init_residual_condition_transition": False,
|
257 |
+
"zero_init_dit_output": True,
|
258 |
+
"zero_init_atom_decoder_linear": False,
|
259 |
+
},
|
260 |
+
"atom_encoder": {
|
261 |
+
"n_blocks": 3,
|
262 |
+
"n_heads": 4,
|
263 |
+
},
|
264 |
+
"transformer": {
|
265 |
+
"n_blocks": 6,
|
266 |
+
"n_heads": 16,
|
267 |
+
},
|
268 |
+
"atom_decoder": {
|
269 |
+
"n_blocks": 3,
|
270 |
+
"n_heads": 4,
|
271 |
+
},
|
272 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
273 |
+
},
|
274 |
+
"confidence_head": {
|
275 |
+
"c_z": GlobalConfigValue("c_z"),
|
276 |
+
"c_s": GlobalConfigValue("c_s"),
|
277 |
+
"c_s_inputs": GlobalConfigValue("c_s_inputs"),
|
278 |
+
"n_blocks": 4,
|
279 |
+
"max_atoms_per_token": GlobalConfigValue("max_atoms_per_token"),
|
280 |
+
"pairformer_dropout": 0.0,
|
281 |
+
"blocks_per_ckpt": GlobalConfigValue("blocks_per_ckpt"),
|
282 |
+
"distance_bin_start": 3.25,
|
283 |
+
"distance_bin_end": 52.0,
|
284 |
+
"distance_bin_step": 1.25,
|
285 |
+
"stop_gradient": True,
|
286 |
+
},
|
287 |
+
"distogram_head": {
|
288 |
+
"c_z": GlobalConfigValue("c_z"),
|
289 |
+
"no_bins": GlobalConfigValue("no_bins"),
|
290 |
+
},
|
291 |
+
},
|
292 |
+
}
|
293 |
+
perm_configs = {
|
294 |
+
# Chain and Atom Permutation
|
295 |
+
"chain_permutation": {
|
296 |
+
"train": {
|
297 |
+
"mini_rollout": True,
|
298 |
+
"diffusion_sample": False,
|
299 |
+
},
|
300 |
+
"test": {
|
301 |
+
"diffusion_sample": True,
|
302 |
+
},
|
303 |
+
"permute_by_pocket": True,
|
304 |
+
"configs": {
|
305 |
+
"use_center_rmsd": False,
|
306 |
+
"find_gt_anchor_first": False,
|
307 |
+
"accept_it_as_it_is": False,
|
308 |
+
"enumerate_all_anchor_pairs": False,
|
309 |
+
"selection_metric": "aligned_rmsd",
|
310 |
+
},
|
311 |
+
},
|
312 |
+
"atom_permutation": {
|
313 |
+
"train": {
|
314 |
+
"mini_rollout": True,
|
315 |
+
"diffusion_sample": False,
|
316 |
+
},
|
317 |
+
"test": {
|
318 |
+
"diffusion_sample": True,
|
319 |
+
},
|
320 |
+
"permute_by_pocket": True,
|
321 |
+
"global_align_wo_symmetric_atom": False,
|
322 |
+
},
|
323 |
+
}
|
324 |
+
loss_configs = {
|
325 |
+
"loss": {
|
326 |
+
"diffusion_lddt_chunk_size": ValueMaybeNone(1),
|
327 |
+
"diffusion_bond_chunk_size": ValueMaybeNone(1),
|
328 |
+
"diffusion_chunk_size_outer": ValueMaybeNone(-1),
|
329 |
+
"diffusion_sparse_loss_enable": GlobalConfigValue("loss_metrics_sparse_enable"),
|
330 |
+
"diffusion_lddt_loss_dense": True, # only set true in initial training for training speed
|
331 |
+
"resolution": {"min": 0.1, "max": 4.0},
|
332 |
+
"weight": {
|
333 |
+
"alpha_confidence": 1e-4,
|
334 |
+
"alpha_pae": 0.0, # or 1 in finetuning stage 3
|
335 |
+
"alpha_except_pae": 1.0,
|
336 |
+
"alpha_diffusion": 4.0,
|
337 |
+
"alpha_distogram": 3e-2,
|
338 |
+
"alpha_bond": 0.0, # or 1 in finetuning stages
|
339 |
+
"smooth_lddt": 1.0, # or 0 in finetuning stages
|
340 |
+
"watermark": 1.0,
|
341 |
+
},
|
342 |
+
"plddt": {
|
343 |
+
"min_bin": 0,
|
344 |
+
"max_bin": 1.0,
|
345 |
+
"no_bins": 50,
|
346 |
+
"normalize": True,
|
347 |
+
"eps": 1e-6,
|
348 |
+
},
|
349 |
+
"pde": {
|
350 |
+
"min_bin": 0,
|
351 |
+
"max_bin": 32,
|
352 |
+
"no_bins": 64,
|
353 |
+
"eps": 1e-6,
|
354 |
+
},
|
355 |
+
"resolved": {
|
356 |
+
"eps": 1e-6,
|
357 |
+
},
|
358 |
+
"pae": {
|
359 |
+
"min_bin": 0,
|
360 |
+
"max_bin": 32,
|
361 |
+
"no_bins": 64,
|
362 |
+
"eps": 1e-6,
|
363 |
+
},
|
364 |
+
"diffusion": {
|
365 |
+
"mse": {
|
366 |
+
"weight_mse": 1 / 3,
|
367 |
+
"weight_dna": 5.0,
|
368 |
+
"weight_rna": 5.0,
|
369 |
+
"weight_ligand": 10.0,
|
370 |
+
"eps": 1e-6,
|
371 |
+
},
|
372 |
+
"bond": {
|
373 |
+
"eps": 1e-6,
|
374 |
+
},
|
375 |
+
"smooth_lddt": {
|
376 |
+
"eps": 1e-6,
|
377 |
+
},
|
378 |
+
},
|
379 |
+
"watermark": {
|
380 |
+
"eps": 1e-6,
|
381 |
+
},
|
382 |
+
"distogram": {
|
383 |
+
"min_bin": 2.3125,
|
384 |
+
"max_bin": 21.6875,
|
385 |
+
"no_bins": 64,
|
386 |
+
"eps": 1e-6,
|
387 |
+
},
|
388 |
+
},
|
389 |
+
"metrics": {
|
390 |
+
"lddt": {
|
391 |
+
"eps": 1e-6,
|
392 |
+
},
|
393 |
+
"complex_ranker_keys": ListValue(["plddt", "gpde", "ranking_score"]),
|
394 |
+
"chain_ranker_keys": ListValue(["chain_ptm", "chain_plddt"]),
|
395 |
+
"interface_ranker_keys": ListValue(
|
396 |
+
["chain_pair_iptm", "chain_pair_iptm_global", "chain_pair_plddt"]
|
397 |
+
),
|
398 |
+
"clash": {"af3_clash_threshold": 1.1, "vdw_clash_threshold": 0.75},
|
399 |
+
},
|
400 |
+
}
|
401 |
+
|
402 |
+
configs = {
|
403 |
+
**basic_configs,
|
404 |
+
**data_configs,
|
405 |
+
**optim_configs,
|
406 |
+
**model_configs,
|
407 |
+
**perm_configs,
|
408 |
+
**loss_configs,
|
409 |
+
}
|
configs/configs_data.py
ADDED
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2024 ByteDance and/or its affiliates.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
# pylint: disable=C0114,C0301
|
16 |
+
import os
|
17 |
+
from copy import deepcopy
|
18 |
+
|
19 |
+
from protenix.config.extend_types import GlobalConfigValue, ListValue
|
20 |
+
|
21 |
+
default_test_configs = {
|
22 |
+
"sampler_configs": {
|
23 |
+
"sampler_type": "uniform",
|
24 |
+
},
|
25 |
+
"cropping_configs": {
|
26 |
+
"method_weights": [
|
27 |
+
0.0, # ContiguousCropping
|
28 |
+
0.0, # SpatialCropping
|
29 |
+
1.0, # SpatialInterfaceCropping
|
30 |
+
],
|
31 |
+
"crop_size": -1,
|
32 |
+
},
|
33 |
+
"lig_atom_rename": GlobalConfigValue("test_lig_atom_rename"),
|
34 |
+
"shuffle_mols": GlobalConfigValue("test_shuffle_mols"),
|
35 |
+
"shuffle_sym_ids": GlobalConfigValue("test_shuffle_sym_ids"),
|
36 |
+
}
|
37 |
+
|
38 |
+
default_weighted_pdb_configs = {
|
39 |
+
"sampler_configs": {
|
40 |
+
"sampler_type": "weighted",
|
41 |
+
"beta_dict": {
|
42 |
+
"chain": 0.5,
|
43 |
+
"interface": 1,
|
44 |
+
},
|
45 |
+
"alpha_dict": {
|
46 |
+
"prot": 3,
|
47 |
+
"nuc": 3,
|
48 |
+
"ligand": 1,
|
49 |
+
},
|
50 |
+
"force_recompute_weight": True,
|
51 |
+
},
|
52 |
+
"cropping_configs": {
|
53 |
+
"method_weights": ListValue([0.2, 0.4, 0.4]),
|
54 |
+
"crop_size": GlobalConfigValue("train_crop_size"),
|
55 |
+
},
|
56 |
+
"sample_weight": 0.5,
|
57 |
+
"limits": -1,
|
58 |
+
"lig_atom_rename": GlobalConfigValue("train_lig_atom_rename"),
|
59 |
+
"shuffle_mols": GlobalConfigValue("train_shuffle_mols"),
|
60 |
+
"shuffle_sym_ids": GlobalConfigValue("train_shuffle_sym_ids"),
|
61 |
+
}
|
62 |
+
|
63 |
+
DATA_ROOT_DIR = "./"
|
64 |
+
|
65 |
+
# Prefer the CCD cache created by scripts/gen_ccd_cache.py (its filename has no date).
|
66 |
+
# See: docs/prepare_training_data.md
|
67 |
+
# CCD_COMPONENTS_FILE_PATH = os.path.join(DATA_ROOT_DIR, "components.cif")
|
68 |
+
# CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
|
69 |
+
# DATA_ROOT_DIR, "components.cif.rdkit_mol.pkl"
|
70 |
+
# )
|
71 |
+
|
72 |
+
# if (not os.path.exists(CCD_COMPONENTS_FILE_PATH)) or (
|
73 |
+
# not os.path.exists(CCD_COMPONENTS_RDKIT_MOL_FILE_PATH)
|
74 |
+
# ):
|
75 |
+
CCD_COMPONENTS_FILE_PATH = os.path.join(DATA_ROOT_DIR, "components.v20240608.cif")
|
76 |
+
CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
|
77 |
+
DATA_ROOT_DIR, "components.v20240608.cif.rdkit_mol.pkl"
|
78 |
+
)
|
79 |
+
|
80 |
+
|
81 |
+
# This is a patch in inference stage for users that do not have root permission.
|
82 |
+
# If you run
|
83 |
+
# ```
|
84 |
+
# bash inference_demo.sh
|
85 |
+
# ```
|
86 |
+
# or
|
87 |
+
# ```
|
88 |
+
# protenix predict --input examples/example.json --out_dir ./output
|
89 |
+
# ```
|
90 |
+
# The checkpoint and the data cache will be downloaded to the current code directory.
|
91 |
+
if (not os.path.exists(CCD_COMPONENTS_FILE_PATH)) or (
|
92 |
+
not os.path.exists(CCD_COMPONENTS_RDKIT_MOL_FILE_PATH)
|
93 |
+
):
|
94 |
+
print("Try to find the ccd cache data in the code directory for inference.")
|
95 |
+
current_file_path = os.path.abspath(__file__)
|
96 |
+
current_directory = os.path.dirname(current_file_path)
|
97 |
+
code_directory = os.path.dirname(current_directory)
|
98 |
+
|
99 |
+
data_cache_dir = os.path.join(code_directory, "release_data/ccd_cache")
|
100 |
+
CCD_COMPONENTS_FILE_PATH = os.path.join(data_cache_dir, "components.cif")
|
101 |
+
CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
|
102 |
+
data_cache_dir, "components.cif.rdkit_mol.pkl"
|
103 |
+
)
|
104 |
+
if (not os.path.exists(CCD_COMPONENTS_FILE_PATH)) or (
|
105 |
+
not os.path.exists(CCD_COMPONENTS_RDKIT_MOL_FILE_PATH)
|
106 |
+
):
|
107 |
+
|
108 |
+
CCD_COMPONENTS_FILE_PATH = os.path.join(
|
109 |
+
data_cache_dir, "components.v20240608.cif"
|
110 |
+
)
|
111 |
+
CCD_COMPONENTS_RDKIT_MOL_FILE_PATH = os.path.join(
|
112 |
+
data_cache_dir, "components.v20240608.cif.rdkit_mol.pkl"
|
113 |
+
)
|
114 |
+
|
115 |
+
data_configs = {
|
116 |
+
"num_dl_workers": 16,
|
117 |
+
"epoch_size": 10000,
|
118 |
+
"train_ref_pos_augment": True,
|
119 |
+
"test_ref_pos_augment": True,
|
120 |
+
"train_sets": ListValue(["weightedPDB_before2109_wopb_nometalc_0925"]),
|
121 |
+
"train_sampler": {
|
122 |
+
"train_sample_weights": ListValue([1.0]),
|
123 |
+
"sampler_type": "weighted",
|
124 |
+
},
|
125 |
+
"test_sets": ListValue(["recentPDB_1536_sample384_0925"]),
|
126 |
+
"weightedPDB_before2109_wopb_nometalc_0925": {
|
127 |
+
"base_info": {
|
128 |
+
"mmcif_dir": os.path.join(DATA_ROOT_DIR, "mmcif"),
|
129 |
+
"bioassembly_dict_dir": os.path.join(DATA_ROOT_DIR, "mmcif_bioassembly"),
|
130 |
+
"indices_fpath": os.path.join(
|
131 |
+
DATA_ROOT_DIR,
|
132 |
+
"indices/weightedPDB_indices_before_2021-09-30_wo_posebusters_resolution_below_9.csv.gz",
|
133 |
+
),
|
134 |
+
"pdb_list": "",
|
135 |
+
"random_sample_if_failed": True,
|
136 |
+
"max_n_token": -1, # can be used for removing data with too many tokens.
|
137 |
+
"use_reference_chains_only": False,
|
138 |
+
"exclusion": { # do not sample the data based on ions.
|
139 |
+
"mol_1_type": ListValue(["ions"]),
|
140 |
+
"mol_2_type": ListValue(["ions"]),
|
141 |
+
},
|
142 |
+
},
|
143 |
+
**deepcopy(default_weighted_pdb_configs),
|
144 |
+
},
|
145 |
+
"recentPDB_1536_sample384_0925": {
|
146 |
+
"base_info": {
|
147 |
+
"mmcif_dir": os.path.join(DATA_ROOT_DIR, "mmcif"),
|
148 |
+
"bioassembly_dict_dir": os.path.join(
|
149 |
+
DATA_ROOT_DIR, "recentPDB_bioassembly"
|
150 |
+
),
|
151 |
+
"indices_fpath": os.path.join(
|
152 |
+
DATA_ROOT_DIR, "indices/recentPDB_low_homology_maxtoken1536.csv"
|
153 |
+
),
|
154 |
+
"pdb_list": os.path.join(
|
155 |
+
DATA_ROOT_DIR,
|
156 |
+
"indices/recentPDB_low_homology_maxtoken1024_sample384_pdb_id.txt",
|
157 |
+
),
|
158 |
+
"max_n_token": GlobalConfigValue("test_max_n_token"), # filter data
|
159 |
+
"sort_by_n_token": False,
|
160 |
+
"group_by_pdb_id": True,
|
161 |
+
"find_eval_chain_interface": True,
|
162 |
+
},
|
163 |
+
**deepcopy(default_test_configs),
|
164 |
+
},
|
165 |
+
"posebusters_0925": {
|
166 |
+
"base_info": {
|
167 |
+
"mmcif_dir": os.path.join(DATA_ROOT_DIR, "posebusters_mmcif"),
|
168 |
+
"bioassembly_dict_dir": os.path.join(
|
169 |
+
DATA_ROOT_DIR, "posebusters_bioassembly"
|
170 |
+
),
|
171 |
+
"indices_fpath": os.path.join(
|
172 |
+
DATA_ROOT_DIR, "indices/posebusters_indices_mainchain_interface.csv"
|
173 |
+
),
|
174 |
+
"pdb_list": "",
|
175 |
+
"find_pocket": True,
|
176 |
+
"find_all_pockets": False,
|
177 |
+
"max_n_token": GlobalConfigValue("test_max_n_token"), # filter data
|
178 |
+
},
|
179 |
+
**deepcopy(default_test_configs),
|
180 |
+
},
|
181 |
+
"msa": {
|
182 |
+
"enable": True,
|
183 |
+
"enable_rna_msa": False,
|
184 |
+
"prot": {
|
185 |
+
"pairing_db": "uniref100",
|
186 |
+
"non_pairing_db": "mmseqs_other",
|
187 |
+
"pdb_mmseqs_dir": os.path.join(DATA_ROOT_DIR, "mmcif_msa"),
|
188 |
+
"seq_to_pdb_idx_path": os.path.join(DATA_ROOT_DIR, "seq_to_pdb_index.json"),
|
189 |
+
"indexing_method": "sequence",
|
190 |
+
},
|
191 |
+
"rna": {
|
192 |
+
"seq_to_pdb_idx_path": "",
|
193 |
+
"rna_msa_dir": "",
|
194 |
+
"indexing_method": "sequence",
|
195 |
+
},
|
196 |
+
"strategy": "random",
|
197 |
+
"merge_method": "dense_max",
|
198 |
+
"min_size": {
|
199 |
+
"train": 1,
|
200 |
+
"test": 2048,
|
201 |
+
},
|
202 |
+
"max_size": {
|
203 |
+
"train": 16384,
|
204 |
+
"test": 16384,
|
205 |
+
},
|
206 |
+
"sample_cutoff": {
|
207 |
+
"train": 2048,
|
208 |
+
"test": 2048,
|
209 |
+
},
|
210 |
+
},
|
211 |
+
"template": {
|
212 |
+
"enable": False,
|
213 |
+
},
|
214 |
+
"ccd_components_file": CCD_COMPONENTS_FILE_PATH,
|
215 |
+
"ccd_components_rdkit_mol_file": CCD_COMPONENTS_RDKIT_MOL_FILE_PATH,
|
216 |
+
}
|
configs/configs_inference.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2024 ByteDance and/or its affiliates.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
# pylint: disable=C0114
|
16 |
+
import os
|
17 |
+
|
18 |
+
from protenix.config.extend_types import ListValue, RequiredValue
|
19 |
+
|
20 |
+
code_directory = '/n/netscratch/mzitnik_lab/Lab/zzx/'
|
21 |
+
# The model will be downloaded to the following path if it does not exist:
|
22 |
+
# "./release_data/checkpoint/model_v0.2.0.pt"
|
23 |
+
inference_configs = {
|
24 |
+
"seeds": ListValue([101]),
|
25 |
+
"dump_dir": "./output",
|
26 |
+
"need_atom_confidence": True,
|
27 |
+
"sorted_by_ranking_score": True,
|
28 |
+
"input_json_path": RequiredValue(str),
|
29 |
+
"num_workers": 16,
|
30 |
+
"use_msa": True,
|
31 |
+
}
|
cutlass
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit f7b19de32c5d1f3cedfc735c2849f12b537522ee
|
dataset/7pzb.pkl.gz
ADDED
Binary file (136 kB). View file
|
|
dataset/7pzb_unwatermarked.cif
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dataset/output.csv
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"entity_1_id","chain_1_id","mol_1_type","cluster_1_id","entity_2_id","chain_2_id","mol_2_type","cluster_2_id","cluster_id","pdb_id","assembly_id","release_date","num_tokens","num_prot_chains","resolution","type","mol_type_group","sub_mol_1_type","sub_mol_2_type","eval_type"
|
2 |
+
"1","A","prot","NotInClusterTxt","","","","","NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_prot","prot","","intra_prot"
|
3 |
+
"1","B","prot","NotInClusterTxt","","","","","NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_prot","prot","","intra_prot"
|
4 |
+
"2","C","nuc","CTAGGTAACATTACTCGCG","","","","","CTAGGTAACATTACTCGCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
|
5 |
+
"2","D","nuc","CTAGGTAACATTACTCGCG","","","","","CTAGGTAACATTACTCGCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
|
6 |
+
"3","E","nuc","GCGAGTAATGTTAC","","","","","GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
|
7 |
+
"3","F","nuc","GCGAGTAATGTTAC","","","","","GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_nuc","dna","","intra_dna"
|
8 |
+
"4","G","ligand","PCG","","","","","PCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_ligand","non_bonded_ligand","","intra_ligand"
|
9 |
+
"4","H","ligand","PCG","","","","","PCG","7pzb","","9999-12-31",600,2,-1.0,"chain","intra_ligand","non_bonded_ligand","","intra_ligand"
|
10 |
+
"1","A","prot","NotInClusterTxt","1","B","prot","NotInClusterTxt","NotInClusterTxt:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","prot_prot","prot","prot","prot_prot"
|
11 |
+
"1","A","prot","NotInClusterTxt","2","C","nuc","CTAGGTAACATTACTCGCG","CTAGGTAACATTACTCGCG:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
|
12 |
+
"1","A","prot","NotInClusterTxt","3","E","nuc","GCGAGTAATGTTAC","GCGAGTAATGTTAC:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
|
13 |
+
"1","A","prot","NotInClusterTxt","4","G","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
|
14 |
+
"1","A","prot","NotInClusterTxt","4","H","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
|
15 |
+
"1","B","prot","NotInClusterTxt","2","D","nuc","CTAGGTAACATTACTCGCG","CTAGGTAACATTACTCGCG:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
|
16 |
+
"1","B","prot","NotInClusterTxt","3","F","nuc","GCGAGTAATGTTAC","GCGAGTAATGTTAC:NotInClusterTxt","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_prot","prot","dna","dna_prot"
|
17 |
+
"1","B","prot","NotInClusterTxt","4","G","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
|
18 |
+
"1","B","prot","NotInClusterTxt","4","H","ligand","PCG","NotInClusterTxt:PCG","7pzb","","9999-12-31",600,2,-1.0,"interface","ligand_prot","prot","non_bonded_ligand","ligand_prot"
|
19 |
+
"2","C","nuc","CTAGGTAACATTACTCGCG","2","D","nuc","CTAGGTAACATTACTCGCG","CTAGGTAACATTACTCGCG:CTAGGTAACATTACTCGCG","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
|
20 |
+
"2","C","nuc","CTAGGTAACATTACTCGCG","3","E","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
|
21 |
+
"2","C","nuc","CTAGGTAACATTACTCGCG","3","F","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
|
22 |
+
"2","D","nuc","CTAGGTAACATTACTCGCG","3","E","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
|
23 |
+
"2","D","nuc","CTAGGTAACATTACTCGCG","3","F","nuc","GCGAGTAATGTTAC","CTAGGTAACATTACTCGCG:GCGAGTAATGTTAC","7pzb","","9999-12-31",600,2,-1.0,"interface","nuc_nuc","dna","dna","nuc_nuc"
|
docs/colabfold_compatiable_msa.md
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Using Local Colabfold_search to Generate Protenix-Compatible MSA
|
2 |
+
|
3 |
+
Colabfold provides an easy-to-use and efficient MSA search pipeline that's ideal for generating MSAs during inference. Unfortunately, this pipeline cannot fully match Protenix's MSA search process designed for training, as the current `colabfold_search` omits species information in the MSA, preventing correct pairing by Protenix's data pipeline. To address this issue, we provide the `scripts/colabfold_msa.py` script, which post-processes `colabfold_search` results by adding pseudo taxonomy IDs to paired MSAs to match Protenix's data pipeline.
|
4 |
+
|
5 |
+
Here's an example:
|
6 |
+
```bash
|
7 |
+
python3 scripts/colabfold_msa.py examples/dimer.fasta <path/to/colabfold_db> dimer_colabfold_msa --db1 uniref30_2103_db --db3 colabfold_envdb_202108_db --mmseqs_path <path/to/mmseqs>
|
8 |
+
```
|
9 |
+
|
10 |
+
#### Configuring Colabfold_search
|
11 |
+
Installation of colabfold and mmseqs2 is required.
|
12 |
+
|
13 |
+
colabfold can be installed with: `pip install colabfold[alphafold]`.
|
14 |
+
|
15 |
+
Build MMseqs2 from source:
|
16 |
+
|
17 |
+
```bash
|
18 |
+
wget https://github.com/soedinglab/MMseqs2/archive/refs/tags/16-747c6.tar.gz
|
19 |
+
tar xzf 16-747c6.tar.gz
|
20 |
+
cd MMseqs2-16-747c6/
|
21 |
+
mkdir build && cd build
|
22 |
+
cmake -DCMAKE_BUILD_TYPE=RELEASE -DCMAKE_INSTALL_PREFIX=. ..
|
23 |
+
make -j8
|
24 |
+
make install
|
25 |
+
```
|
26 |
+
|
27 |
+
Download ColabFold database:
|
28 |
+
```bash
|
29 |
+
git clone https://github.com/sokrypton/ColabFold.git
|
30 |
+
cd ColabFold
|
31 |
+
# Configure database:
|
32 |
+
MMSEQS_NO_INDEX=1 ./setup_databases.sh <path/to/colabfold_db>
|
33 |
+
```
|
docs/docker_installation.md
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Run with Docker
|
2 |
+
|
3 |
+
1. Install Docker (with GPU Support)
|
4 |
+
|
5 |
+
Ensure that Docker is installed and configured with GPU support. Follow these steps:
|
6 |
+
* Install [Docker](https://www.docker.com/) if not already installed.
|
7 |
+
* Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to enable GPU support.
|
8 |
+
* Verify the setup with:
|
9 |
+
```bash
|
10 |
+
docker run --rm --gpus all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
|
11 |
+
```
|
12 |
+
|
13 |
+
2. Pull the Docker image, which was built based on this [Dockerfile](../Dockerfile)
|
14 |
+
```bash
|
15 |
+
docker pull ai4s-cn-beijing.cr.volces.com/infra/protenix:v0.0.1
|
16 |
+
```
|
17 |
+
|
18 |
+
3. Clone this repository and `cd` into it
|
19 |
+
```bash
|
20 |
+
git clone https://github.com/bytedance/protenix.git
|
21 |
+
cd ./protenix
|
22 |
+
pip install -e .
|
23 |
+
```
|
24 |
+
|
25 |
+
4. Run Docker with an interactive shell
|
26 |
+
```bash
|
27 |
+
docker run --gpus all -it -v $(pwd):/workspace -v /dev/shm:/dev/shm ai4s-cn-beijing.cr.volces.com/infra/protenix:v0.0.1 /bin/bash
|
28 |
+
```
|
29 |
+
|
30 |
+
After running the above commands, you’ll be inside the container’s environment and can execute commands as you would in a normal Linux terminal.
|
docs/infer_json_format.md
ADDED
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Format of the input JSON file
|
2 |
+
The JSON file format closely resembles that used by the AlphaFold Server, with a few key differences:
|
3 |
+
|
4 |
+
1. There are no restrictions on the types of ligands, ions, and modifications, whereas the AlphaFold Server currently supports only a limited set of specific CCD codes.
|
5 |
+
2. Users can specify bonds between entities, such as covalent bonds between ligands and polymers.
|
6 |
+
3. It supports inputting ligands in the form of SMILES strings or molecular structure files.
|
7 |
+
4. Ligands composed of multiple CCD codes can be treated as a single entity. This feature is useful for representing glycans, for example, "NAG-NAG".
|
8 |
+
5. The "glycans" field is no longer supported. Glycans can be fully represented by inputting multiple ligands with defined bonding or by providing their SMILES strings.
|
9 |
+
|
10 |
+
Here is an overview of the JSON file format:
|
11 |
+
```json
|
12 |
+
[
|
13 |
+
{
|
14 |
+
"name": "Test Fold Job Number One",
|
15 |
+
"sequences": [...],
|
16 |
+
"covalent_bonds": [...]
|
17 |
+
}
|
18 |
+
]
|
19 |
+
```
|
20 |
+
The JSON file consists of a list of dictionaries, where each dictionary represents a set of sequences you want to model.
|
21 |
+
Even if you are modeling only one set of sequences, the top-level structure should still be a list.
|
22 |
+
|
23 |
+
Each dictionary contains the following three keys:
|
24 |
+
* `name`: A string representing the name of the inference job.
|
25 |
+
* `sequences`: A list of dictionaries that describe the entities (e.g., proteins, DNA, RNA, small molecules, and ions) involved in the inference.
|
26 |
+
* `covalent_bonds`: An optional list of dictionaries that define the covalent bonds between atoms from different entities.
|
27 |
+
|
28 |
+
Details of `sequences` and `covalent_bonds` are provided below.
|
29 |
+
|
30 |
+
#### sequences
|
31 |
+
There are 5 kinds of supported sequences:
|
32 |
+
* `proteinChain` – used for proteins
|
33 |
+
* `dnaSequence` – used for DNA (single strand)
|
34 |
+
* `rnaSequence` – used for RNA (single strand)
|
35 |
+
* `ligand` – used for ligands
|
36 |
+
* `ion` – used for ions
|
37 |
+
|
38 |
+
##### proteinChain
|
39 |
+
```json
|
40 |
+
{
|
41 |
+
"proteinChain": {
|
42 |
+
"sequence": "PREACHINGS",
|
43 |
+
"count": 1,
|
44 |
+
"modifications": [
|
45 |
+
{
|
46 |
+
"ptmType": "CCD_HY3",
|
47 |
+
"ptmPosition": 1
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"ptmType": "CCD_P1L",
|
51 |
+
"ptmPosition": 5
|
52 |
+
}
|
53 |
+
],
|
54 |
+
"msa":{
|
55 |
+
"precomputed_msa_dir": "./precomputed_msa",
|
56 |
+
"pairing_db": "uniref100"
|
57 |
+
}
|
58 |
+
}
|
59 |
+
}
|
60 |
+
```
|
61 |
+
* `sequence`: A string representing a protein sequence, which can only contain the 20 standard amino acid types and X (UNK) for unknown residues.
|
62 |
+
* `count`: The number of copies of this protein chain (integer).
|
63 |
+
* `modifications`: An optional list of dictionaries that describe post-translational modifications.
|
64 |
+
|
65 |
+
* `ptmType`: A string containing CCD code of the modification.
|
66 |
+
* `ptmPosition`: The position of the modified amino acid (integer).
|
67 |
+
* `msa`: A dictionary containing options for Multiple Sequence Alignment (MSA). **If you want to search MSAs using our inference pipeline, you should not set this field or set it to an empty dictionary**:
|
68 |
+
* `precomputed_msa_dir`: The path to a directory containing precomputed MSAs. This directory should contain two specific files: "pairing.a3m" for MSAs used for pairing, and "non_pairing.a3m" for non-pairing MSAs.
|
69 |
+
* `pairing_db`: The name of the genomic database used for pairing MSAs. The default is "uniref100" and you should not change it. In fact, the MSA search is performed against UniRef30, a clustered version of UniRef100.
|
70 |
+
|
71 |
+
##### dnaSequence
|
72 |
+
```json
|
73 |
+
{
|
74 |
+
"dnaSequence": {
|
75 |
+
"sequence": "GATTACA",
|
76 |
+
"modifications": [
|
77 |
+
{
|
78 |
+
"modificationType": "CCD_6OG",
|
79 |
+
"basePosition": 1
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"modificationType": "CCD_6MA",
|
83 |
+
"basePosition": 2
|
84 |
+
}
|
85 |
+
],
|
86 |
+
"count": 1
|
87 |
+
}
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"dnaSequence": {
|
91 |
+
"sequence": "TGTAATC",
|
92 |
+
"count": 1
|
93 |
+
}
|
94 |
+
}
|
95 |
+
```
|
96 |
+
Please note that the `dnaSequence` type refers to a single stranded DNA sequence. If you
|
97 |
+
wish to model double-stranded DNA, please add a second `dnaSequence` entry representing
|
98 |
+
the sequence of the reverse complement strand.
|
99 |
+
|
100 |
+
* `sequence`: A string containing a DNA sequence; only letters A, T, G, C and N (unknown nucleotide) are allowed.
|
101 |
+
* `count`: The number of copies of this DNA chain (integer).
|
102 |
+
* `modifications`: An optional list of dictionaries describing
|
103 |
+
the DNA chemical modifications:
|
104 |
+
* `modificationType`: A string containing CCD code of modification.
|
105 |
+
* `basePosition`: The position of the modified nucleotide (integer).
|
106 |
+
|
107 |
+
##### rnaSequence
|
108 |
+
```json
|
109 |
+
{
|
110 |
+
"rnaSequence": {
|
111 |
+
"sequence": "GUAC",
|
112 |
+
"modifications": [
|
113 |
+
{
|
114 |
+
"modificationType": "CCD_2MG",
|
115 |
+
"basePosition": 1
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"modificationType": "CCD_5MC",
|
119 |
+
"basePosition": 4
|
120 |
+
}
|
121 |
+
],
|
122 |
+
"count": 1
|
123 |
+
}
|
124 |
+
}
|
125 |
+
```
|
126 |
+
* `sequence`: A string representing the RNA sequence (single-stranded); only letters A, U, G, C and N (unknown nucleotides) are allowed.
|
127 |
+
* `count`: The number of copies of this RNA chain (integer).
|
128 |
+
* `modifications`: An optional list of dictionaries describing RNA chemical modifications:
|
129 |
+
* `modificationType`: A string containing
|
130 |
+
CCD code of modification.
|
131 |
+
* `basePosition`: The position of the modified nucleotide (integer).
|
132 |
+
|
133 |
+
##### ligand
|
134 |
+
```json
|
135 |
+
{
|
136 |
+
"ligand": {
|
137 |
+
"ligand": "CCD_ATP",
|
138 |
+
"count": 1
|
139 |
+
}
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"ligand": {
|
143 |
+
"ligand": "FILE_your_file_path/atp.sdf",
|
144 |
+
"count": 1
|
145 |
+
}
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"ligand": {
|
149 |
+
"ligand": "Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O",
|
150 |
+
"count": 1
|
151 |
+
}
|
152 |
+
}
|
153 |
+
```
|
154 |
+
* `ligand`: A string representing the ligand. `ligand` can be one of the following three:
|
155 |
+
* A string containing the CCD code of the ligand, prefixed with "CCD_". For glycans or similar structures, this can be a concatenation of multiple CCD codes, for example, "CCD_NAG_BMA_BGC".
|
156 |
+
* A molecular SMILES string representing the ligand.
|
157 |
+
* A path to a molecular structure file, prefixed with "FILE_", where the supported file formats are PDB, SDF, MOL, and MOL2. The file must include the 3D conformation of the molecule.
|
158 |
+
|
159 |
+
* `count` is the number of copies of this ligand (integer).
|
160 |
+
|
161 |
+
##### ion
|
162 |
+
```json
|
163 |
+
{
|
164 |
+
"ion": {
|
165 |
+
"ion": "MG",
|
166 |
+
"count": 2
|
167 |
+
}
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"ion": {
|
171 |
+
"ion": "NA",
|
172 |
+
"count": 3
|
173 |
+
}
|
174 |
+
}
|
175 |
+
```
|
176 |
+
* `ion`: A string containing the CCD code for the ion. Note that, unlike ligands, the ion code **does not** start with "CCD_".
|
177 |
+
* `count`: The number of copies of this ion (integer).
|
178 |
+
|
179 |
+
#### covalent_bonds
|
180 |
+
```json
|
181 |
+
"covalent_bonds": [
|
182 |
+
{
|
183 |
+
"entity1": "2",
|
184 |
+
"copy1": 1,
|
185 |
+
"position1": "2",
|
186 |
+
"atom1": "N6",
|
187 |
+
"entity2": "3",
|
188 |
+
"copy2": 1,
|
189 |
+
"position2": "1",
|
190 |
+
"atom2": "C1"
|
191 |
+
}
|
192 |
+
]
|
193 |
+
```
|
194 |
+
|
195 |
+
The `covalent_bonds` section specifies covalent bonds between a polymer and a ligand, or between two ligands.
|
196 |
+
To define a covalent bond, two atoms involved in the bond must be identified. The following fields are used:
|
197 |
+
|
198 |
+
* `entity1`, `entity2`: The entity numbers for the two atoms involved in the bond.
|
199 |
+
The entity number corresponds to the order in which the entity appears in the `sequences` list, starting from 1.
|
200 |
+
* `copy1`, `copy2`: The copy index (starting from 1) of `entity1` and `entity2`, respectively. These fields are optional, but if specified, both `copy1` and `copy2` must be filled simultaneously or left empty at the same time. If neither field is provided, a bond will be created between all pairs of copies of the two entities. For example, if both entity1 and entity2 have two copies, a bond will be formed between entity1.copy1 and entity2.copy1, as well as between entity1.copy2 and entity2.copy2. In this case, the number of copies for both entities must be equal.
|
201 |
+
* `position1`, `position2` - The position of the residue (or ligand part) within the entity.
|
202 |
+
The position value starts at 1 and can vary based on the type of entity:
|
203 |
+
* For **polymers** (e.g., proteins, DNA, RNA), the position corresponds to the location of the residue in the sequence.
|
204 |
+
* For **ligands** composed of multiple CCD codes, the position refers to the serial number of the CCD code.
|
205 |
+
* For **single CCD code ligands**, or ligands defined by **SMILES** or **FILE**, the position is always set to 1.
|
206 |
+
|
207 |
+
* `atom1`, `atom2` - The atom names (or atom indices) of the atoms to be bonded.
|
208 |
+
* If the entity is a polymer or described by a CCD code, the atom names are consistent with those defined in the CCD.
|
209 |
+
* If the entity is a ligand defined by SMILES or a FILE, atoms can be specified by their atom index. The atom index corresponds to the position of the atom in the file or in the SMILES string, starting from 0.
|
210 |
+
|
211 |
+
Deprecation Notice: The previous fields such as old `left_entity`, `right_entity`, and other fields starting with `left`/`right` have been updated to use `1` and `2` to denote the two atoms forming a bond. The current code still supports the old field names, but they may be deprecated in the future, leaving only the new field names.
|
212 |
+
|
213 |
+
### Format of the model output
|
214 |
+
The outputs will be saved in the directory provided via the `--dump_dir` flag in the inference script. The outputs include the predicted structures in CIF format and the confidence in JSON files. The `--dump_dir` will have the following structure:
|
215 |
+
|
216 |
+
```bash
|
217 |
+
├── <name>/ # specified in the input JSON file
|
218 |
+
│ ├── <seed>/ # specified via the `--seeds` flag in the inference script
|
219 |
+
│ │ ├── <name>_<seed>_sample_0.cif
|
220 |
+
│ │ ├── <name>_<seed>_summary_confidence_sample_0.json
|
221 |
+
│ │ └──... # the number of samples in each seed is specified via `--sample_diffusion.N_sample ` flag in the inference script
|
222 |
+
│ └──...
|
223 |
+
└── ...
|
224 |
+
```
|
225 |
+
|
226 |
+
The contents of each output file are as follows:
|
227 |
+
- `<name>_<seed>_sample_*.cif` - A CIF format text file containing the predicted structure
|
228 |
+
- `<name>_<seed>_summary_confidence_sample_*.json` - A JSON format text file containing various confidence scores for assessing the reliability of predictions. Here’s a description of each score:
|
229 |
+
|
230 |
+
- `plddt` - Predicted Local Distance Difference Test (pLDDT) score. Higher values indicate greater confidence.
|
231 |
+
- `gpde` - Global Predicted Distance Error (PDE) score. Lower values indicate greater confidence.
|
232 |
+
- `ptm` - Predicted TM-score (pTM). Values closer to 1 indicate greater confidence.
|
233 |
+
- `iptm` - Interface Predicted TM-score, used to estimate the accuracy of interfaces between chains. Values closer to 1 indicate greater confidence.
|
234 |
+
- `chain_ptm` - pTM score calculated for individual chains with the shape of [N_chains], indicating the reliability of specific chain structure.
|
235 |
+
- `chain_pair_iptm`: Pairwise interface pTM scores between chain pairs with the shape of [N_chains, N_chains], indicating the reliability of specific chain-chain interactions.
|
236 |
+
- `chain_iptm` - Average ipTM scores for each chain with the shape of [N_chains].
|
237 |
+
- `chain_pair_iptm_global` - Average `chain_iptm` between chain pairs with the shape of [N_chains, N_chains]. For interface containing a small molecule, ion, or bonded ligand chain (named `C*`), this value is equal to the `chain_iptm` value of `C*`.
|
238 |
+
- `chain_plddt` - pLDDT scores calculated for individual chains with the shape of [N_chains].
|
239 |
+
- `chain_pair_plddt` - Pairwise pLDDT scores for chain pairs with the shape of [N_chains, N_chains].
|
240 |
+
- `has_clash` - Boolean flag indicating if there are steric clashes in the predicted structure.
|
241 |
+
- `disorder` - Predicted regions of intrinsic disorder within the protein, highlighting residues that may be flexible or unstructured.
|
242 |
+
- `ranking_score` - Predicted confidence score for ranking complexes. Higher values indicate greater confidence.
|
243 |
+
- `num_recycles`: Number of recycling steps used during inference.
|
docs/kernels.md
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Setting up kernels
|
2 |
+
|
3 |
+
- **Custom CUDA layernorm kernels** modified from [FastFold](https://github.com/hpcaitech/FastFold) and [Oneflow](https://github.com/Oneflow-Inc/oneflow) provide a speedup of about 30%-50% in different training stages. To use this feature, run the following command:
|
4 |
+
```bash
|
5 |
+
export LAYERNORM_TYPE=fast_layernorm
|
6 |
+
```
|
7 |
+
If the environment variable `LAYERNORM_TYPE` is set to `fast_layernorm`, the model will employ the layernorm we have developed; otherwise, the naive PyTorch layernorm will be adopted. The kernels will be compiled when `fast_layernorm` is called for the first time.
|
8 |
+
- **[DeepSpeed DS4Sci_EvoformerAttention kernel](https://www.deepspeed.ai/tutorials/ds4sci_evoformerattention/)** is a memory-efficient attention kernel developed as part of a collaboration between OpenFold and the DeepSpeed4Science initiative. To use this feature, run the following command:
|
9 |
+
```bash
|
10 |
+
export USE_DEEPSPEED_EVO_ATTTENTION=true
|
11 |
+
```
|
12 |
+
DS4Sci_EvoformerAttention is implemented based on [CUTLASS](https://github.com/NVIDIA/cutlass). If you use this feature, you need to clone the CUTLASS repository and specify the path to it in the environment variable CUTLASS_PATH. The [Dockerfile](Dockerfile) already includes this setting:
|
13 |
+
```bash
|
14 |
+
RUN git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git /opt/cutlass
|
15 |
+
ENV CUTLASS_PATH=/opt/cutlass
|
16 |
+
```
|
17 |
+
If you set up `Protenix` by `pip`, you can set environment variable `CUTLASS_PATH` as follows:
|
18 |
+
|
19 |
+
```bash
|
20 |
+
git clone -b v3.5.1 https://github.com/NVIDIA/cutlass.git /path/to/cutlass
|
21 |
+
export CUTLASS_PATH=/path/to/cutlass
|
22 |
+
```
|
23 |
+
|
24 |
+
The kernels will be compiled when DS4Sci_EvoformerAttention is called for the first time.
|
docs/model_performance.md
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Training
|
2 |
+
Some settings follow those in the [AlphaFold 3](https://www.nature.com/articles/s41586-024-07487-w) paper. The table below shows the training settings for different fine-tuning stages:
|
3 |
+
|
4 |
+
| Arguments | Initial training | Fine tuning 1 | Fine tuning 2 | Fine tuning 3 |
|
5 |
+
|-----------------------------------------|--------|---------|-------|-----|
|
6 |
+
| `train_crop_size` | 384 | 640 | 768 | 768 |
|
7 |
+
| `diffusion_batch_size` | 48 | 32 | 32 | 32 |
|
8 |
+
| `loss.weight.alpha_pae` | 0 | 0 | 0 | 1.0 |
|
9 |
+
| `loss.weight.alpha_bond` | 0 | 1.0 | 1.0 | 0 |
|
10 |
+
| `loss.weight.smooth_lddt` | 1.0 | 0 | 0 | 0 |
|
11 |
+
| `loss.weight.alpha_confidence` | 1e-4 | 1e-4 | 1e-4 | 1e-4|
|
12 |
+
| `loss.weight.alpha_diffusion` | 4.0 | 4.0 | 4.0 | 0 |
|
13 |
+
| `loss.weight.alpha_distogram` | 0.03 | 0.03 | 0.03 | 0 |
|
14 |
+
| `train_confidence_only` | False | False | False | True|
|
15 |
+
| full BF16-mixed speed(A100, s/step) | ~12 | ~30 | ~44 | ~13 |
|
16 |
+
| full BF16-mixed peak memory (G) | ~34 | ~35 | ~48 | ~24 |
|
17 |
+
|
18 |
+
We recommend carrying out the training on A100-80G or H20/H100 GPUs. If utilizing full BF16-Mixed precision training, the initial training stage can also be performed on A800-40G GPUs. For GPUs with smaller memory, such as A30, you'll need to reduce the model size, for example by decreasing `model.pairformer.nblocks` and `diffusion_batch_size`.
|
19 |
+
|
20 |
+
### Inference
|
21 |
+
|
22 |
+
The model will be inferred in BF16 Mixed precision. By **default**, the `SampleDiffusion` and `ConfidenceHead` parts will still be inferred in FP32 precision.
|
23 |
+
|
24 |
+
Below are reference examples of cuda memory usage (G).
|
25 |
+
|
26 |
+
| Ntoken | Natom | Default | Full BF16 Mixed |
|
27 |
+
|--------|-------|-------|------------------|
|
28 |
+
| 500 | 10000 | 5.6 | 5.1 |
|
29 |
+
| 1500 | 30000 | 24.8 | 19.2 |
|
30 |
+
| 2500 | 25000 | 52.2 | 34.8 |
|
31 |
+
| 3500 | 35000 | 67.6 | 38.2 |
|
32 |
+
| 4500 | 45000 | 77.0 | 59.2 |
|
33 |
+
| 5000 | 50000 | OOM | 72.8 |
|
34 |
+
|
35 |
+
The script in [runner/inference.py](../runner/inference.py) will automatically change the default precision to compute `SampleDiffusion`,`ConfidenceHead` to avoid OOM as follows:
|
36 |
+
```python
|
37 |
+
def update_inference_configs(configs: Any, N_token: int):
|
38 |
+
# Setting the default inference configs for different N_token and N_atom
|
39 |
+
# when N_token is larger than 3000, the default config might OOM even on a
|
40 |
+
# A100 80G GPUS,
|
41 |
+
if N_token > 3840:
|
42 |
+
configs.skip_amp.confidence_head = False
|
43 |
+
configs.skip_amp.sample_diffusion = False
|
44 |
+
elif N_token > 2560:
|
45 |
+
configs.skip_amp.confidence_head = False
|
46 |
+
configs.skip_amp.sample_diffusion = True
|
47 |
+
else:
|
48 |
+
configs.skip_amp.confidence_head = True
|
49 |
+
configs.skip_amp.sample_diffusion = True
|
50 |
+
return configs
|
51 |
+
```
|
docs/msa_pipeline.md
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## MSA data pipeline
|
2 |
+
If you download our released wwPDB dataset as in [training.md](./training.md), the mmcif_msa [450G] dir has the following directory structure.
|
3 |
+
```bash
|
4 |
+
├── seq_to_pdb_index.json [45M] # sequence to integers mapping file
|
5 |
+
├── mmcif_msa [450G] # msa files
|
6 |
+
├── 0
|
7 |
+
├── uniref100_hits.a3m
|
8 |
+
├── mmseqs_other_hits.a3m
|
9 |
+
├── 1
|
10 |
+
├── uniref100_hits.a3m
|
11 |
+
├── mmseqs_other_hits.a3m
|
12 |
+
├── 2
|
13 |
+
├── uniref100_hits.a3m
|
14 |
+
├── mmseqs_other_hits.a3m
|
15 |
+
...
|
16 |
+
├── 157201
|
17 |
+
├── uniref100_hits.a3m
|
18 |
+
├── mmseqs_other_hits.a3m
|
19 |
+
|
20 |
+
```
|
21 |
+
|
22 |
+
Each integer in the first-level directory under mmcif_msa (for example, 0, 1, 2, and 157201) represents a unique protein sequence. The key of `seq_to_pdb_index.json` is the unique protein sequence, and the value is the integer corresponding to the first-level subdirectory of mmcif_msa mentioned above.
|
23 |
+
|
24 |
+
This document is used to provide the steps to convert the MSA obtained from colabfold into the Protenix training format.
|
25 |
+
|
26 |
+
### Steps to get your own MSA data for training
|
27 |
+
|
28 |
+
#### Step1: get input protein sequence
|
29 |
+
Run the following command:
|
30 |
+
|
31 |
+
```bash
|
32 |
+
python3 scripts/msa/step1-get_prot_seq.py
|
33 |
+
```
|
34 |
+
You will get outputs in the `scripts/msa/data/pdb_seqs` dir. The result dir is as follows:
|
35 |
+
|
36 |
+
```bash
|
37 |
+
├── pdb_index_to_seq.json # mapping integers to sequences
|
38 |
+
├── seq_to_pdb_index.json # mapping sequences to integer identifiers when saving MSA. This file is required in training for finding local MSA path from sequence
|
39 |
+
├── pdb_seq.fasta # Input of MSA
|
40 |
+
├── pdb_seq.csv # Intermediate Files
|
41 |
+
├── seq_to_pdb_id_entity_id.json # Intermediate Files
|
42 |
+
```
|
43 |
+
|
44 |
+
#### Step2: run msa search
|
45 |
+
We give detailed environment configuration and search commands in
|
46 |
+
|
47 |
+
```python
|
48 |
+
scripts/msa/step2-get_msa.ipynb
|
49 |
+
```
|
50 |
+
|
51 |
+
The searched MSA is in `scripts/msa/data/mmcif_msa_initial`. The result dir is as follows:
|
52 |
+
```bash
|
53 |
+
├── 0.a3m
|
54 |
+
├── 1.a3m
|
55 |
+
├── 2.a3m
|
56 |
+
├── 3.a3m
|
57 |
+
├── pdb70_220313_db.m8
|
58 |
+
├── uniref_tax.m8 # record Taxonomy ID which is used by MSA Pairing
|
59 |
+
```
|
60 |
+
#### Step3: MSA Post-Processing
|
61 |
+
|
62 |
+
The overall solution is to search the MSA containing taxonomy information only once for the unique sequence, and pair it according to the species information of each MSA.
|
63 |
+
|
64 |
+
For MSA Post-Processing, the Taxonomy ID from the UniRef30 DB is added to the MSAs, and the MSAs are split into `uniref100_hits.a3m` and `mmseqs_other_hits.a3m`, which correspond to `pairing.a3m` and `non_pairing.a3m` in the inference stage, respectively.
|
65 |
+
|
66 |
+
You can run:
|
67 |
+
```bash
|
68 |
+
python3 scripts/msa/step3-uniref_add_taxid.py
|
69 |
+
|
70 |
+
python3 scripts/msa/step4-split_msa_to_uniref_and_others.py
|
71 |
+
```
|
72 |
+
|
73 |
+
The final pairing and non_pairing MSAs in `scripts/msa/data/mmcif_msa` are as follows:
|
74 |
+
|
75 |
+
|
76 |
+
```
|
77 |
+
>query
|
78 |
+
GPTHRFVQKVEEMVQNHMTYSLQDVGGDANWQLVVEEGEMKVYRREVEENGIVLDPLKATHAVKGVTGHEVCNYFWNVDVRNDWETTIENFHVVETLADNAIIIYQTHKRVWPASQRDVLYLSVIRKIPALTENDPETWIVCNFSVDHDSAPLNNRCVRAKINVAMICQTLVSPPEGNQEISRDNILCKITYVANVNPGGWAPASVLRAVAKREYPKFLKRFTSYVQEKTAGKPILF
|
79 |
+
>UniRef100_A0A0S7JZT1_188132/ 246 0.897 6.614E-70 2 236 237 97 331 332
|
80 |
+
--THRFADKVEEMVQNHMTYSLQDVGGDANWQLVIEEGEMKVYRREVEENGIVLDPLKATHAVKGVTGHEVCHYFWDTDVRNDWETTIDNFNVVETLSDNAIIVYQTHKRVWPASQRDILFLSAIRKILAKNENDPDTWLVCNFSVDHDKAPPTNRCVRAKINVAMICQTLVSPPEGDKEISRDNILCKITYVANVNPGGWAPASVLRAVAKREYPKFLKRFTSYVQEKTAGNPILF
|
81 |
+
>UniRef100_A0A4W6GBN4_8187/ 246 0.893 9.059E-70 2 236 237 373 607 608
|
82 |
+
--THRFANKVEEMVQNHMTYSLQDVGGDANWQLVIEEGEMKVYRREVEENGIVLDPLKATHSVKGVTGHEVCHYFWDTDVRMDWETTIENFNVVEKLSENAIIVYQTHKRVWPASQRDVLYLSAIRKIMATNENDPDTWLVCNFSVDHNNAPPTNRCVRAKINVAMICQTLVSPPEGDKEISRDNILCKITYVANVNPGGWAPASVLRAVAKREYPKFLKRFTSYVQEKTAGKPILF
|
83 |
+
```
|
84 |
+
|
85 |
+
```
|
86 |
+
>query
|
87 |
+
MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH
|
88 |
+
>MGYP001165762451 218 0.325 1.019E-59 5 230 244 3 228 230
|
89 |
+
-----DKVEFLKGVPLFSELPEAHLQSLGELLIERSYRRGATIFFEGDPGDALYIVRSGIVKISRVAEDGREKTLAFLGKGEPFGEMALIDGGPRSAIAQALEATSLYALHRADFLAALTENPALSLGVIKVLSARLQQANAQLMDLVFRDVRGRVAQALLDLARR-HGVPLTNGRMISVKLTHQEIANLVGTARETVSRTFAELQDSGIIRIeGRNIVLLDAAQLEGYAAG-------------
|
90 |
+
>A0A160T8V6 218 0.285 1.019E-59 0 227 244 0 229 237
|
91 |
+
MPTTRDsnAVQALQVVPFFANLPEDHVAALAKALVPRRFSPGQVIFHLGDPGGLLYLISRGKIKISHTTSDGQEVVLAILGPGDFFGEMALIDDAPRSATAITLEPSETWTLHREEFIQYLTDNPEFALHVLKTLARHIRRLNTQLADIFFLDLPGRLARTLLNLADQ-YGRRAADGTIIDLSLTQTDLAEMTGATRVSINKALGRFRRAGWIQvTGRQVTVLDRAALEAL----------------
|
92 |
+
>AP58_3_1055460.scaffolds.fasta_scaffold1119545_2 216 0.304 3.581E-59 10 225 244 5 221 226
|
93 |
+
----------LSRVPLFAELPPERIHELAQSVRRRTYHRGETIFHKGDPGNGLYIIAAGQVKIVLPSEMGEEAMLAVLEGGEFFGELALFDGLPRSATVVAVQNAEVLVLHRDDFMSFVGRNPEVVSALFAALSRRLRDADEMIEDAIFLDVPGRLAKRLLDLAEKHGRAEEKGGVAIDLKLTQQDLAAMVGATRESVNKHLGWMRDHGLIQLDRqRIVILKPDDLR------------------
|
94 |
+
```
|
95 |
+
### Format of MSA
|
96 |
+
In `uniref100_hits.a3m` (training stage) or `pairing.a3m` (inference stage), the header must start with the following format, which we use for pairing:
|
97 |
+
```
|
98 |
+
>UniRef100_{hitname}_{taxonomyid}/
|
99 |
+
```
|
100 |
+
|
101 |
+
We also provide a pipeline that uses a local Colabfold_search to generate Protenix-compatible MSAs in [colabfold_compatiable_msa.md](./colabfold_compatiable_msa.md).
|
docs/prepare_training_data.md
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Start with CIF files and prepare your own training data.
|
2 |
+
|
3 |
+
## Data Preparation
|
4 |
+
|
5 |
+
1. **Prepare CIF Files**: Place the CIF files you want to convert into training data in a folder. Alternatively, you can use a `txt` file to record the paths to these CIF files, with each line corresponding to the path of a specific CIF file.
|
6 |
+
|
7 |
+
2. **Prepare Protein Clustering File (Optional)**: The protein clustering file contains category information for each `[PDB ID]_[Entity ID]`. In the Protenix training data, we cluster protein sequences using a 40% sequence identity threshold.
|
8 |
+
|
9 |
+
You can download the official clustering results file provided by RCSB PDB using the following command, and use it directly:
|
10 |
+
```bash
|
11 |
+
wget https://cdn.rcsb.org/resources/sequence/clusters/clusters-by-entity-40.txt
|
12 |
+
```
|
13 |
+
|
14 |
+
If you prefer to perform your own clustering of protein sequences, ensure the final results are formatted as a text file like this:
|
15 |
+
Each line represents a cluster, containing `[PDB ID]_[Entity ID]` entries separated by spaces.
|
16 |
+
|
17 |
+
3. **Update the CCD (Chemical Component Dictionary) Cache File (If needed)**: We provide a pre-processed file, with a cutoff date of 2024-06-08, that records the reference conformers for each CCD Code. If the training data you're preparing is more recent than this date, some CCD Codes might be missing. For example, the CCD Code "WTB," appearing in the PDB ID: 8P3K released on 2024-11-20, is not defined in the previously provided CCD file. In such cases, you need to run the following script to download and update the CCD CIF files:
|
18 |
+
|
19 |
+
```bash
|
20 |
+
python3 scripts/gen_ccd_cache.py -c [ccd_cache_dir] -n [num_cpu]
|
21 |
+
```
|
22 |
+
|
23 |
+
After running the script, three files will be generated in the specified "ccd_cache_dir":
|
24 |
+
|
25 |
+
- `components.cif` (CCD CIF file downloaded from RCSB)
|
26 |
+
- `components.cif.rdkit_mol.pkl` (pre-processed dictionary, where the key is the CCD Code and the value is an RDKit Mol object with 3D structure)
|
27 |
+
- `components.txt` (a list containing all the CCD Codes)
|
28 |
+
|
29 |
+
When running Protenix, it first uses
|
30 |
+
```bash
|
31 |
+
`release_data/ccd_cache/components.cif`
|
32 |
+
`release_data/ccd_cache/components.cif.rdkit_mol.pkl`
|
33 |
+
```
|
34 |
+
if unavailable, it switches to
|
35 |
+
```bash
|
36 |
+
`release_data/ccd_cache/components.v20240608.cif`
|
37 |
+
`release_data/ccd_cache/components.v20240608.cif.rdkit_mol.pkl`
|
38 |
+
```
|
39 |
+
Notes:
|
40 |
+
- The `-c` parameter is optional. If not specified, files will be saved in the "release_data/ccd_cache" folder within the Protenix code directory by default.
|
41 |
+
- You can add the `-d` parameter when running the script to skip the CIF file download step, in which case the script will directly process the "components.cif" file located in the "ccd_cache_dir".
|
42 |
+
|
43 |
+
## Data Preprocessing
|
44 |
+
Execute the script to preprocess the data:
|
45 |
+
```bash
|
46 |
+
python3 scripts/prepare_training_data.py -i [input_path] -o [output_csv] -b [output_dir] -c [cluster_txt] -n [num_cpu]
|
47 |
+
```
|
48 |
+
|
49 |
+
The preprocessed structures will be saved as `.pkl.gz` files. Additionally, a `CSV` file will be generated to catalog the chains and interfaces within these structures, which will facilitate sampling during the training process.
|
50 |
+
|
51 |
+
You can view the explanation of the parameters by using the `--help` command.
|
52 |
+
```
|
53 |
+
python3 scripts/prepare_training_data.py --help
|
54 |
+
```
|
55 |
+
|
56 |
+
Note that there is an optional parameter `-d` in the script. When this parameter is not used, the script processes CIF files downloaded from RCSB PDB by applying the full set of WeightedPDB training data filters. These filters include:
|
57 |
+
|
58 |
+
- Removing water molecules
|
59 |
+
- Removing hydrogen atoms
|
60 |
+
- Deleting polymer chains composed entirely of unknown residues
|
61 |
+
- Eliminating chains where the Cα distance between adjacent numbered residues exceeds 10 angstroms
|
62 |
+
- Removing elements labeled as "X"
|
63 |
+
- Deleting chains where no residues have been resolved
|
64 |
+
- When the number of chains exceeds 20, selecting one central atom from those capable of forming interfaces and retaining the 20 nearest chains to it. If a ligand is covalently bonded to a polymer, it is considered as one chain together. Additionally, if the number of chains is greater than 20 but the total number of tokens in these chains is less than 5120, more chains will be retained until the 5120 token limit is reached.
|
65 |
+
- Removing chains with one-third of their heavy atoms colliding
|
66 |
+
|
67 |
+
For CIF files generated through model inference where these filtering steps aren't desired, you can run the script with the `-d` parameter, which disables all these filters. The CIF structure will not be expanded to Assembly 1 in this case.
|
68 |
+
|
69 |
+
|
70 |
+
## Output Format
|
71 |
+
### Bioassembly Dict
|
72 |
+
In the folder specified by the `-b` parameter of the data preprocessing script, a corresponding `[pdb_id].pkl.gz` file is generated for each successfully processed CIF file. This file contains a dictionary saved with `pickle.dump`, with the following contents:
|
73 |
+
```
|
74 |
+
| Key | Value Type | Description |
|
75 |
+
|----------------------------|---------------|-------------------------------------------------------------------------------|
|
76 |
+
| pdb_id | str | PDB Code |
|
77 |
+
| assembly_id | str | Assembly ID |
|
78 |
+
| sequences | dict[str, str]| Key is polymer's label_entity_id, value is canonical_sequence |
|
79 |
+
| release_date | str | PDB's Release Date |
|
80 |
+
| num_assembly_polymer_chains| int | Number of assembly polymer chains (pdbx_struct_assembly.oligomeric_count) |
|
81 |
+
| num_prot_chains | int | Number of protein chains in AtomArray |
|
82 |
+
| entity_poly_type | dict[str, str]| Key is polymer's label_entity_id, value is corresponding to entity_poly.type |
|
83 |
+
| resolution | float | Resolution; if no resolution, value is -1 |
|
84 |
+
| num_tokens | int | Number of tokens |
|
85 |
+
| atom_array | AtomArray | AtomArray from structure processing |
|
86 |
+
| token_array | TokenArray | TokenArray generated based on AtomArray |
|
87 |
+
| msa_features | None | (Placeholder) |
|
88 |
+
| template_features | None | (Placeholder) |
|
89 |
+
```
|
90 |
+
|
91 |
+
### Indices CSV
|
92 |
+
After the script successfully completes, a CSV file will be generated in the directory specified by `-o`.
|
93 |
+
Each row contains information about a pre-processed chain or interface, and the content of each column is described as follows:
|
94 |
+
```
|
95 |
+
| Column Name | Value Type | Meaning | Required |
|
96 |
+
|----------------|------------|------------------------------------------------------------------------|----------|
|
97 |
+
| type | str | "chain" or "interface" | Y |
|
98 |
+
| pdb_id | str | PDB Code (entry.id) | Y |
|
99 |
+
| cluster_id | str | Cluster_id of the chain/interface | Y |
|
100 |
+
| assembly_id | str | Assembly id | N |
|
101 |
+
| release_date | str | Release date | N |
|
102 |
+
| resolution | float | Resolution; if no resolution, value is -1 | N |
|
103 |
+
| num_tokens | int | Number of tokens in AtomArray of Bioassembly Dict | N |
|
104 |
+
| num_prot_chains| int | Number of protein chains in AtomArray of Bioassembly Dict | N |
|
105 |
+
| eval_type | str | Classification used for evaluation | N |
|
106 |
+
| entity_1_id | str | Chain 1's label_entity_id | Y |
|
107 |
+
| chain_1_id | str | Chain 1's chain ID | Y |
|
108 |
+
| mol_1_type | str | Chain 1's corresponding mol_type ("protein", "nuc", "ligand", "ions") | Y |
|
109 |
+
| sub_mol_1_type | str | Sub-classification of Chain 1's entity corresponding to mol_type | N |
|
110 |
+
| cluster_1_id | str | Chain 1's cluster ID | Y |
|
111 |
+
| entity_2_id | str | Chain 2's label_entity_id | Y |
|
112 |
+
| chain_2_id | str | Chain 2's chain ID | Y |
|
113 |
+
| mol_2_type | str | Chain 2's corresponding mol_type ("protein", "nuc", "ligand", "ions") | Y |
|
114 |
+
| sub_mol_2_type | str | Sub-classification of Chain 2's entity corresponding to mol_type | N |
|
115 |
+
| cluster_2_id | str | Chain 2's cluster_id | Y |
|
116 |
+
```
|
117 |
+
Notes:
|
118 |
+
- In the table, columns marked with 'Y' under 'Required' indicate that these columns are essential for training. If you are creating your own CSV for training purposes, these columns must be included. Columns marked with 'N' are optional and can be excluded.
|
119 |
+
- For rows where the "type" is "chain", the values in columns related to Chain 2 should all be filled with empty strings.
|
docs/training.md
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### Preparing the datasets
|
2 |
+
To download the [wwPDB dataset](https://www.wwpdb.org/) and the preprocessed training data, you need at least 1 TB of disk space.
|
3 |
+
|
4 |
+
Use the following commands to download the preprocessed wwPDB training databases:
|
5 |
+
|
6 |
+
```bash
|
7 |
+
wget -P /af3-dev/release_data/ https://af3-dev.tos-cn-beijing.volces.com/release_data.tar.gz
|
8 |
+
tar -xzvf /af3-dev/release_data/release_data.tar.gz -C /af3-dev/release_data/
|
9 |
+
rm /af3-dev/release_data/release_data.tar.gz
|
10 |
+
```
|
11 |
+
|
12 |
+
|
13 |
+
The data should be placed in the `/af3-dev/release_data/` directory. You can also download it to a different directory, but remember to modify `DATA_ROOT_DIR` in [configs/configs_data.py](../configs/configs_data.py) accordingly. The data hierarchy after extraction is as follows:
|
14 |
+
|
15 |
+
```bash
|
16 |
+
├── components.v20240608.cif [408M] # ccd source file
|
17 |
+
├── components.v20240608.cif.rdkit_mol.pkl [121M] # rdkit Mol object generated by ccd source file
|
18 |
+
├── indices [33M] # chain or interface entries
|
19 |
+
├── mmcif [283G] # raw mmcif data
|
20 |
+
├── mmcif_bioassembly [36G] # preprocessed wwPDB structural data
|
21 |
+
├── mmcif_msa [450G] # msa files
|
22 |
+
├── posebusters_bioassembly [42M] # preprocessed posebusters structural data
|
23 |
+
├── posebusters_mmcif [361M] # raw mmcif data
|
24 |
+
├── recentPDB_bioassembly [1.5G] # preprocessed recentPDB structural data
|
25 |
+
└── seq_to_pdb_index.json [45M] # sequence to pdb id mapping file
|
26 |
+
```
|
27 |
+
|
28 |
+
Data processing scripts have also been released. You can refer to [prepare_training_data.md](./prepare_training_data.md) for generating `{dataset}_bioassembly` and `indices`, and to [msa_pipeline.md](./msa_pipeline.md) for the pipelines that produce `mmcif_msa` and `seq_to_pdb_index.json`.
|
29 |
+
|
30 |
+
### Training demo
|
31 |
+
After the installation and data preparations, you can run the following command to train the model from scratch:
|
32 |
+
|
33 |
+
```bash
|
34 |
+
bash train_demo.sh
|
35 |
+
```
|
36 |
+
Key arguments in this script are explained as follows:
|
37 |
+
* `dtype`: data type used in training. Valid options include `"bf16"` and `"fp32"`.
|
38 |
+
* `--dtype fp32`: the model will be trained in full FP32 precision.
|
39 |
+
* `--dtype bf16`: the model will be trained in BF16 mixed precision. By default, the `SampleDiffusion`, `ConfidenceHead`, `Mini-rollout` and `Loss` parts are still trained in FP32 precision. If you want to train and run inference in full BF16 mixed precision, pass the following arguments to [train_demo.sh](../train_demo.sh):
|
40 |
+
```bash
|
41 |
+
--skip_amp.sample_diffusion_training false \
|
42 |
+
--skip_amp.confidence_head false \
|
43 |
+
--skip_amp.sample_diffusion false \
|
44 |
+
--skip_amp.loss false \
|
45 |
+
```
|
46 |
+
* `ema_decay`: the decay rate of the EMA, default is 0.999.
|
47 |
+
* `sample_diffusion.N_step`: during evaluation, the number of steps for the diffusion process is reduced to 20 to improve efficiency.
|
48 |
+
|
49 |
+
* `data.train_sets/data.test_sets`: the datasets used for training and evaluation. If there are multiple datasets, separate them with commas.
|
50 |
+
* Some settings follow those in the [AlphaFold 3](https://www.nature.com/articles/s41586-024-07487-w) paper. The table in [model_performance.md](../docs/model_performance.md) shows the training settings and memory usage for the different training stages.
|
51 |
+
* In this version, we do not use the template and RNA MSA features for training, as reflected in the default settings in [configs/configs_base.py](../configs/configs_base.py) and [configs/configs_data.py](../configs/configs_data.py):
|
52 |
+
```bash
|
53 |
+
--model.template_embedder.n_blocks 0 \
|
54 |
+
--data.msa.enable_rna_msa false \
|
55 |
+
```
|
56 |
+
This will be considered in our future work.
|
57 |
+
|
58 |
+
* The model also supports distributed training with PyTorch’s [`torchrun`](https://pytorch.org/docs/stable/elastic/run.html). For example, if you’re running distributed training on a single node with 4 GPUs, you can use:
|
59 |
+
```bash
|
60 |
+
torchrun --nproc_per_node=4 runner/train.py
|
61 |
+
```
|
62 |
+
You can also pass other arguments with `--<ARGS_KEY> <ARGS_VALUE>` as you want.
|
63 |
+
|
64 |
+
|
65 |
+
If you want to speed up training, see [<u> setting up kernels documentation </u>](./kernels.md).
|
66 |
+
|
67 |
+
### Finetune demo
|
68 |
+
|
69 |
+
If you want to fine-tune the model on a specific subset, such as an antibody dataset, you only need to provide a PDB list file and load the pretrained weights as [finetune_demo.sh](../finetune_demo.sh) shows:
|
70 |
+
|
71 |
+
```bash
|
72 |
+
# wget -P /af3-dev/release_model/ https://af3-dev.tos-cn-beijing.volces.com/release_model/model_v0.2.0.pt
|
73 |
+
checkpoint_path="/af3-dev/release_model/model_v0.2.0.pt"
|
74 |
+
...
|
75 |
+
|
76 |
+
--load_checkpoint_path ${checkpoint_path} \
|
77 |
+
--load_checkpoint_ema_path ${checkpoint_path} \
|
78 |
+
--data.weightedPDB_before2109_wopb_nometalc_0925.base_info.pdb_list examples/subset.txt \
|
79 |
+
```
|
80 |
+
|
81 |
+
where `subset.txt` is a file containing one PDB ID per line, for example:
|
82 |
+
```bash
|
83 |
+
6hvq
|
84 |
+
5mqc
|
85 |
+
5zin
|
86 |
+
3ew0
|
87 |
+
5akv
|
88 |
+
```
|
examples/7dc6.pdb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7dc6_watermarked.pdb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb/msa/1/non_pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb/msa/1/pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/0.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/non_pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/0/pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/msa.sh
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
MMSEQS="$1"
|
3 |
+
QUERY="$2"
|
4 |
+
BASE="$4"
|
5 |
+
DB1="$5"
|
6 |
+
DB2="$6"
|
7 |
+
DB3="$7"
|
8 |
+
USE_ENV="$8"
|
9 |
+
USE_TEMPLATES="$9"
|
10 |
+
FILTER="${10}"
|
11 |
+
TAXONOMY="${11}"
|
12 |
+
M8OUT="${12}"
|
13 |
+
DATAPATH="${13}"
|
14 |
+
EXPAND_EVAL=inf
|
15 |
+
ALIGN_EVAL=10
|
16 |
+
DIFF=3000
|
17 |
+
QSC=-20.0
|
18 |
+
MAX_ACCEPT=1000000
|
19 |
+
if [ "${FILTER}" = "1" ]; then
|
20 |
+
# 0.1 was not used in benchmarks due to POSIX shell bug in line above
|
21 |
+
# EXPAND_EVAL=0.1
|
22 |
+
ALIGN_EVAL=10
|
23 |
+
QSC=0.8
|
24 |
+
MAX_ACCEPT=100000
|
25 |
+
fi
|
26 |
+
export MMSEQS_CALL_DEPTH=1
|
27 |
+
SEARCH_PARAM="--num-iterations 3 --db-load-mode 2 -a --k-score 'seq:96,prof:80' -e 0.1 --max-seqs 10000"
|
28 |
+
FILTER_PARAM="--filter-min-enable 1000 --diff ${DIFF} --qid 0.0,0.2,0.4,0.6,0.8,1.0 --qsc 0 --max-seq-id 0.95"
|
29 |
+
EXPAND_PARAM="--expansion-mode 0 -e ${EXPAND_EVAL} --expand-filter-clusters ${FILTER} --max-seq-id 0.95"
|
30 |
+
mkdir -p "${BASE}"
|
31 |
+
colabfold_search --db1 uniref30_2103_db --db2 pdb70_220313_db --db3 colabfold_envdb_202108_db "${QUERY}" "${DATAPATH}" "${BASE}" --mmseqs "${MMSEQS}" --use-templates 1
|
32 |
+
|
33 |
+
wait
|
Protenix_new.zip → examples/7pzb_need_search_msa/msa_resmsa_seq_0/out.tar.gz
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0f796ed53bd887c84e8a76a8f4ff70de4886dbbe9f8b4d5197e54a67209b00d
|
3 |
+
size 1903397
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/pdb70_220313_db.m8
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
query_0 3H3U_A 0.265 230 161 3 1 229 1 223 1.823E-50 180 4M6I151M1I51M1D16M
|
2 |
+
query_0 4A2U_H 0.265 230 161 3 1 229 4 226 1.823E-50 180 4M6I151M1I51M1D16M
|
3 |
+
query_0 3R6S_A 0.265 230 164 4 1 229 21 246 3.058E-49 177 1M1I4M2I153M1I51M1D16M
|
4 |
+
query_0 4CYD_A 0.268 220 159 2 11 229 6 224 4.183E-49 176 151M1I51M1D16M
|
5 |
+
query_0 4EV0_D 0.292 219 146 2 12 229 2 212 6.863E-45 164 155M8I44M1D11M
|
6 |
+
query_0 4R8H_B 0.245 200 147 3 21 219 20 216 3.444E-39 147 70M1D85M2I35M1I6M
|
7 |
+
query_0 1O3Q_A 0.246 199 146 3 22 219 1 196 4.706E-39 147 69M1D85M2I35M1I6M
|
8 |
+
query_0 6DT4_A 0.245 200 147 3 21 219 11 207 6.430E-39 147 70M1D85M2I35M1I6M
|
9 |
+
query_0 3KCC_A 0.240 200 148 3 21 219 58 254 4.181E-38 144 70M1D85M2I35M1I6M
|
10 |
+
query_0 3FX3_B 0.237 236 170 4 1 233 4 232 7.803E-38 143 2M1D7M2I158M5I55M2D4M
|
11 |
+
query_0 2OZ6_A 0.256 199 142 2 25 219 6 202 1.066E-37 143 68M4D83M2I42M
|
12 |
+
query_0 3DKW_E 0.220 218 164 3 11 226 11 224 5.069E-37 141 84M1D75M4I40M1D13M
|
13 |
+
query_0 2XHK_B 0.268 212 146 4 17 225 12 217 6.136E-36 138 10M5I63M2D67M1I51M1D12M
|
14 |
+
query_0 2XHK_A 0.268 212 146 4 17 225 12 217 6.136E-36 138 10M5I63M2D67M1I51M1D12M
|
15 |
+
query_0 3LA7_B 0.241 195 144 3 36 227 47 240 4.801E-34 132 57M2D69M1I51M1D14M
|
16 |
+
query_0 3E97_A 0.205 219 170 3 11 228 8 223 3.105E-33 130 138M1I27M2I35M1D15M
|
17 |
+
query_0 3IWZ_D 0.233 206 149 4 20 219 22 224 9.491E-32 126 71M1D32M5D54M2I33M1I7M
|
18 |
+
query_0 2PQQ_A 0.331 151 95 1 1 151 3 147 4.486E-31 124 4M6I141M
|
19 |
+
query_0 2ZCW_A 0.250 200 137 5 33 229 6 195 1.139E-30 123 18M2D40M1I22M7I51M2I45M1D11M
|
20 |
+
query_0 5CVR_A 0.201 218 168 5 14 229 39 252 5.374E-30 121 3M2I18M1D49M1I87M1I39M1D16M
|
21 |
+
query_0 5E44_A 0.201 218 168 5 14 229 39 252 5.374E-30 121 3M2I18M1D49M1I87M1I39M1D16M
|
22 |
+
query_0 5W5B_A 0.200 230 166 5 5 229 19 235 2.534E-29 119 10M1I4M4D44M1I97M11I46M1D11M
|
23 |
+
query_0 5W5A_B 0.200 230 166 5 5 229 30 246 2.534E-29 119 10M1I4M4D44M1I97M11I46M1D11M
|
24 |
+
query_0 4I2O_A 0.216 208 152 4 32 237 40 238 2.218E-28 116 57M3I76M5I41M2D15M1I8M
|
25 |
+
query_0 2GAU_A 0.190 220 173 4 11 229 14 229 3.024E-28 115 4M2I145M1I9M1I46M1D11M
|
26 |
+
query_0 3B02_A 0.221 203 146 5 35 236 2 193 1.422E-27 113 54M1I26M6I54M2I36M1D16M2I5M
|
27 |
+
query_0 1O5L_A 0.155 199 158 3 24 220 14 204 6.681E-27 111 72M1D69M8I41M1D7M
|
28 |
+
query_0 2FMY_D 0.181 220 169 4 11 228 6 216 9.102E-27 111 59M1I15M7I69M1I51M2D15M
|
29 |
+
query_0 1FT9_A 0.215 218 157 8 20 234 12 218 5.816E-26 109 5M1I20M1D4M1I19M1I15M7I70M1I50M1D3M1D18M
|
30 |
+
query_0 3DV8_A 0.188 218 166 4 14 228 8 217 2.001E-25 107 69M2D81M1I7M7I38M1D12M
|
31 |
+
query_0 4MUV_A 0.290 141 87 2 2 136 2 135 2.777E-23 101 12M6D56M7I60M
|
32 |
+
query_0 6EO1_B 0.311 138 82 3 5 136 218 348 9.509E-23 99 9M6D51M2I6M5I59M
|
33 |
+
query_0 3BEH_D 0.311 138 82 3 5 136 218 348 9.509E-23 99 9M6D51M2I6M5I59M
|
34 |
+
query_0 7T8O_B 0.160 231 170 11 1 221 4 220 8.176E-22 96 1M2I5M2I23M2D13M1I27M1D18M1D22M5D8M1I37M7I8M1I37M1D8M
|
35 |
+
query_0 4D7S_B 0.190 131 103 1 11 141 71 198 1.112E-21 96 61M3I67M
|
36 |
+
query_0 4D7T_A 0.190 131 103 1 11 141 71 198 1.112E-21 96 61M3I67M
|
37 |
+
query_0 3SHR_B 0.269 115 83 1 6 119 154 268 5.156E-21 94 60M1D54M
|
38 |
+
query_0 2Z69_A 0.241 141 106 1 11 150 14 154 7.006E-21 94 84M1D56M
|
39 |
+
query_0 2Z69_B 0.241 141 106 1 11 150 14 154 7.006E-21 94 84M1D56M
|
40 |
+
query_0 6CJU_D 0.198 126 98 1 11 136 315 437 7.006E-21 94 61M3I62M
|
41 |
+
query_0 6CJT_D 0.198 126 98 1 11 136 315 437 7.006E-21 94 61M3I62M
|
42 |
+
query_0 5L0N_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
|
43 |
+
query_0 5J48_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
|
44 |
+
query_0 4KU8_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
|
45 |
+
query_0 4QX5_A 0.269 115 83 1 6 119 10 124 1.293E-20 93 60M1D54M
|
46 |
+
query_0 4Z07_E 0.269 115 83 1 6 119 137 251 1.293E-20 93 60M1D54M
|
47 |
+
query_0 4Z07_A 0.269 115 83 1 6 119 137 251 1.293E-20 93 60M1D54M
|
48 |
+
query_0 4OLL_A 0.213 187 136 5 2 187 9 185 8.124E-20 91 9M1D67M1I41M1I24M6I10M2I25M
|
49 |
+
query_0 4ONU_A 0.213 187 136 5 2 187 9 185 8.124E-20 91 9M1D67M1I41M1I24M6I10M2I25M
|
50 |
+
query_0 3GYD_B 0.232 146 105 2 3 144 36 178 2.034E-19 89 6M3I117M4D16M
|
51 |
+
query_0 3J4Q_C 0.198 146 103 4 9 147 252 390 2.034E-19 89 55M5D7M2D50M2I1M5I19M
|
52 |
+
query_0 4ORF_A 0.208 187 137 5 2 187 9 185 3.748E-19 89 9M1D67M1I41M1I24M6I10M2I25M
|
53 |
+
query_0 4DIN_B 0.173 138 108 3 1 132 236 373 3.748E-19 89 7M4D56M1D62M1D7M
|
54 |
+
query_0 5KJZ_A 0.184 130 101 2 1 125 5 134 5.087E-19 88 7M4D56M1D62M
|
55 |
+
query_0 5J3U_A 0.220 181 121 7 11 189 16 178 5.087E-19 88 57M1I51M1I7M3I3M3I19M2D9M7I7M3I8M
|
56 |
+
query_0 2QVS_B 0.191 146 104 4 9 147 160 298 9.370E-19 87 55M5D7M2D50M2I1M5I19M
|
57 |
+
query_0 6BYR_B 0.176 130 102 2 1 125 234 363 1.271E-18 87 7M4D56M1D62M
|
58 |
+
query_0 6NO7_D 0.176 130 102 2 1 125 235 364 1.271E-18 87 7M4D56M1D62M
|
59 |
+
query_0 5JIX_A 0.194 144 114 2 2 143 6 149 2.341E-18 86 67M1D35M1D40M
|
60 |
+
query_0 5JIZ_A 0.194 144 114 2 2 143 6 149 2.341E-18 86 67M1D35M1D40M
|
61 |
+
query_0 6RSX_A 0.169 142 117 1 1 142 153 293 2.341E-18 86 67M1I74M
|
62 |
+
query_0 5V4S_D 0.252 119 86 2 11 129 340 455 2.341E-18 86 53M2I11M1I52M
|
63 |
+
query_0 5D1I_B 0.280 125 88 2 1 125 9 131 3.175E-18 86 2M1I68M1I53M
|
64 |
+
query_0 5D1I_A 0.280 125 88 2 1 125 9 131 3.175E-18 86 2M1I68M1I53M
|
65 |
+
query_0 2QCS_B 0.169 130 103 2 1 125 145 274 5.842E-18 85 7M4D56M1D62M
|
66 |
+
query_0 7NP4_D 0.209 148 111 3 13 159 593 735 5.842E-18 85 51M2I11M3I68M1D12M
|
67 |
+
query_0 2MPF_A 0.206 131 99 2 13 143 27 152 7.924E-18 85 51M2I7M3I68M
|
68 |
+
query_0 3BPZ_C 0.206 131 99 2 13 143 76 201 7.924E-18 85 51M2I7M3I68M
|
69 |
+
query_0 5KHI_A 0.206 131 99 2 13 143 75 200 7.924E-18 85 51M2I7M3I68M
|
70 |
+
query_0 5JON_A 0.206 131 99 2 13 143 391 516 7.924E-18 85 51M2I7M3I68M
|
71 |
+
query_0 5JON_B 0.206 131 99 2 13 143 391 516 7.924E-18 85 51M2I7M3I68M
|
72 |
+
query_0 6GYO_D 0.213 131 98 2 13 143 394 519 1.075E-17 84 51M2I11M3I64M
|
73 |
+
query_0 7O4H_D 0.183 196 149 6 9 196 1069 1261 1.457E-17 84 55M1I11M1I14M3D34M1I32M2D27M3D12M
|
74 |
+
query_0 4AVB_B 0.197 177 132 3 11 187 15 181 1.976E-17 83 61M1I47M1I20M8I39M
|
75 |
+
query_0 4AVA_A 0.197 177 132 3 11 187 15 181 1.976E-17 83 61M1I47M1I20M8I39M
|
76 |
+
query_0 5DZC_A 0.221 140 97 5 7 145 403 531 2.678E-17 83 59M3I4M2I27M1D18M1I3M5I17M
|
77 |
+
query_0 4RZ7_A 0.221 140 97 5 7 145 403 531 2.678E-17 83 59M3I4M2I27M1D18M1I3M5I17M
|
78 |
+
query_0 5U6O_D 0.193 129 99 2 15 143 474 597 3.630E-17 83 54M2I2M3I68M
|
79 |
+
query_0 6UQF_D 0.193 129 99 2 15 143 474 597 3.630E-17 83 54M2I2M3I68M
|
80 |
+
query_0 7RHL_B 0.160 187 145 6 13 190 518 701 4.921E-17 82 51M1I11M1I14M3D34M1I21M3D7M3D37M
|
81 |
+
query_0 2MNG_A 0.230 117 85 2 13 129 16 127 6.669E-17 82 51M2I11M3I50M
|
82 |
+
query_0 6FTF_B 0.204 127 95 2 1 123 146 270 6.669E-17 82 7M4D113M2I1M
|
83 |
+
query_0 3OF1_A 0.220 150 107 4 11 160 9 148 9.038E-17 81 59M5I48M1I4M3I21M1I8M
|
84 |
+
query_0 6WEL_D 0.217 170 117 7 2 160 477 641 2.248E-16 80 9M2D54M2I9M1I14M6D34M1I23M3D6M1I5M
|
85 |
+
query_0 4OFG_A 0.214 140 98 5 7 145 11 139 4.124E-16 79 59M3I4M2I27M1D18M1I3M5I17M
|
86 |
+
query_0 4OFF_A 0.214 140 98 5 7 145 11 139 4.124E-16 79 59M3I4M2I27M1D18M1I3M5I17M
|
87 |
+
query_0 5KBF_B 0.187 160 121 2 3 160 36 188 4.124E-16 79 127M7I19M2D5M
|
88 |
+
query_0 6HQ2_B 0.305 108 74 1 32 139 2 108 7.562E-16 79 34M1I73M
|
89 |
+
query_0 6HQ3_A 0.305 108 74 1 32 139 2 108 7.562E-16 79 34M1I73M
|
90 |
+
query_0 6HQ7_A 0.316 101 68 1 39 139 7 106 1.024E-15 78 27M1I73M
|
91 |
+
query_0 3PNA_A 0.218 119 85 3 1 119 33 143 1.876E-15 78 9M2I3M1I57M5I42M
|
92 |
+
query_0 3PVB_B 0.218 119 85 3 1 119 40 150 1.876E-15 78 9M2I3M1I57M5I42M
|
93 |
+
query_0 7RHS_A 0.216 125 89 3 11 129 481 602 1.876E-15 78 54M2I9M1I14M6D39M
|
94 |
+
query_0 3TNP_B 0.193 145 104 3 9 147 267 404 3.437E-15 77 64M6D48M2I1M5I19M
|
95 |
+
query_0 5K8S_B 0.221 113 81 3 11 123 14 119 4.651E-15 76 55M3I3M2I47M2I1M
|
96 |
+
query_0 1CX4_A 0.184 146 104 4 9 147 156 293 6.292E-15 76 57M1I6M7D48M2I1M5I19M
|
97 |
+
query_0 6WJF_C 0.184 146 104 4 9 147 267 404 6.292E-15 76 57M1I6M7D48M2I1M5I19M
|
98 |
+
query_0 7LFT_B 0.216 120 85 3 11 124 344 460 6.292E-15 76 54M2I9M1I14M6D34M
|
99 |
+
query_0 7RHL_A 0.216 120 85 3 11 124 344 460 6.292E-15 76 54M2I9M1I14M6D34M
|
100 |
+
query_0 7O4H_B 0.216 120 85 3 11 124 472 588 8.512E-15 76 54M2I9M1I14M6D34M
|
101 |
+
query_0 5VA1_A 0.154 188 142 6 9 187 495 674 1.151E-14 75 57M5I16M1D6M1D41M2D8M5D14M3I29M
|
102 |
+
query_0 7RHS_D 0.164 194 143 7 9 196 549 729 1.557E-14 75 55M1I11M1I14M3D41M4I20M7I17M2D4M1D13M
|
103 |
+
query_0 5H5O_A 0.285 126 85 2 20 144 6 127 2.106E-14 74 46M4I21M1D54M
|
104 |
+
query_0 5H5O_B 0.285 126 85 2 20 144 6 127 2.106E-14 74 46M4I21M1D54M
|
105 |
+
query_0 7CAL_C 0.136 205 173 3 9 213 370 570 2.847E-14 74 59M1I87M2I52M1I3M
|
106 |
+
query_0 7T4X_A 0.166 150 124 1 9 158 365 513 3.850E-14 74 59M1I90M
|
107 |
+
query_0 7T4X_B 0.166 150 124 1 9 158 365 513 3.850E-14 74 59M1I90M
|
108 |
+
query_0 6SYG_A 0.157 127 100 3 9 133 6 127 9.505E-14 72 57M5I16M1D6M1D41M
|
109 |
+
query_0 2N7G_A 0.157 127 100 3 9 133 25 146 9.505E-14 72 57M5I16M1D6M1D41M
|
110 |
+
query_0 3MDP_A 0.165 127 103 2 11 134 8 134 1.284E-13 72 53M1D8M2D63M
|
111 |
+
query_0 1O7F_A 0.237 122 90 2 11 130 44 164 1.735E-13 72 50M2D26M1I43M
|
112 |
+
query_0 4F7Z_A 0.237 122 90 2 11 130 44 164 1.735E-13 72 50M2D26M1I43M
|
113 |
+
query_0 4L11_A 0.168 125 97 2 11 133 74 193 2.344E-13 71 55M5I22M2D41M
|
114 |
+
query_0 3IDB_B 0.214 107 83 1 16 122 45 150 4.276E-13 70 50M1I56M
|
115 |
+
query_0 3IDC_B 0.214 107 83 1 16 122 51 156 4.276E-13 70 50M1I56M
|
116 |
+
query_0 6V1X_B 0.138 130 111 1 9 138 370 498 4.276E-13 70 59M1I70M
|
117 |
+
query_0 3OGJ_A 0.179 117 89 4 3 119 19 128 5.774E-13 70 3M1I7M1I51M3I2M2I47M
|
118 |
+
query_0 3OCP_A 0.179 117 89 4 3 119 19 128 5.774E-13 70 3M1I7M1I51M3I2M2I47M
|
119 |
+
query_0 4LLO_A 0.161 155 116 5 11 158 29 176 5.774E-13 70 58M5I14M1I4M3D43M4D16M1I6M
|
120 |
+
query_0 6PBY_A 0.161 155 116 5 11 158 538 685 5.774E-13 70 58M5I14M1I4M3D43M4D16M1I6M
|
121 |
+
query_0 5K7L_A 0.161 155 116 5 11 158 549 696 5.774E-13 70 58M5I14M1I4M3D43M4D16M1I6M
|
122 |
+
query_0 5C8W_B 0.189 111 85 1 11 121 29 134 7.795E-13 70 55M5I51M
|
123 |
+
query_0 5C8W_F 0.189 111 85 1 11 121 29 134 7.795E-13 70 55M5I51M
|
124 |
+
query_0 5E16_A 0.198 111 84 1 11 121 34 139 1.917E-12 68 59M5I47M
|
125 |
+
query_0 2MHF_A 0.212 127 93 2 13 137 7 128 3.489E-12 68 53M5I23M2D44M
|
126 |
+
query_0 3UKN_B 0.212 127 93 2 13 137 79 200 3.489E-12 68 53M5I23M2D44M
|
127 |
+
query_0 3DN7_A 0.177 192 142 6 11 201 12 188 1.154E-11 66 4M1I2M2I67M1D69M9I13M1I3M2I18M
|
128 |
+
query_0 3DN7_B 0.177 192 142 6 11 201 12 188 1.154E-11 66 4M1I2M2I67M1D69M9I13M1I3M2I18M
|
129 |
+
query_0 4MGZ_E 0.196 107 80 3 17 121 40 142 2.097E-11 65 23M1D30M4I31M1D17M
|
130 |
+
query_0 3CF6_E 0.196 107 80 3 17 121 40 142 2.097E-11 65 23M1D30M4I31M1D17M
|
131 |
+
query_0 6H7E_B 0.226 106 76 4 18 121 157 258 1.687E-10 63 20M1D26M1I6M3I31M1D17M
|
132 |
+
query_0 1WGP_A 0.208 115 79 5 12 115 9 122 5.897E-09 58 51M1I9M2D15M3D6M5D4M1D18M
|
133 |
+
query_0 2D93_A 0.201 119 88 4 3 119 10 123 1.064E-08 57 35M1D27M2I5M3I27M1D18M
|
134 |
+
query_0 6M63_A 0.201 144 95 5 6 136 244 380 6.213E-08 55 14M1D8M2I6M3D14M5I16M9D66M
|
135 |
+
query_0 2FBH_A 0.178 123 78 6 118 232 3 110 3.733E-02 36 21M1D13M5I5M1I11M9I31M5D3M2D16M
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/tmp_5561987135da4188987956d9f05d1af2.fasta
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
>query_0
|
2 |
+
MAEVIRSSAFWRSFPIFEEFDSETLCELSGIASYRKWSAGTVIFQRGDQGDYMIVVVSGRIKLSLFTPQGRELMLRQHEAGALFGEMALLDGQPRSADATAVTAAEGYVIGKKDFLALITQRPKTAEAVIRFLCAQLRDTTDRLETIALYDLNARVARFFLATLRQIHGSEMPQSANLRLTLSQTDIASILGASRPKVNRAILSLEESGAIKRADGIICCNVGRLLSIADPEEDLEHHHHHHHH
|
examples/7pzb_need_search_msa/msa_resmsa_seq_0/uniref_tax.m8
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7pzb_unwatermarked.cif
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7r6r/msa/1/non_pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7r6r/msa/1/pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7r6r_watermarked.cif
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7wux/msa/1/non_pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/7wux/msa/1/pairing.a3m
ADDED
@@ -0,0 +1,544 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
>query
|
2 |
+
MASWSHPQFEKGGTHVAETSAPTRSEPDTRVLTLPGTASAPEFRLIDIDGLLNNRATTDVRDLGSGRLNAWGNSFPAAELPAPGSLITVAGIPFTWANAHARGDNIRCEGQVVDIPPGQYDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSRMHYPHHVQEGLPTTMWLTRVGMPRHGVARSLRLPRSVAMHVFALTLRTAAAVRLAEGATT
|
3 |
+
>UniRef100_UPI000B1A43EC_1463901/ 204 0.926 4.008E-55 15 232 233 0 217 218
|
4 |
+
---------------MAETSVRTEGRPDTRVLRLPGAASAPEFRLIDIDGLLNNRATTDVRDLGSGRLNAWGNSFPAAELPAPGSLIEVAGIPFTWANAHATGDNVRCEGQVVDIPPGRYDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSRMHYPHHVQEGLPTTMWLTRVGMPRHGVAHSLRLPRSVAMHVFALTLRTTADVRLAQGATT
|
5 |
+
>UniRef100_UPI001BE85A7B_2819193/ 186 0.830 5.494E-49 19 230 233 4 215 218
|
6 |
+
-------------------SVLTAAKPGPRVHRLPGTASAPEFCLIGIDDLLNNRATTSVSDLDSGRLNAWGNSFPAEELPTPGALIEVAGIPFTWANAHAKGDNVRCEGQVIDIPPGQYDWIYLLAASERRSEDTIWAHYEDGHADPLRVGVSDFLDGTPAFGELTAFRTSRMHYPHHVQERLPTTMWLTRVGMPRRGIAQSLRLPRLVAMHVFALTLVTGIDVRRAQGA--
|
7 |
+
>UniRef100_UPI0006899DE6_66377/ 178 0.726 2.919E-46 15 230 233 3 218 221
|
8 |
+
---------------ISSNTIRTHTPSDRLLDRLHGKASAPEFCLISIDDLLNNRATTSTEDLDSGRLNAWGNSFPAEELPAPGARIEIAGIPFAWARSHAEGDNVRCEGQVIEIPPGQYDWIYLLAASERRSENTLWAHYADGTADPLRLGVSDFLDGTPAFGELPAFSTERMHYPHHVQQGLPTTMWLSRVGMPRRGRATALRLPRCVALHVFALTLLTGIDVRLADGA--
|
9 |
+
>UniRef100_UPI0012DD7F77_35754/ 178 0.705 2.919E-46 15 231 233 3 219 237
|
10 |
+
---------------VSDRPVLVPGAYDRLVQRLPGTTAQPDFCLIGIDDLLNNRATTGTGDLDAGRLNAWGNSFPSRELPEPGAVIEIAGIPFVWPDANPDGDNVRSEGQVIDIPPGRYDWIYLLAASERRSEDTLWVHYDDGHADPLRVGVSDFLDGTPAFGELPAFRTARMHYPHHVQERLPTTMWLARVGMPRRGRAQALRLPRLVALHVFAITLVTGIDVRLADGAD-
|
11 |
+
>UniRef100_A0A510HIW1_49319/ 171 0.295 8.250E-44 29 219 233 889 1080 1084
|
12 |
+
-----------------------------RVVEARCAAEDRGFCAADLRHSLNSDGVSTDANPGDGDFDGLGFSYPAEELPVPG-PFESGGVLYWFTETsDGAPNNIEARGQTVPLVPERYAAAHILGAAHHGAVETaATVTYADGSTERLQLSLSDWAQETPQFGEEVAVRTTHRHQEGAGDVGPPVAIFAFSLELDPSREVRFLTLPAEVRLHLFAITLR-------------
|
13 |
+
>UniRef100_A0A5N0E9G2_2545717/ 168 0.765 1.013E-42 27 222 233 11 206 217
|
14 |
+
---------------------------DRLVRRLPAGPARPEFCLIDLDDLLNNRATTGTADLDQGRLNAWGNSFPAEELPQPGTQIDVAGIPFVWANAHAHGDNVRCEGQLIDLPPGQYDWIYLLAASERRSEDTLWAYYDDGHADPLPVGISDFLDGTPAFGELSAFRTTRMHYPHHVQHGLPTTVWLTRVGLPRRGNAHAIRFPRLVAMHIFALTLLTGS----------
|
15 |
+
>UniRef100_Q1ASW4_266117/ 167 0.295 2.592E-42 29 219 233 69 260 264
|
16 |
+
-----------------------------RVVEARCAAEDRGFCAAGLGHSLNSDGVSTDANPNDGDFDGLGFSYPAEELPAPG-PFESGGVLYWYPETsDGAPNNIEARGQTVPLVPGRYAAAHILGAAHHGAVETaATVTYADGSTGRLQLRLSDWAREAPQFGEEVAVRTTHRHQEGAGDVGPPVAIFAVRLELDPSREARFLTLPEEARLHLFAITLR-------------
|
17 |
+
>UniRef100_A0A4R7V1K6_502181/ 167 0.406 2.592E-42 28 217 233 100 290 300
|
18 |
+
----------------------------TLVPW-PSDDTSGNAVPVDLSAHLNCVGIEPKERPGRGAFNIWHNTFPIEELPRPGSITVVGGVSFRFPAADGtRPDNLRCRGQRIELPSSRVDWLHLLAAAERRTEDVVTVHYADGTTRPQWLRVSDFWPETPSrFGELPAFRTSAMLYPNHVDSRMPPVIWHQRVPVAVRDGVVAVTLPDNPAVHVFAMT---------------
|
19 |
+
>UniRef100_UPI000B27D092_285514/ 166 0.425 3.546E-42 31 224 233 1 195 204
|
20 |
+
-------------------------------TAIPGpPTEESPYDPVDLTPCFNNTGISPASDTGRGSFNVWGNSFPAESLPSGRAPVTVDGVPFRFPPVGRGNDNVRCAGQFVPVETGRFDWLHLLAAAERRTEDTMAMHFEDGTVDPEWIRVSDFWAAPARFGETKAFETPVMHYPHHVQQGVSALLWAQRVPVTRRADLKGFRLPRNVALHIFAATLQQTAVV--------
|
21 |
+
>UniRef100_A0A920BQ67_571911/ 166 0.423 6.637E-42 42 223 233 3 185 190
|
22 |
+
------------------------------------------PLAVDLDPYADNTGITTADRKDAGGFNIWGNTYPADQLPAAG-PVRVDDIPFRFPPaAPGRPDNVRCAGQLIEVPAGRYDWIQLLAAAERRTEDPLWLHYADGGVDPEWLRVSDfWPETAAHFGETAAYRCTHLHYPRHVDRKFGPSIWRQRVPVPRDAALRAVRLPDNPAIHIFAMTLVPAAG---------
|
23 |
+
>UniRef100_UPI000AF72EB5_718014/ 165 0.748 1.242E-41 29 231 233 17 219 221
|
24 |
+
-----------------------------PVRSVRATTSSPEFLLVGIDDLLNNRAITGAADLGDGRLNAWGNSFPAEELPAPGMLVEVAGIPFQWANAHTEGDNVRCEGQIIDIPPARYDWIYLLAASERRSEDTLWAYYDDGYADPLRVGVSDFLDGTPVFGELPGFRTTRMHYPHHVQHGLPTTMWLSRVGMPRHGRALALRLPRSVALHVFAVTLLTGIDVRRADGTT-
|
25 |
+
>UniRef100_A0A939I1U9_2815937/ 164 0.273 1.699E-41 6 220 233 502 723 724
|
26 |
+
------PQGAPGTTVTAVTATASYRAQSTAVDTVSGEQTITQVVPYPsLEAAFNNVGATSESDTTPGNFDGGGDSYSTQALaragATPGAAISANGLGFTWPSADaGRADNVSAAGQEITF-GGQGQTLGFLGAEAGSVSGTVTVTYTDGTTSTGQLGFPNWcCTPTDAFGAKTAFTMDHRNTPTGPANfGISYGVFTNTVPLTPGKTIRSVRLPDAPAIHVFALTVQP------------
|
27 |
+
>UniRef100_UPI001F407CE4_1463833/ 163 0.462 4.348E-41 28 227 233 5 205 217
|
28 |
+
----------------------------TPPGTATPPASAPRYRVVELAAHRNNRAATRVHTTGAGGFNVWRNSFPVEHLPPGGSEVEVGGVPFRFPPVGEGDDNVRCDGQFVEVPAGRYDWIHLLAAAERRTEDTVELHYADGSVDSEWLRVSDFWSAPAWFGELPALRTPVMHYPYHVQPGLSAHLWAQRVPVPRRTALAGLRLPRNIAVHLFAATVqEPPGAAGLP-----
|
29 |
+
>UniRef100_UPI000E241227_68246/ 163 0.451 5.947E-41 45 228 233 12 196 197
|
30 |
+
---------------------------------------------LDLTAFANNTGITTEDRLSDGAFNIWGNTFPAEELPA-GGEITVDGIPFRFPaPAAGAPDNVRCAGQLLELPAGRYDWIHVLAAAERRTEDFVQLHYTDGSVDPEWLRISDfWPQTGARFGESAAFSCTRLHYPRHVQRSMGPTIWRQRVPVTRQRELTALTLPDNPAIHVFAMTLAPTTPPEAAE----
|
31 |
+
>UniRef100_A0A944KLD5_2819145/ 163 0.258 5.947E-41 46 222 233 506 691 697
|
32 |
+
----------------------------------------------DLAPYFDNTGISDDANQATANLDGYGFSYSAEALAAsgltPGATVTSDGVQYGWSGAAGQADNVVAAGQVLAVPAitGATELGVMGSATNGPSSGTMTITYTDGTTQQATLGFTDWTAGSPSLGNGVAASTQYRNSTGGSSQGLGTHLYTTTIALQAGKTVASVTLPtraDQGLLHVFALGTDKGA----------
|
33 |
+
>UniRef100_A0A1C6VGZ9_683228/ 161 0.443 2.082E-40 35 229 233 7 200 206
|
34 |
+
-----------------------------------GAGAGTRQRHVDLTDHVDNLGIVVPRWPHGGGFNIWGNAFPADELPPAGGVCTVDGVSFSFP-LAGNRDNIRCRGQVVALPPGHYDWLYVLAAAERRTEDTVRLHYTDGSTAEQWLRISDfWPDTAPRFGDVLAFRCSRMLYPRHPQPSMAPAIWQQRIPVSRPGEVHAVRLPDNPAMHVFALTAVT-DAELPAEG---
|
35 |
+
>UniRef100_UPI0021D8E261_2853165/ 161 0.440 2.082E-40 28 227 233 5 206 218
|
36 |
+
----------------------------TPPGTATPPAGAPRYRVVELAAHRNNRAASRMHTTGAGGFNVWRNSFPVEHLPPGGSEVEVGGVPFSFPPVGEGYDNVRCDGQFVEVPAGRYDWIHLLAAAERRAEDTVELHYADGSVDTEWLRVSDFWSAPAWFGELPAYRTPVMHYPYHVQPGIGAHLWAQRVPVPRRTALAGLRLPRNIAVHVFAATVQepPPGTAGLP-----
|
37 |
+
>UniRef100_A0A246RSA7_1185415/ 161 0.500 2.847E-40 44 223 233 12 191 196
|
38 |
+
--------------------------------------------PVDISRHRNNIAISSATATKAGHFNVWGNSFAAEHLPAGGSLVHVAGVPFRFPPVCAGPDNVRCEGQFVPVTESRYDWIHVLAAAERRCEDTVELHFGDGSVDAEPLRISDFWAAPAWFGELLAFRSPVMHYPHHVQQGVPAVMWAQRVPVTRRADLVGIRLPRNVAMHIFAVTLEHAEA---------
|
39 |
+
>UniRef100_UPI0018912F4F_2705253/ 160 0.262 3.894E-40 47 219 233 1054 1238 1525
|
40 |
+
-----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASYSAQALAAvgvtPGAPLVHDGLTFTWPDRQvGQSDNVVAAGQTIDV-SGSGSTLGLLGTSTWGPStGSGTITYTDGSTQPYTIGFGDWANGTPPTGADVAIRAPYGNQP-GNQTGWAATVDYYPITLDPAKTVQSITLPSGsaqphggiPALHIFAMSIK-------------
|
41 |
+
>UniRef100_A0A2A3HR12_1938860/ 160 0.480 7.284E-40 37 217 233 8 188 212
|
42 |
+
-------------------------------------ASAPRYRVVELADHRNNRAATRVHTTAAGGFNVWRNSFPAEHLPPGGSQVEVGGVPFSFPPVGEGDDNVRCDGQFIAVPAGRYDWVHLLAAAERRTEDTVELHYADGSVDTEWLRVSDFWAAPAWFGELPAYRTPVMHYPYHVQPGVSAHLWAQRVPVPRRTELAGLRLPRNIAVHVFAAT---------------
|
43 |
+
>UniRef100_A0A838IYH6_2026798/ 159 0.292 9.962E-40 29 221 233 647 843 845
|
44 |
+
-----------------------------PRLALPEAFASDGFVALDLERAFNNDAFSSPSQPLKGNFDsrsgVLGATYPAERAPASLERIELGGVPFLFPPTDADANNVAFHGQRLEVPPGHYDELHLLGVSEQGNyQDTVRLVYEDGSVDEIPLGLSDWCQ-TPRYGEAIAFAFEQRRGAGGAIERITCRILVQLLPVRADSSLLRVDLPDRETMHLFALTLRHA-----------
|
45 |
+
>UniRef100_UPI001C21EFA6_2849653/ 159 0.434 1.362E-39 41 220 233 11 191 199
|
46 |
+
-----------------------------------------EPVPVPLTEHFDNVGFTQPDQLSSGAFNIWGNTFPADELP-PGPEVRLDGVPFLIPAAPvGSPDNLRCAGQLVAVPPGRYDWVRLLCAAERRTEDRVWLHYTDGTIDPEWLRVSDfWPETAPRFGESLAFRFSRMHYPRHVDRRMKPAIWSQRLPITRAAELAAVRLPDNPAIHLFAMTLVP------------
|
47 |
+
>UniRef100_A0A3M9MI77_2294115/ 158 0.270 1.863E-39 44 217 233 562 741 744
|
48 |
+
--------------------------------------------YANLAAAYNNVGVTAGADPTPGNFDGSGNSFNAELLAgqglTPGANVTANGFSFTWPNvAPGVADNAEAAGQLIKLT-GTGSTLAFLGSGVGDQSDTVTVHYTDGSTSTGTVGFPNWSFSDPtEFGAKLAFSTMGRNTPTGLADTAYAYrIFTNTIPLDTGKTVQSVQLPNNSALHLFAWT---------------
|
49 |
+
>UniRef100_A0A5N8W5X6_1803180/ 158 0.272 2.548E-39 44 219 233 760 936 940
|
50 |
+
--------------------------------------------PVDLTAHFDSDGISTHENTQDGDFDGTGRTYPAEELPAAG-PLTFEGVRYTFPSyADGALDNVTAKGQTIPVPPGRYAKVRVLGACSYGAlKTTLTATYTDGSTQEVELAMSDWAGTAPASGSEV-VRCTHRHGRSG-PDTLQVALFQTAVTVDQARELRSLTLPDTtkPALHLFALSVE-------------
|
51 |
+
>UniRef100_UPI0018923E89_280293/ 158 0.283 2.548E-39 47 219 233 1056 1240 1527
|
52 |
+
-----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASFSAQALtsvgVTPGAPLVHDGLTFTWPNRQvGQSDNVVAAGQTIDVT-GSGSTLGLLGTSTWGAStGSGTITYTDGSTQPYTIGFGDWANGTPPTGGDVAIRAPYGNQP-GNQTGWAATIDYFPVTLDPSKTVQSITLPSGsaqphggiPAMHVFAMSIK-------------
|
53 |
+
>UniRef100_A0A9E3EXL3_2740538/ 158 0.256 3.485E-39 48 216 233 351 533 733
|
54 |
+
------------------------------------------------STAYNNVGISDDSNSSTANFDGGGYSYSAEALQAagltPGQQITANGITFTWPGVPaGVNDNYQANGQTIPITPvsGATTLAFLGSATNGPSSGTVTITYTDNSTQSFSLGFSDWTlnagNSPPSFGNSVIATTSYRNHSGAGPDNVDTNILYAGVTLQTGKTLQSVTLPSTTNqgkIHVFAI----------------
|
55 |
+
>UniRef100_UPI000AB2585B_688067/ 156 0.466 8.913E-39 44 223 233 6 185 188
|
56 |
+
--------------------------------------------PVDIAGHRNNTGISAATETKAGAFNVWGNSFAAEYLPEGDSLVHVAGVPFRFPPVCEGPDNIRCAGQFVRVPEGRYDWVHVLAAAERRCEDLVEMNYGDGSVDQEPLRVSDFWAAPAWFGEVKAFETPVMHYPHHVQQRVPAVMWAQRVPVTRRADLAGLRLPRNVAVHVFAMTLQRTEA---------
|
57 |
+
>UniRef100_A0A542DYC5_402171/ 156 0.261 8.913E-39 46 228 233 869 1066 1093
|
58 |
+
----------------------------------------------DIWPYYTNAGITDDAHTGAASFDGGGWSYSAQALAAagvtPGSTVTADGVHYTWPDVPvATPDNIEESGQTIPLKvPAGASTIGLLGSASNagssGAGGTVTVHYTDGTSSTFDAFFSDWTlgggGGTPVPGDTTAVTTAYRN-SSGGRDPVKTYVFSVSAPLDAGKTVASITLPqaQGGDAHVFAIGFDTTGASSQAP----
|
59 |
+
>UniRef100_UPI001AEB6A42_1365924/ 156 0.290 1.219E-38 7 220 233 695 914 917
|
60 |
+
-------DLERGGevlVHGPSPDLGIGPVPIsTPAPRwgLPALPDGGQTVIVDLAAALNNDAITSEFHMGDGDFDGAGNTYPAAQLPQTGQT-TDDGIPFEFVNgSEGTPNNVIPAAQTITLPPGNYMTLHLLAASDNGNtNRSMTITYADGTSQ-VPLQITDW-RTSPAFGETEALRTSQMHARTG-PQPVRLSIFHQKIPLDPAKQLVSITLPgaATPRPHLFAITLEK------------
|
61 |
+
>UniRef100_UPI0023608C19_1392640/ 156 0.216 1.219E-38 47 219 233 883 1071 1072
|
62 |
+
-----------------------------------------------LLALFNNTGISSDDDPSSANYDGVGYSYSEQALAtagvKPGGTVSAGGMEFVWPNVPaGEPDNIAADGQTINLSdvPAGASELAFLGSAtNGPSQGEVKITYTDGSTQTATLGFSDWTlnagSSSPSFGNVVAAKMPYRNSTSGNRDQVTTYVFASRpIPLEQGKQPKSVTLPssvDQGTLHVFAVSVK-------------
|
63 |
+
>UniRef100_C7PY52_479433/ 156 0.272 1.219E-38 47 219 233 1074 1258 1548
|
64 |
+
-----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASFSAQALAsvgvTPGAPLVHDGLTFTWPDRQvGQSDNVVAAGQTIDI-SGSGSTLGLLGTSTWGaSSGSGTIAYTDGSTQPYTIAFGDWANGTPPTGGDVAIRAPYGNQP-GNQTGWAATIDYFPITLDATKTVQSITLPPgsaqphggTPAMHIFAMSIK-------------
|
65 |
+
>UniRef100_UPI0021F5AFBA_1550616/ 156 0.480 1.667E-38 36 218 233 7 189 195
|
66 |
+
------------------------------------PAGIGLYDTVDISGHRNNTAISAATETGAGAFNVWRNSFAAEYLPAGGSLVHVDGVPFEFPPVCEGPDNIRCAGQFIRVPEGRYDWIHVLAASERRSEDTVELNFADGSVDAEALRVSDFWAAPPWFGEVRAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRAGLTGILLPRNVAVHVFALTL--------------
|
67 |
+
>UniRef100_UPI0004C58EA0_1883/ 156 0.436 1.667E-38 36 221 233 2 188 196
|
68 |
+
------------------------------------TSTTTTPVPLDLTALADNVGVTRPDRLSEGAFNIWGNTFPAEELP-PAGTVEVHGIPFRWPaTGDGSPDNVRCRGQLVTVPEGRYDWIHVLAAAERRTEDPLLLHFADGSVDPEWLRVSDFWPETASrFGERPAFSCTRLHYPRHIQHAMGPTVWRQRVPVTREERLRAFRLPDNPAMHLFAITLAPA-----------
|
69 |
+
>UniRef100_UPI001BCF2EC7_1581705/ 156 0.483 1.667E-38 44 223 233 15 194 199
|
70 |
+
--------------------------------------------PVDISGHRNNTAISAATQTGAGAFNVWGNSFAAEYLPAGESLVHVAGVPFRFPPVCDGPDNVRCAGQFVTVPAGRYDWLHVLAAAERRCEDTVELNFGDGSVDAEPLRVSDFWAAPAWFGEIKAFESPVMHYPHHVQRGVPAVMWAQRVPVTRRAGLAGLRLPRNVAVHVFAVTLQRAEA---------
|
71 |
+
>UniRef100_A0A2S9PQ64_2100817/ 156 0.422 1.667E-38 33 232 233 0 197 206
|
72 |
+
---------------------------------MPSTASTT-TRILDLSAVFDNIGASRAAATSTGAFNVWRNSFSAEHLPEPGTTVTVDEVPFLVPPfGTGGPDNVRCAGQLLEVQPDRYDWLYLLAAAERRVEDEVALHFADGTVDFEALRLSDFWAAPAVFGESEAFRTPAMHYPQHVQFGVPAGLFCQRVPVTRRAPLAGVRLPRNTAVHVFAATLLRAAAA--AGGPRT
|
73 |
+
>UniRef100_A0A367FP16_509200/ 155 0.478 2.279E-38 44 224 233 5 186 192
|
74 |
+
--------------------------------------------PVDLTGHWNNRGISAAAGTGSGGFNVWRNSFPAEYLPPPGAQVEVEGVPFRFGGPDPAGDNIRCAGQYVELPEGRYDWIHLLTAAERRTEDTVALHFAGGEVDFEPLRVSDfWARAHAAFGEAKAFETPVMHYPHHVQPRVEALMWSQRVPVTRRAPLRGLRLPRNVAIHLFAMTLQTAGEP--------
|
75 |
+
>UniRef100_A0A1W5T2H2_1977088/ 155 0.458 2.279E-38 44 224 233 15 195 197
|
76 |
+
--------------------------------------------PVDISGHRNNTAVSAATETKAAAFNVWGNSFAAEYLPAGGTLVHIAGVPFRFPPVCDGPDNIRCAGQFLDLPEGRYDWIHVLAAAERRCEDTAELHFDDGSVDPEPLRVSDFWSAPAWFGEVKAYESPVMHYPHHVQRNVSAVMWAQRVPVTRRAGLTGVRLPRNDALHLFAVTLQRAEAA--------
|
77 |
+
>UniRef100_A0A372M322_2293568/ 155 0.392 2.279E-38 22 220 233 12 215 222
|
78 |
+
----------------------TSELTHTPADFAPPGSPPADFTPVGLQQHFNGKGVSAPrRWPTTGGFNVWGNTFPAEGLPTGGGDAEVACIPFRFPvddSAAGAPDNLRCRGQSVDVPPGEYQWIHVLAAAERRTEDEAVLHYADGSTRREWLRISDFWPETDqRFGELLAFRTRYLMYPRHSQHNMVPSIWLQRVPVTAPGTVTAVELPDNPAIHIFAITLET------------
|
79 |
+
>UniRef100_UPI00131A6ADB_1840409/ 155 0.241 2.279E-38 49 218 233 893 1078 1080
|
80 |
+
-------------------------------------------------SVVNNTGISPDAKPAAANFDGVGWSYSADALAAagakPGGTVTVDGLSYTWPNFPvGEPDNVVAQGQTVALPgaAAGAGKLALLGAAANGkASGTLTITYTDGSTTRADIGFSDWTlgggSDQPSFGNRIAVSTPYRNSTGGQPQQIRTNVFAtVPIALDPGKRVRGITLPaqvQGGSLHVFAVAL--------------
|
81 |
+
>UniRef100_UPI001C84E3F6_763782/ 155 0.494 3.117E-38 44 218 233 2 177 189
|
82 |
+
--------------------------------------------PVDLAAHWNNRAISAADDRGSGGFNVWRNSFPAEYLPPPGARVEVAGVPFRFGGPSAAGDNVRCAGQYVELPEGRYDWIHLLTAAERRTEDTVALHFSDGEVDFESLRVSDfWAQAHAAFGETKAFETPVMHYPHHTQPRVEALMWSQRVPVTRRAPLTGLRLPRNIAVHVFALTL--------------
|
83 |
+
>UniRef100_UPI000B2085DA_46164/ 155 0.422 3.117E-38 43 227 233 5 190 191
|
84 |
+
-------------------------------------------VPVDLAPFFDNVGITPAGDLSSGAFNIWGNTFPAEELP-ERSPTTLGGVPFRFPDrGPGGADNLRCGGGLIPLPEGRYDWLYLLAAAERRTEDPVHLHYADGTVDPEWLRVSDfWPETQAWFGEREAIRCRSLHYPRHVQRPMGPALWRERIPVPREMPLAALRLPDNPAVHVFAVTLLPAGEVAPA-----
|
85 |
+
>UniRef100_A0A7K0CJB4_2585196/ 155 0.419 3.117E-38 43 221 233 7 186 196
|
86 |
+
-------------------------------------------VPVDLAPYADNTGITSARATAAGAFNLWGNTFPAGELPPPGTAV-VDGLPFRFPLAPaGEPDNVRCAGQLLPLPPGRYDWIQLLAAAERRTEDQALLHYADGAVDPEWLRVSDfWPQTRSRFGAAPAYECRVLHYPRHVDDKFGPVVWRHRVPVPRESDLAAVRLPDNPAIHVFAMTLVPA-----------
|
87 |
+
>UniRef100_UPI0018922BF9_280293/ 154 0.446 5.828E-38 44 218 233 2 178 189
|
88 |
+
--------------------------------------------PVDISPHFDNRGITGRGELSQGGFNIWDNTYPAEELPTPGGVVEVGSAPFLFPAlSPDGGDNLRCVGQFIQLPVGRYDWLYLLAASERRSEDTVYLHYADGSVDPEWLRVSDfWAETPPHFGEEAGLRCTVLHYPRHVQPFMGPAIWRTRVPVPRETPLSAVRLPDNPAIHIFALSL--------------
|
89 |
+
>UniRef100_A0A6H9Z8I5_359158/ 154 0.263 5.828E-38 20 220 233 743 944 947
|
90 |
+
--------------------APVRVTTKAPPWGLPPLPPAGQAVPVELAGHFDNDGITSEFFMGDGDFDGTGATYPAAALPQTG-RVTADGVEFLFVNGiEGSANNVTAAGQTIPVPAGRYARLHVLGASDNGNAGtTVTAVYADGGTAAVPLRLTDW-KSNPAYGESAAVRAPQFHTRTGAKD-IAVTIFHQKADLDPARELTALRLPNltRPRPHLFSLTLEK------------
|
91 |
+
>UniRef100_UPI00104757B6_1213861/ 153 0.475 1.090E-37 37 221 233 4 188 190
|
92 |
+
-------------------------------------TTANTFRPVEITEHWNNRSMSTVDDKGDGRFNVWRNSFPAEHLPRPGERVTVGGVPFDFPPATSAGDNARCAGQFVTLPPGHFDWIRLLASAERRVEDTVALHFADGQVDFEAIRVSDFWAAPACFGETLAYRTPVMHYPHHVQPRVEAMLWSQRVPVTRDATLTGLRLPNNRALHIFALTLQES-----------
|
93 |
+
>UniRef100_UPI001C690D85_2749435/ 153 0.444 1.490E-37 45 222 233 0 178 185
|
94 |
+
---------------------------------------------MDLTALADNVGVTRPDRLSEGAFNIWGNTFPADELP-PAGPVEVHGIPFRWPaTGDGSPDNVRCRGQLVTVPEGHYDWIHVLGAAERRTEDPLLLHFTDGSVDPEWLRVSDFWPETASrFGERPAFSCTRLHYPRHIQQAMGPTIWRQRVPVTREEPLRAFRLPDNPAIHLFAITLAPAA----------
|
95 |
+
>UniRef100_UPI0021556BFD_2675858/ 153 0.471 1.490E-37 45 218 233 16 189 195
|
96 |
+
---------------------------------------------VDISGHRNNTAISASTETKAGAFNVWGNSFAAEYLPAGESIVHVDGVPFEFPPVCDGPDNVRCAGQFIRVPEGRYDWIHVLAASERRSEETVELTFADGSVDPEPLRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGVPAVMWAQRVPVTRRAGLTGILLPRNVAVHVFAVTL--------------
|
97 |
+
>UniRef100_UPI002254C514_2975865/ 153 0.436 2.037E-37 42 227 233 2 189 191
|
98 |
+
------------------------------------------STVIDISEACNNLGIGKSGAGEDYGFNIWRNTFPAEDLPAPGSTVAVDGVDFEFPPrETGKGDNIRCRGQLVALPGGHYEWIYLLGAAERRTEDEVELHYADGAARTEWLRMSDFWPETDSwFGEPEAFRATGLRYPRHTQAGHRPVIWQQRIPVTVPGALTALRLPDNPAMHVFALTAVTDPGVRHA-----
|
99 |
+
>UniRef100_UPI0020A32361_334858/ 153 0.456 2.037E-37 41 220 233 1 182 191
|
100 |
+
-----------------------------------------KNTVVDISALCDNRGIQPPGTEGDYGFNIWSNTFPAEELPEPGSLVPVAGVPFEFPaRPAPGGDNIRCRGQLVPVPPGDYDWLYLLGAAERRTEDQVLLHYRDGTVRAEWLRMSDfWPQTEARFGEPLAFRTSAMRYPRHTHPAHAPSIWQQRVPVTVPGEITAVRLPDNPAMHVFALTLGT------------
|
101 |
+
>UniRef100_A0A8F5GM97_2750812/ 153 0.412 2.037E-37 45 224 233 8 188 192
|
102 |
+
---------------------------------------------VDLAPFFDNVGITPSGDPSAGAFNIWGNTFPAGELP-EGPRAMLGGVPFRFPDrGPGGADNLRCAGGLIPLPEGRYDWLYLLAAAERRTEDPVHLHYADGTVDPEWLRVSDfWPETEPWFGEREGIRCESLHYPRHVQRPMGPALWRERIPVPRETPLAALRLPDNPAVHVFAITLLPAEEV--------
|
103 |
+
>UniRef100_A0A4R2JNF3_1213861/ 153 0.455 2.037E-37 43 218 233 11 188 194
|
104 |
+
-------------------------------------------TPVDLGGHFDNRGITRGGELDQGGFNIWDNTFPAEDLPEPGGTVRIGDVPFLFPAPDPAGrDNLRCSGQVIELPTGRYDWLYLLAASERRSEDVITLHHADGSVDPQWLRVSDFWAETPAhFGEQASVRCRSLHYPRHIQRNMAPVIWRTRVPVPRETDLAAIGLPDNVAIHIFALTL--------------
|
105 |
+
>UniRef100_UPI001E5DF7EA_2714955/ 153 0.267 2.037E-37 2 221 233 797 1034 1035
|
106 |
+
--SWLPESlFLNGGTVTVEVGATANTAWGsgaadlpvdrvpsapTPVPNLPAAcVVQGDYCLQSLAGQYDVDGVTTDDDMSQGVFGTNGWSWPAELLPAPG-VGTAAGRPYLFPDTTGtAGNFLSARGQTVYLTPGRYSALHAVTASHNGSfRGDVTVTYSDGTTSKASLLSTDWAAASPAYGEETALDVPTRHRNTGIHDGLRVRMWHVPLAVDSTRTAVSITLPSLPNLKVYALSARTA-----------
|
107 |
+
>UniRef100_A0A1I3MHH7_115433/ 153 0.280 2.037E-37 52 216 233 897 1073 1077
|
108 |
+
----------------------------------------------------NNAGISPDSKPAAANFDGVGFSYSADALAAagakAGSTVTVGGLSYTWPNYPaGSPDNVQAQGQTI-IASGSGRLAFLGAASNGSASGTLTISYTDGSSSTAQLGFSDWTLGaggqQPAFGNQVAFTTPYRNSTGGTPQQINTYVFAsAPITLPAGKTVRSVTLPSNvsgGQLHVFAI----------------
|
109 |
+
>UniRef100_A0A7K2PPX2_2690297/ 152 0.474 2.786E-37 44 218 233 15 189 195
|
110 |
+
--------------------------------------------PVDISGHRNNVAVSAAAETKAGAFNVWGNSFSAEHLPAGESLVNVDGVPFEFPPLCEGPDNIRCAGQFIRVPEGRYDWIHVLAASERRCEDTVELIFADGSVDAEALRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRAGLTGIVLPRNVAIHVFAVTL--------------
|
111 |
+
>UniRef100_A0A248YPR0_2024580/ 152 0.491 2.786E-37 44 222 233 12 190 195
|
112 |
+
--------------------------------------------PVDISGHRNNTAISSATATKAGHFNVWGNSFAAEHLPAGGSLVHVAGVPFRFPPVCAGPDNIRCEGQFIAVTQGRYDWVHVLAAAERRCEDTVELSFSDGPVDAESLRVSDFWAAPAWFGELLAFRSPVMHYPHHVQRGVPALMWAQRVPVTRRADLVGIRLPRNVAMHIFAVTLQHGA----------
|
113 |
+
>UniRef100_UPI0010FB6E0B_1905/ 152 0.434 2.786E-37 43 222 233 7 187 203
|
114 |
+
-------------------------------------------VPVDLTAHADNTGITPADALDAGAFNLWGNTFPAEELP-PGGPVEVDGIPFLFPrHAPGAPDNIRCAGQLIELPAGRYDWIQVLAAAERRTEDQVLLHYGDGSVDPEWLRVSDFWPETASrLGGTAAYTCGRLHYPRHVERKFGPTLWRHRVPVPREAELTAVRLPDNPAVHLFALTLVPAP----------
|
115 |
+
>UniRef100_UPI0022EA6403_1229659/ 152 0.444 3.809E-37 41 218 233 1 180 191
|
116 |
+
-----------------------------------------KNTVVDISALCDNRGIQPPGTEGEYGFNVWSNTFPAEELPEPGGLVPVAGVPFEFPaRPAPGGDNIRCRGQLVEVPPGEYDWLHLLGAAERRTEDQVLLHYRDGTVREEWLRMSDfWPQTDARFGEPLAFRTSAMRYPRHTHRSHAPSIWQQRVPVTVPGEITAVRLPDNPAMHVFALTL--------------
|
117 |
+
>UniRef100_A0A1B4ZDD3_1213862/ 152 0.413 3.809E-37 45 228 233 26 210 211
|
118 |
+
---------------------------------------------LDLTAFADNVGVTSPDRLSEGAFNIWGNTFPADELPK-GGPVDIHGIPFRFPAvGTGQPDNVRCAGQFIDVPVGRYDWIHVLAAAERRTEDFVRLHYTDGAVDPEWLRVSDFWPETASrFGESAAVSCTRLHYPRHIQRSMGPTLWRQRVAVPREQDLSAIRLPDNPAIHIFAMTLAPATQPETTQ----
|
119 |
+
>UniRef100_A0A5N8VJT9_1609272/ 151 0.468 5.207E-37 42 218 233 7 183 189
|
120 |
+
------------------------------------------YEPVDISGHRNNTAISAATETGAGAFNVWRNSFAAEYLPAGGSLVHVEGVPFEFPPVCDGPDNVRCGGQFIRVPEGRYDWIHVLAASERRCEDTVELSFADGSVDTEPLRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGVPAVMWAQRVPVTRRAGLTGILLPRNVAVHVFAVTL--------------
|
121 |
+
>UniRef100_A0A229REX4_76020/ 151 0.406 5.207E-37 45 224 233 8 189 192
|
122 |
+
---------------------------------------------LSLLEHVNNTGLSTVGDLAAAGFNIWGNSFPAADLPSPGATSVVGGVPFRFPeRAPDGRDNVRCRGQRIDVPEGRWDWVHVLGAAERRTEDPLLLRYADGTVRPQWLRMSDfWPETEPRFGELLAYRCATMHYPRHVQRTTAPAIWAQRVPLSVPDGVVALELPDNPALHLFAITLQAGERV--------
|
123 |
+
>UniRef100_A0A7K2T631_2690338/ 151 0.423 5.207E-37 36 222 233 1 188 195
|
124 |
+
------------------------------------TTSTTAPVPLDLTALADNVGVTRPDRLSDGAFNIWGNTFPADELP-PAGPVEVHGVPFRWPaTGGGAPDNVRCRGQLVPVPEGRYDWIHVLAAAERRTEDPLLLHFADGGVDPEWLRVSDfWPETSSRFGERPAFSCTRLHYPRHVQHAMGPTVWHQRVPVTREERLRAFRLPDNPAIHLFAITLAPSA----------
|
125 |
+
>UniRef100_UPI0020D28210_193462/ 151 0.423 5.207E-37 42 221 233 6 189 195
|
126 |
+
------------------------------------------STVVDISEVCDNRGITKSGAEESDGFNIWRNTFPAEDLPAPGSSVEVTGVAFEFPaRATGRGDNIRCRGQLLPLPGVRADWLYLLGAAERRTEDEVELHYADGAVRTAWLRMSDfWPETAAWFGEPEAFRGSGLRYPRHTQDGHRPAIWQQRVPVTVPGELTALRLPENPAMHVFALTavLEPG-----------
|
127 |
+
>UniRef100_A0A2N8NVN7_66423/ 151 0.424 5.207E-37 33 221 233 3 194 202
|
128 |
+
---------------------------------LPARADrrQTEAVPVNLAGLADNTGITRADALSEGAFNIWGNTFPADELPT-GGPVVVDGVPFLFPEaAPGRPDNVRCAGQLIEVPTGRYDWIQLLAAAERRTEDQVLLHYADGSVDPEWLRVPDFWPETGSrVGGSPAFTCTRMHYPRHVERKMGPVIWRHRVPVPRESDLGAVRLPDNPAVHLFAMTLLPA-----------
|
129 |
+
>UniRef100_A0A9E3EXL3_2740538/ 151 0.250 5.207E-37 51 216 233 551 729 733
|
130 |
+
---------------------------------------------------YNNAGISDDSNTTSANFDGGGYSYSAQSLQAagitPGGSVTTKGVTFTWPNvASGVADNYQTNGQTIPVTPvsGATTLAFLGSATNGPSSGTATITYTNGSTQSFSLGFSDWTlnanTASASFGNAIAATLSYRNGANG-RDNVNTYVFYADITLQAGKTLKSVTLPsttNQGRLHVFAI----------------
|
131 |
+
>UniRef100_A0A840IWR6_1181879/ 151 0.287 5.207E-37 52 218 233 903 1081 1083
|
132 |
+
----------------------------------------------------DNAGISPDSDPSAGNFDGGGWSYSADALaeagAKPGGTVTSDGIDFTWPSYPaGDPDNVVAAGQTVNVTGSG--KLALLGSSSNGnAEGTLTVTYTDGSTSTATVGLSDWTlgggDAEPAYGNKSVLSTSYRNSSGGDPQEISTVVFATTpITLDAGKTVASVTLPDDvdgGAMHVFALGL--------------
|
133 |
+
>UniRef100_A0A941IQY1_1508375/ 151 0.262 5.207E-37 47 219 233 1034 1218 1510
|
134 |
+
-----------------------------------------------LTAAFNNSAITSDGNRGCANLDGAGASYSQQALAsvgvTPGTALVHDGLTFTWPSGGAcDADNVVAAGQTIDV-SGSGSTLGFLGTSAWGAiSGTGTVTYTDGTTQPFTIGFGDWANGTPPTGDDIAIRAPYGNQP-GNQTSWQTTIEYAPVQLDPSKTVQSITLPPGnpqpsggiPSMHIFAMSIK-------------
|
135 |
+
>UniRef100_C7QK71_479433/ 151 0.480 7.120E-37 44 218 233 15 189 195
|
136 |
+
--------------------------------------------PVDISGHHNNKAISAATETKAGAFNVWGNSFAAEYLPAGGSIVEVDGVPFRFPQAGDGPDNIRTAGQFITVPEGRYDWIHLLAASERRTEDCMDLSFADGSVDAEKVRVSDFWGAPAWFGEVKAFESLTMHYPHHVQRGVPAMMWAQRVPVTRRAVLSGILLPRNVALHIFAVTL--------------
|
137 |
+
>UniRef100_C7PW55_479433/ 151 0.248 7.120E-37 47 220 233 547 726 727
|
138 |
+
-----------------------------------------------LAGAFNNTAITDETNTAPGNFDGDGDSYSAQSLatagATPGATISAGGTTFTWPSaAAGTNDNVAGSGVMVNL-AGQGSKLGFLGSEAGFSTDTVTVAYTDGTSSTGSLGFPNWCCSSPtGYGATPAIVTDHRNTPSGPANFGTAYdVFYNSIAIDATKTVKTVTVPSDPAIHVFAMTVQP------------
|
139 |
+
>UniRef100_A0A1V2PT88_1933779/ 151 0.315 7.120E-37 33 231 233 723 921 924
|
140 |
+
---------------------------------LPPLPNQGTTVTVDLAAVLNNDAFTNEFHMGDGDFDGAGNTYPAAQLPQTGQA-NDDGIPFEFVNGNeGAPNNIVPQAQTIQLPPGKYTTVHLLAASDNGNTDrTVTINYADG-AANAPLRITDW-RTAPAFGETEALRTNQMHTRTGPSPTR-LAIFHQKITLDPVRDLVSITLPaaATPRPHIFAITLQKKSSAAPAEGGD-
|
141 |
+
>UniRef100_UPI00207D370E_2944808/ 151 0.279 7.120E-37 15 221 233 764 967 972
|
142 |
+
---------------IAPVTVTTPAAPWGLPPLPPG----GDMVTVDLEPFYTNDGITNEFYLGDGDFDGTGRTYPSGALPQNG-SLSNDGVPFRFTNGhEGTRNNVTAAGQTLELPEGNYRKLHLLGASDNGNTDaTATLHYTDGGTAPVRLALTDWLSP-AAFGESEPLRTNQIHTRTGPVDRRAT-VFHQVLAADPTRRLRAITLPASakPRSHVFAVTLEKA-----------
|
143 |
+
>UniRef100_A0A1C5JFJ4_47864/ 151 0.457 9.734E-37 44 218 233 15 189 195
|
144 |
+
--------------------------------------------PVDISGHRNNTAVSAATETKAGEFNVWGNSFAAEYLPKGRSTVHVDGVPFLFPPVCEGPDNIRCAGQFIEVPEGRYDWVHVLAASERRCEDTVDLNFADGSVDAEALRVSDFWAAPAWFGEVKAFQSLVMHYPHHVQRGVPAVMWAQRVPVTRRADLTGILLPRNVAVHVFAVTL--------------
|
145 |
+
>UniRef100_UPI00143C2064_1431344/ 151 0.465 9.734E-37 45 218 233 16 189 195
|
146 |
+
---------------------------------------------VDISGHRNNVAISAATRTRDGAFNVWRNSYAAEYLPTGGSTVRVGGVPFTFPPVCDGPDNVRCEGQFIPVRRARYDWIHLLAASERRCEDTVGLDFADGTADAEPLRVSDFWAAPAWFGEIKAFESPAMHYPHHVQRGVPAVMWAQRVAVTRRADLTGILLPRNVAVHVFAVTL--------------
|
147 |
+
>UniRef100_A0A660RIY0_28240/ 151 0.242 9.734E-37 41 221 233 380 568 581
|
148 |
+
-----------------------------------------DFTILDISSFFNNDGIASFKHPRYGNFDnhsgIYGATYPLEELQnymNENGRLEVDDIPFVLKGLEGhMKNNTALQGEKIYLPEGRYSYIYFLGSSEHGSfRAPVTFEYSDGSRQTDDSGFSDWCQG-PQLGEKIACKMPYRYDSNGSRQSITCYLFMQTLKLAPKKELITILLPRKNTMHIFAITLRKS-----------
|
149 |
+
>UniRef100_UPI001CB7FD65_1930280/ 150 0.457 1.331E-36 44 218 233 15 189 195
|
150 |
+
--------------------------------------------PVDISGHRNNTAISAATETRAGKFNVWGNSFAAEYLPEGKSLVHVDGVPFEFPPVCEGPDNVRCAGQFIRVPEGRYDWVHVLAASERRCEDTVELNFADGSVDAEPLRISDFWAAPAWFGEVKAFESLVMHYPHHVQRGVAAVMWAQRVPVTRRAGLTGILLPRNVAVHVFAVTL--------------
|
151 |
+
>UniRef100_UPI0006902850_348818/ 150 0.243 1.331E-36 47 220 233 552 731 732
|
152 |
+
-----------------------------------------------LAHAFDDVGVSDESDTAPGNFDGDGDSYSAQALAAagvtPGATLSANGYTFTWPSaAAGAPDNVAGGSPTIEL-SGSGSRLAFLGAEAGFTSDTVTVHYTDGSSSTGTLGFPNWCCGaTDSYGAKTAFTTDHRNTPTGpANYGVSYRLYTNAIPLTAGKQVASVTLPSSSAIHVFAMAVQP------------
|
153 |
+
>UniRef100_A0A832K4Y2_2282144/ 150 0.279 1.331E-36 41 218 233 25 200 1038
|
154 |
+
-----------------------------------------DFLLLDLTSYFNNNGAATEENPSEGNFDYGGFSYAFELLPKPG-KVSVKEIPFIFPQTSQEGNNIACHSQVIKIQPDRYRAILLLGSSTNGDYEaCLWVQFSDGSRSCLKTGLTDWCRDS-VFGEIPAFSFPYRIGPEG-RQEIQNYIWLQVLRIKEEKFLSAIILPENKNIHIFAITL--------------
|
155 |
+
>UniRef100_UPI000B17B6CC_28444/ 150 0.474 1.819E-36 44 218 233 6 180 193
|
156 |
+
--------------------------------------------PVDISGHLNNTGISAATDTKSGAFNVWGNSFAAEYLPGGGDLVHVAGVPFRFPPVGDGPDNIRCAGQFLTVPEGRYDWVHVLAAAERRCEDVVETHYADGSVDAEPLRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGVSAVMWAQRVPVTRRADLTGLRLPRNAAVHLFAVTL--------------
|
157 |
+
>UniRef100_A0A7W3TA43_1472722/ 150 0.477 1.819E-36 45 218 233 15 188 194
|
158 |
+
---------------------------------------------VDISGHRNNTAVSAATETKAGEFNVWRNSFAAEYLPAGGSLVHVDGVPFRFPPLCEGPDNVRCAGQFIRVPEGHYDWIHVLAASERRSEDTVQLNFADGSVDAEALRVSDFWAAPAWFGEIKAFESLVMHYPHHVQRGIPAVMWAQRIPVTRRTGLTGILLPRNVAVHVFALTL--------------
|
159 |
+
>UniRef100_UPI0006C193CE_1415555/ 149 0.390 2.487E-36 43 227 233 5 191 193
|
160 |
+
-------------------------------------------TVVNLSAHFNNRGIQPPDTPGDYGFNIWRNTFPAEELPEPGSLVDLVGAVFEFPaRASAAGDNVRCRGQLVELPGGRWDWIGLVGAAERRTEDEVELHYADGTVRREWLRMSDfWPQTAAYFGEPRAFVTGGMRYPRHTHPHHAPSIWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
|
161 |
+
>UniRef100_A0A977Y500_2809700/ 149 0.418 2.487E-36 42 218 233 11 187 200
|
162 |
+
------------------------------------------YHAVDLAPYSNNVGISAAGHTSRGAFNVWGNSFPAEHLPESGAPVSVGGVPFRFPRVGVGDDNVRCDGQFIAVEAGRFDWLHLLTASERRAEDVVEMHFANGSVDPEWLRVSDFWAAPAWFGEATAFATPVMHYPHHVQRGVSAMLWSQRVPVTRRADLSGFRLPRNAAVHVFAATL--------------
|
163 |
+
>UniRef100_UPI00189254C3_280293/ 149 0.243 2.487E-36 47 220 233 554 733 734
|
164 |
+
-----------------------------------------------LASAFNNIAISDETNPGPGNFDGDNDSYSAQALAAagatPGASIKAGGTTFTWPsSATGTNDNVAGSGVLVNVT-GQGSKLGFLGAEAGFATDTVTVTYTDGSSGSGSLGFPNWCCSSPtAYGATPTIVTTHRNTPSGPANfGIDYDVFYNSIAIDATKTVKTVSVPNDPAIHIFAMTVQP------------
|
165 |
+
>UniRef100_UPI001E52087D_2714956/ 149 0.259 2.487E-36 2 217 233 772 1005 1010
|
166 |
+
--SWLPESlFLNGGTVTVEVgskantgwgtkaaDLPVDRMPTapAPVPNLPtACVVQGSNCLQSLAGQYDIDGVTTDDNMGQGVFGTNGWSWPAELLPAPG-VGTAAGRPYLFPDTTGtAGNFLSARGQTIHLTPGRYSALDAVTASHNGNyRGDVTVTYSDGTTSKASLMSTDWAAAAPAYGEETAIDVPTRHRNTGIHDGLRVRMWHVALAVDSTRTAVSITLPNLPNMKVYALT---------------
|
167 |
+
>UniRef100_A0A557ZXY0_715473/ 149 0.264 2.487E-36 52 216 233 868 1044 1048
|
168 |
+
----------------------------------------------------NNAGISPDAKPAAANFDGVGFSYSSDALAAagakAGSTVTVNGLSYSWPNYPaGSPDNVIAQGQTVNV-SGSGQLAFLGAAANGNASGTVTVTYTDGTTSTANLGFSDWTLGagaaQPAFGNQVAFRTPYRNSVGGDSQQINTYVFAsAPIALAAGKTVKSVTLPSSvsgGQLHVFAI----------------
|
169 |
+
>UniRef100_A0A0N0T605_1519492/ 149 0.437 3.401E-36 44 218 233 2 175 180
|
170 |
+
--------------------------------------------PIPLAPHLNNTGLTGADGLDGGGFNIWGNTFPAGELPPSGSTTVVHDVPFLFPAA--GADNVRCRGQRVEVPPGRWDWVHVLGAAERRTEDLLEVHYSDGSVRGQWLRMSDfWPQTEPRFGELLAFRCRHMHYPRHVQRTMSPSIWAQRVPVTVPSDVVALVLPDNPALHVFALTL--------------
|
171 |
+
>UniRef100_A0A918SWL6_67368/ 149 0.385 3.401E-36 43 227 233 5 191 193
|
172 |
+
-------------------------------------------TVVNLSAHFNNRGIQPPDTSGEYGFNIWRNTFPAEELPEPGSLVELEGTVFAFPaRDTGAGDNVRCRGQLVELPAGRWDWIGLVGAAERRTEDEVELHHADGTVRREWLRMSDfWPQTAPYFGEPLAFSTSGMRYPRHTHRHHAPSLWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
|
173 |
+
>UniRef100_UPI0012EAE0B8_2654677/ 149 0.413 3.401E-36 42 225 233 6 191 195
|
174 |
+
------------------------------------------STVVDISEACDNRGIAKSGADESYGFNIWRNTFPAESLPAPGSSVAVAGVTFEFPaRESGRGDNIRCRGQLLPLPAVRPDWLYLLGAAERRTEDEVELHYADGAVRTAWLRMSDfWPETAAWFGEPEAFRTSGLRYPRHTHDGHRPAIWQQRIPVTVAGELTALRLPDNPAMHVFALTAVREPGVR-------
|
175 |
+
>UniRef100_A0A428Z1R9_2030/ 149 0.327 3.401E-36 45 220 233 733 908 910
|
176 |
+
---------------------------------------------VDLAAALNNDGFTNEFQMNDGDFDGAGNTYPAAQLPQTG-HAEDDGIPFEFVNGNeGAPNNIIPAGQTIQLPPGKYPTMHLLAASDNGNTNTkLTVTYADGTAQ-VPLQITDW-RASPAFGETEALRTRQMHTRTGPAETR-LSIFHQKVPLDPARELLSITLPaaAKPRPHIFAITLQK------------
|
177 |
+
>UniRef100_UPI0019405FD2_566021/ 149 0.472 4.649E-36 41 221 233 2 182 190
|
178 |
+
-----------------------------------------EQVPIELAAHLNNAGITPASDTGRGRFNVWRNSLPGEELPL-GRPFTAGGVRFAFPAAgPGRPDNVRCEGQLVTVPPGRYDWCCLLAAGERRVEDEVALHFADGSVDFEPVRVSDFWAASAVFGEEEAVATSVMHYPQHVQPGVTAHIWCQRVPVTRRAELRRLRLPHNVALHVFAATLCPS-----------
|
179 |
+
>UniRef100_UPI001AEB6BB5_882444/ 149 0.457 4.649E-36 44 218 233 12 187 194
|
180 |
+
--------------------------------------------PVDLSAYFNNTGATAKSDLARGRLNVWQNSFPAEELPDAG-IFVATSVPFEFPAiGPGRHDNIRCAGQRVELPAGRWDWIYLLACSERRSEDVLQLHYTDGTVDAEWLRVSDfWPASPPHFGEVEAIRCEQMHFPRHIQPRVGPRIWQQRVPVPRQHDLAALRLPDNIAIHVFAMTL--------------
|
181 |
+
>UniRef100_UPI001C40B55E_200378/ 148 0.497 6.356E-36 43 217 233 6 182 190
|
182 |
+
-------------------------------------------VPIDLGPLFNNIGATVETDLGRGGLNVWKNSLPAADLPAAGSLFRYTDVPFRFPEvGSGLPDNVRCEGQRVDLPPGRYDWIYLLACSERRSEDVVHLHYASGEADDEWLRVSDFWAAAPShFGEVEAVPCSRIHFPRHVQRGIAPRIWQQRLPVPRQQPLAYLRLPDNIAIHVFAMT---------------
|
183 |
+
>UniRef100_A0A7K2ZJK9_2690253/ 148 0.464 6.356E-36 44 224 233 15 195 197
|
184 |
+
--------------------------------------------PVDISGHRNNTAISAATETKAGAFNVWGNSFAAEYLPAGESLVHVDGIPFRFPPVCEGPDNVRCAGQFFGVPEGRYDWLHLLAASERRCEDTVELHFADGTLDAEPLRISDFWSAPAWFGEVKAFESLVMHYPHHVQRSVSAVMWAQRVPVTRRAVLTGVRLPRNVAVHVFAATVQRTEGA--------
|
185 |
+
>UniRef100_UPI001BA8A0F4_2802641/ 148 0.274 6.356E-36 36 220 233 30 212 765
|
186 |
+
------------------------------------ASDAPNPVPVPLDGLFDNDGI-DTATTHDGNFDGSGYTFPAEGLPS--GQVTVDGVPFTFPSATGK-NNIVAMGQQIPLPQGRYvTALFLTACSYGATGGTATIHYADGSTSEAQLGGADWYSG---SGQLVA---PFRYTPGGRTDQSPVSISTSQVWIDPNRDAVAITLPTTspavegkSSLHIFALSLQP------------
|
187 |
+
>UniRef100_UPI001F2867CD_211113/ 148 0.322 6.356E-36 45 220 233 733 908 910
|
188 |
+
---------------------------------------------VDLAAVLNNDGLTNEFQMNDGDFDGAGNTYPAAQLPQTG-YVEDDGIPFEFVNGNeGAANNIVPIGQTIPLPPGKYPTMHLLAASDNGNtTTTLTVTYTDGAAQ-VPLQITDWRTP-PAYGETEALRTRQMHTRNGTAETRLT-IFHQKLQLDPARDLVSITLPaaTKPRPHIFAITLQK------------
|
189 |
+
>UniRef100_A0A563EK61_2591470/ 148 0.295 6.356E-36 5 223 233 739 949 950
|
190 |
+
-----HPDFTIAPVKITE-PARAWGLPALP----PGGVAA----PVDITGLFDNDGLSNEFTSRDGDFDGAGNTYPAAQVPQTGG-VTDDGIPFEFTNGDEaSKNNVIAAGQTIAMPAGRYAKLHLLAAADTGNVDaPGVATYVDGTTAPIRFAVTAWRSG-PQFGESEPITTTLMHTPSGPQQA-KVAIFHQVIGLDPARDLASITLPKlaGPRLHVFGITLEKAKA---------
|
191 |
+
>UniRef100_A0A7V2UGE7_2026780/ 148 0.264 6.356E-36 41 219 233 33 223 1084
|
192 |
+
-----------------------------------------GQVPLNLEGLFDNDAIADAQRRADGNFdcpdhaaDIPGSVFPAENLPATGSKFSFDGIHFLFPSKErGDLNNVACEGQRIDVPPARYKALHIIGTSENGSfRDSLQLAYKEG-PAEAELTLRDWCQ-KPAAGDRVAFEAPCRYTwspqkRGMIREEIQPRIWRQTIPLDPAKTLEALTLPYNRRMHVFAATLE-------------
|
193 |
+
>UniRef100_A0A919S905_113560/ 148 0.440 8.688E-36 45 227 233 5 188 190
|
194 |
+
---------------------------------------------IDLSGFFTNRGLQPPGTTGDYGFNIWSNTFPAEELPPAGTVADVAGVPFSFPPDAATGDNIRCRGQVIPLPEGDWDWIYLIGAAERRTEDRVELRYRDGSVRPAWLRMSDFWPETPmRFGEPLAFRTRSMRYPRHTHRNHAPALWQQRIGVAQPEDLAAVRLPDNPAMHVFALTLVADEEARCA-----
|
195 |
+
>UniRef100_UPI0019447DF0_113566/ 148 0.445 8.688E-36 45 227 233 5 188 190
|
196 |
+
---------------------------------------------IDLSGFFTNRGLQPPGTTGDYGFNIWSNTFPAEELPPAGTVADVAGVPFSFPPDAATGDNIRCRGQVIALPAGDWDWIYLIGAAERRTEDQVELRYRDGSVRPAWLRMSDFWPETPvRFGEPLAFRTRSMRYPRHTHQNHAPALWQQRIGIAEPGNLSAVRLPDNPAMHVFALTLVADEEARCA-----
|
197 |
+
>UniRef100_UPI001FEC609C_2508722/ 148 0.477 8.688E-36 44 223 233 15 194 197
|
198 |
+
--------------------------------------------PVDISGHRNNTAVSAATDTKAGRFNVWGNSFAAEYLPAGGSLVHVAGVPFRFPPVCDGPDNVRCEGQFLEVPEGRYDWVHVLAASERRCEDTVGMHFADGAVDAEPLRVSDFWAAPAWFGEVRAFESLVMHYPHHVQRNVSALMWAQRVPVTRRAGLTGVRLPRNIAVHVFAVTLQRTEA---------
|
199 |
+
>UniRef100_A0A6G7PB94_2714844/ 148 0.482 8.688E-36 45 218 233 19 192 198
|
200 |
+
---------------------------------------------IDISDLRNNAAISSAVETGAGAFNVWRNSFAAEYLPAGGSLVHVDGVPFEFPPVCEGPDNIRCAGQFIKVPRDRYDWIHVLAASERRSEDTVELTFADGSVDAEPLRVSDFWAAPAWFGEVKAFESLAMHYPHHVQRGVPAVMWAQRVAVTRRADLTGILLPRNVAVHIFAVTL--------------
|
201 |
+
>UniRef100_A0A4R0HF04_1124743/ 148 0.394 8.688E-36 25 218 233 35 228 237
|
202 |
+
-------------------------MPDQMPDQMPATSVQARCRAVNLAPHRNNVGSTPATDTRGGAFNIWGNSFPAEELPAPGQ-FVVDQVAYDFPPTGrGTADNVRAAGQFIEVPSGRYDWLYVLGAAERRVEDELAFHFADGSVDFEQLRLSDFWAAPGWFGETQVRATRSMHYPFHVQAGVPAMLWSQRVPVTRRAALAAVRLPRNPAVHLFAATL--------------
|
203 |
+
>UniRef100_A0A3E0IAZ0_1045776/ 148 0.284 8.688E-36 36 220 233 31 213 766
|
204 |
+
------------------------------------ASDAPNPVPVPLDGLFDNDGI-DTATTHDGNFDGSGYTFPAEGLPS--GPITVDGVPFTFPAATGK-NNIVAMGQQIPLPKGRYvTALFLTACSYGATGGTATVHYADGTTSQAQLGGADWYSG---SGQLVA---PFRYTPGGGTDQSPVSISTSQVWLDPARDAVAITLPTTnpavegkSSLHVFALSLQP------------
|
205 |
+
>UniRef100_UPI000DE266A4_2249762/ 148 0.264 8.688E-36 33 221 233 747 936 938
|
206 |
+
---------------------------------LPPIQPPGETTPVDLSAHFDNDGVSTHENMADGDFDGTGRTYPAEELPPAGPYL-HQGVSFLIPSfADGAHNNLTARGQVIAVPPGRYARLRLVGACAYGSLDTkLIATYADGSTAELPFVMSDW-AGQPAAGGSEVTRCTHRHGKAG-PDALKVALFEVPIALDPARELRSITMPVRvkPQLHVFALSVEKS-----------
|
207 |
+
>UniRef100_A0A7L6B3W5_2749844/ 147 0.422 1.188E-35 45 217 233 14 188 196
|
208 |
+
---------------------------------------------VDLSACFNNTAATTPETTGAGGLNVWRNSFPIEELPDATEVFRPGAVPFRFPRTgPGRLDNVVCAGQRIDLRPARYDWIYLLACAERRIEDVVHLHYRDGSVDEEWLRVSDfWPASPPRFGELEAVRCDRIHFPRHVQPGIGPRIWQQRLPVPRQEELVYLRLPDNIAVHIFAMT---------------
|
209 |
+
>UniRef100_A0A2M7SAS6_1974543/ 147 0.279 1.188E-35 35 220 233 24 215 216
|
210 |
+
-----------------------------------GAGGGAGCVPVDISKLCNNDGIAPDDDATGGDFDAGGASYTAKAWPKVGKTgVTVNGVPFLVAGADKKVNNIACEGEAVQVPAGSYKAILMLASATNVPEGSmedaLTLVYKDGTKSAVNFKLTDWC-VEPKEGEKKAYSFTYRmsTGTEGGHHEIPCYLFLQTVKIDGKKELASIRLPEQKDIHIFAITLQK------------
|
211 |
+
>UniRef100_A0A938MG41_2026780/ 147 0.286 1.188E-35 43 218 233 67 249 254
|
212 |
+
-------------------------------------------TLLDLAAAFNSDGISTEENAKDGNMDApgkpNGASYPKDELPAAHSLLTLKGkpvMTFLFPDGqDGKLNNIACSGQTVHVPAASYVELWVLGAATYGAQtSDLELRYEDGS-ETEPLELSDWC-ETPGFGERQAVVAKHRHGWKGEEEDIPCGLWAQRVPLDAKRKLVALVLPQNAKMHIFALSL--------------
|
213 |
+
>UniRef100_A0A938S8T3_2026780/ 147 0.286 1.188E-35 43 218 233 743 925 930
|
214 |
+
-------------------------------------------TLLDLAAAFNSDGISTEENAKDGNMDApgkpNGASYPKDELPAAHSLLTLKGkpvMTFLFPDGqDGKLNNIACSGQTVHVPAASYVELWVLGAATYGAQtSDLELRYEDGS-ETEPLELSDWC-ETPGFGERQAVVAKHRHGWKGEEEDIPCGLWAQRVPLDAKRKLVALVLPQNAKMHIFALSL--------------
|
215 |
+
>UniRef100_A0A5N0UUJ1_2596893/ 147 0.258 1.188E-35 52 216 233 884 1060 1065
|
216 |
+
----------------------------------------------------NNAGISPDAQPGVANFDGVGFSYSSDALAAagatPGGTVTVDGLSYTWPNYPaGDPDNVLAQGQTVNV-SGSGRLAFLGAAANGNASGTVTITYTDGSTSTAQLGFSDWTLGAGaqqlAYGNVKAVTTSYRNSTGGDTQQIGTYVFAsAPITLDSGKQVASVTLPSSvsgGQLHVFAI----------------
|
217 |
+
>UniRef100_A0A2H5B2X5_2018025/ 147 0.488 1.624E-35 45 218 233 15 188 194
|
218 |
+
---------------------------------------------IDISGHRNNTAVSAATDTGAGAFNVWRNSFAAEYLPAGGSLVEVDGVPFDFPPVCQGPDNVRCAGQYLEVPRDRYDWIHLLAASERRCEDTVELTFADGSVDAEPLRVSDFWAAPAWFGEVKAFESLTMHYPQHVQRGVPAVMWSQRVAVTRRVDLTGLLLPRNVAVHVFAVTL--------------
|
219 |
+
>UniRef100_A0A5C4LYC7_2547244/ 147 0.269 1.624E-35 52 216 233 867 1043 1050
|
220 |
+
----------------------------------------------------NNAGISPDAKPAAANFDGVGYSYSADALAAagakAGSTVTVNGFSYTWPAYPaGSPDNVIAQGQTVNV-SGSGQLAFLGAGSNGNASGTVTITYTDGSTTAASLGFSDWTLGgggaQPAFGNQKAFTTSYRNSVGGDPQQINTYVFASTpITLSAGKTVQSVTLPSSvsgGQLHVFAI----------------
|
221 |
+
>UniRef100_A0A0K3B8B7_703222/ 147 0.259 1.624E-35 47 216 233 898 1082 1086
|
222 |
+
-----------------------------------------------LRSAYNNVGISPDNAMASANFDNVGFSYSANALAngglKPGGTVTVDGLAHTWPITDiGEPDNVVAGGQTVNVQaQSGASKLALLGSAANGtASGQLTITYTDGSTQQAQVGFSDWTLGgggqQPSFGNRIAATSSYRNSVNGSSQGLNTYVFATEpVGLDPSKQVKSVTLPPTvsgGTLHVFAL----------------
|
223 |
+
>UniRef100_A0A8J3I4R3_2778364/ 147 0.284 1.624E-35 51 216 233 1027 1201 1205
|
224 |
+
---------------------------------------------------YNNVGTSDDSAPSFGNFDAQGNSYSAQALQQvgliPSQNVTVNGVTFIWPNmAPGYFNNYQAAGQTIGVTPidGATTLAFLGSASNKGSSGTATITYTDGSTQTFTLGFTDWVTSTLSYGNSIAATMSYHNASTGKQTG-NTYIFYTSVTLQAGKTIQSVTLPatfTGGQPHVFAI----------------
|
225 |
+
>UniRef100_UPI00143BFCD2_2720606/ 147 0.482 2.219E-35 45 218 233 4 177 183
|
226 |
+
---------------------------------------------IDISGHRNNTAVSAATETTAGEFNVWRNSFAAEYLPAGGSLVHVDDVPFRFPPVCEGPDNIRCSGQFIRVPEGRYDWIHVLAASERRTEDTVELSFADGSVDAEALRVSDFWAAPAWFGETKAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRTGLTGILLPRNVAVHVFALTL--------------
|
227 |
+
>UniRef100_UPI00058392A0_703222/ 147 0.312 2.219E-35 33 220 233 726 913 917
|
228 |
+
---------------------------------LPKLPDQGSTVPVDLVAALNNDAFTNEFHMGDGDFDGAGNTYPAAQLPQTGQA-SDDGIPFEFVNGNeGAPNNIVPAGQQIALPPGQYTTLHLLAASDNGNtNAKLTVTYTDGTAQ-VPLQVTDW-RAAPAYGETEALRTSQMHTRTG-PQPVRLSIFHQKIPLEEARHLVSIALPaaATPRPHIFAVTLQK------------
|
229 |
+
>UniRef100_A0A9E9FHN1_2979468/ 146 0.434 3.034E-35 39 221 233 1 183 192
|
230 |
+
---------------------------------------AADFQCIDLSAWYNNVGLTRPDNTSAGAFNVWRNSLPAPELPC-GQRVTVEGVPFLLPPADGnRYDNVRCDGQVAPVPRGAYDWVYLLTTAERRVEDEMSLHFADGAVDFEPLRVSDFWVAPAVFGETAAFTTTVMHYPRHIQRNVPATVWSQRVPVTRRGRLRAVRFPDNLAVHILAMTLCEA-----------
|
231 |
+
>UniRef100_UPI001E6135CD_1617086/ 146 0.480 3.034E-35 44 218 233 18 192 200
|
232 |
+
--------------------------------------------PVDISGHRNNTAISAATETKAGAFNVWGNSFAAEYLPAGGSLVHVDGVPFEFPPVCDGPDNIRAAGQFIGVTPGRYDWIHVLAASERRCEDTIELSFADGSVDAEPLRISDFWAAPAWFGEVKAFESLVMHYPHHVQRGVPAMMWAQRVPVTRRAELTGILMPRNVALHVFAVTL--------------
|
233 |
+
>UniRef100_A0A9E3BI44_2740538/ 146 0.268 3.034E-35 31 217 233 734 926 930
|
234 |
+
-------------------------------PSFPPAATGGNS--------FNNEGTSNDSNTAVGNFDGGGNSYSNNALSaagfASGSTVMVNGISFQWPTvAAGSDDNWQVAGQGIPVNgrSGATTLAFLGAATSGPSSGTIMVTYTDGSTQTFTLAFSDWTlnggRSTILSGDSIAVQMSYRNTPTGQQTNHPTYVFLTSVTLAAGKTVKSINLPssvSQGTMHVFAIS---------------
|
235 |
+
>UniRef100_A0A941IQY1_1508375/ 146 0.278 3.034E-35 33 215 233 1314 1504 1510
|
236 |
+
---------------------------------LSGAATQVPYS--SLAAGFNNVSITDDSDHSptgfDGGLDGGGNSFSAEALAAagltPGTDFTFDGVVFTWPNsAAGTPDNIEADGRAFDVTGTGSTLGFLGAAANGASSGTVTVTYTDGTTQQFTIGFGDWASTTPYAGGQVAVTSAYGNTSSGTS-PWKASVFYDSVTLPAGKTVQSVSLPTAGSapLHVFA-----------------
|
237 |
+
>UniRef100_UPI0018E54A72_714197/ 146 0.443 4.147E-35 45 218 233 1 176 190
|
238 |
+
---------------------------------------------VDLSRYCNNRGIQPPGSSGEYGFNIWGNTFPAEELPEPGTRVGVCGVPFEFPvGAAPSGDNVRCRGQLVEVPPGDYDWVYLLGAAERRTEDHLGLVYADGTELAEHLRMSDFWPETGSrFGEPLAFRTTALRYPRHTHTPHEPSIWQQRSPVSVRGRLRALRLPDNPAMHVFAVTL--------------
|
239 |
+
>UniRef100_A0A1R0KG38_76021/ 146 0.390 4.147E-35 40 224 233 3 189 192
|
240 |
+
----------------------------------------AGMRSLSLLEHVNNTGLSAVDDLAAAGFNIWGNSFPAADLPAPGATSVVGGVSFRFPeRAPDGRDNVRCRGQRIAVPEGRWDWVHVLGAAERRTEDPLLLRYADGAVRAQWLRMSDfWPETEPRFGELLAYRCAAMHYPRHVQRTTAPAIWAHRVPLTVPDGVVALELPDNPALHLFAITLQAGERV--------
|
241 |
+
>UniRef100_A0A1K1S937_546364/ 146 0.289 4.147E-35 40 227 233 46 234 781
|
242 |
+
----------------------------------------PPPVPVALDTWFTNDGIDSASATG-GDFDGSGYTFPAEQLPV-GRTATVGGVPFRLGSaAAGAKNNIAATGQTIDLPKGRYFVAYFLVAASYGtAGGTATVHYADGSTSTGSLSGPDWYTGTG------ALVSPFRYAPGGVVDGNPVSLATGQVWIDPAREAVALTLPTTANpapnvasLHVFALTLQPVAVGRAA-----
|
243 |
+
>UniRef100_UPI001AE5AA53_130796/ 146 0.301 4.147E-35 39 220 233 768 950 953
|
244 |
+
---------------------------------------GGDTVRVDLEPHHTNDAITNEFYLGDGDFDGTGRTYPSGQLPQNGA-LTNDGIPFTFTNGhEGTRNNVLAAGQTIPLPEGAYTRLHVLGASDNGNTDaQATLHYTDGTAATVRLALTDWL-TSAAFGESEAVRTNQIHDRTGPR-PRRASVFHQVLPVDPARRLRALTLPttTRPRAHVFALTLEK------------
|
245 |
+
>UniRef100_A0A7I8EN04_2717365/ 146 0.263 4.147E-35 43 227 233 43 230 968
|
246 |
+
-------------------------------------------TPVSLSSLFNNKGVG--GAPGQANFDGSGYAYPANQLPGAGQQ-TLNGIPYLFPNYsAGANDNVVALGQTVSLSPGeqqQYQQISLLaAASYGPSGGTLTIHYTDGSSSSVSLTVSDWYTPTAG-----LVSTTYRYTPTGTEQ-HAVHIYALSALVDATRTVASLILPNtaqpaagQASLHIFALTLLPSTQVPVP-----
|
247 |
+
>UniRef100_UPI002020D29A_211113/ 146 0.240 4.147E-35 19 214 233 835 1059 1065
|
248 |
+
-------------------SVAVRVEPGTPeatyqvVVQFTGPQGVLQSAPLqilvaqpgSLRSVYNNVGISPDNAMASANFDNVGFSYSANALfnagLKPGGTVTVDGLTHTWPvTAVGEPDNVIAGGQTVNVPaAAGATKLAFLGSAANGtASGTLTITYTDGSTQQAQVGFSDWTLGgggqQPSFGNRKAVTSSYRNSVNGSSQGLNTYVFATEpVALDPSKQVKGVTLPSSvsgGTLHVF------------------
|
249 |
+
>UniRef100_A0A495L449_2183912/ 145 0.437 5.669E-35 42 217 233 17 192 209
|
250 |
+
------------------------------------------WAPVDLRAHVDNRGISPADESAAGAFNIWGNSFPSEHLPRAGSRVEVEGVPFEFPVSSDNGDNVQCAGQLVSVPVERYDWIYVLAAAERRAEDEAALHFTDGWVDFEPLRVSDFWAAPSVFGEVSAYRTPVMHYPHHVQPGVQAALWCQRLPVVRRAALTSIRLPRNNAVHVFAMT---------------
|
251 |
+
>UniRef100_UPI001C636FC7_1291556/ 145 0.410 7.748E-35 55 220 233 1 167 172
|
252 |
+
-------------------------------------------------------GIEFTRRPGDGAFNIWRNTFPAEDLPR-GEIVDVGGVPFCFPAADGRhPDNLRCRGQRIELPVGRVDWLCFLAAAERRTEDTLSVHYADGATRAQWLRVSDfWPETPPRFGDVLAFRTPYLLYQRHAQSGMAPAIWRQRVPVTIPGDVAAVTLPENPAMHIFALTLLT------------
|
253 |
+
>UniRef100_UPI00099B8A4C_89050/ 145 0.275 7.748E-35 32 224 233 836 1038 1167
|
254 |
+
--------------------------------WASGASAAPPSYA--GSPTYTDVGVSDDASPAAADFDGVGNSYSAQALAAagasPGAKITAGGTTFTWPtPAAGQPDNYEANGQSVAVSGSG--SIAFLGAANHGPStGTATVHFTDGTTQSVQLTFSDWTLGggkaTPAAGDTTALTTSYRNKTGASSEQVATYVFAtAPVALPSGKTADSVRLPSDtsaGGLHVFAIGFGGAAAA--------
|
255 |
+
>UniRef100_A0A6H9XE76_1873/ 145 0.427 1.059E-34 33 218 233 0 185 195
|
256 |
+
---------------------------------MPTSATIAAHV-LDLASHFNNVGASRAEDTSAGRFNVWGNSFAAEHLPSAGSQVVVDGVPFQLPPlGTGTPDNVRCDGQFVRVAPGRYDWLYLLASAERRVEDEMALHFAHGAVDFEPLRVSDFWAAPATFGETRAFESPLMHYPHHVQFGVPAAMWCQRVPVVRRADLTAVRLPHNIAVHVFAATL--------------
|
257 |
+
>UniRef100_K0JYB8_1179773/ 145 0.428 1.059E-34 37 224 233 6 193 196
|
258 |
+
-------------------------------------TGHDVFCSVDIAGHRNNVGITSVGGTGVGGLNVWRNSLPAEQMPC-GEVVVLDGVPFEFPTAgSGEPDNVRGDGQLLELPAGYYDWVHLLACGERRVEDELALYFTDGSIDFEHVRVSDFWAAEPVFGETSALRSTVMHYPQHVQPRVPGIIWAQRVPVPRRAELKAVRLPRNVALHVFALTAQRATGA--------
|
259 |
+
>UniRef100_A0A8J3IWF3_2778369/ 145 0.229 1.059E-34 47 217 233 916 1098 1101
|
260 |
+
-----------------------------------------------LLSYYNNIGISNDSSANTADFDGDGYSYSAQQLTsagfTPGATVTVSGIAYTWPNVqPGVYDNIEVNGQTIQTPnaPANAAHLSFLGSATNGDtQGTVTITYTDGSTQTAQLGFSDWTLGagaePVAYNNVVAAKTSYRN----PNDKVTTYVFASeSIALSSGKTVASITLPNPMNMgalHIFTFT---------------
|
261 |
+
>UniRef100_A0A1V2RDC8_1857892/ 144 0.434 1.448E-34 39 221 233 1 183 192
|
262 |
+
---------------------------------------AAGFTCVDLSAWYNNVGLTRPDNTSAGAFNVWRNSLPAHELPC-GQRVTVEGVPFVLPQADGdRCDNVRCDGQVAPVPCGAYDWVYLLTTAERRVEDEMALHFADGAVDFEPLRVSDFWVAPAVFGETAAFTTTVMHYPHHIQRNVPATVWSQRVPVTRRGRLRAVRFPDNLAVHILAMTLCGA-----------
|
263 |
+
>UniRef100_A0A918GZH9_1951/ 144 0.457 1.448E-34 44 218 233 15 189 195
|
264 |
+
--------------------------------------------PVDISDHRNNTAISAATETRAGAFNVWRNSFAAEYLPAGQSLVHVDGVPFEFPPVCDGPDNVRCAGQFIRVPVGRYDWIHVLAASERRCEDTVELNFADSSVDAESLRISDFWAAPAWFGEVRAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRADLTGILLPRNVAVHVFAVTL--------------
|
265 |
+
>UniRef100_A0A918U5A5_68238/ 144 0.295 1.448E-34 44 219 233 48 225 603
|
266 |
+
--------------------------------------------PVPLDRLFDNRAVSDDARPGAADFDGSGASLSAQDLAAagwtPGRSLTVQGARLTWPrPAAGRPDNVRADGQTVRV-RGRGDALAFLAAGTGGEdlTGTGTVRYADGSRSAYRLDVGDWRTGPLA---TKAVALPHVNTP-GGRLAERARLYVVTVPLARGREVASVHLPREPDLHVFALSVR-------------
|
267 |
+
>UniRef100_A0A4R2IR67_1213861/ 144 0.266 1.448E-34 36 225 233 31 220 766
|
268 |
+
------------------------------------TAAAPTPVTVSLTQYFDNNGIDSASAR-DGNFDGSGYTYPAEALPTGG--ITVDGVPFTFPSaAKGVKNNIVAMGQTLDIPRGKYHAGHFLVAgSYGMASGTATVHYADGGTTTASLAGPDWYNSSGP------ISASYRYAPNNGTDQHPVAIAAAQIWLDAGREAVSVTLPVtqpaqagQTSLHVFAMTVQAAAKGR-------
|
269 |
+
>UniRef100_UPI001FE67354_2698649/ 144 0.245 1.448E-34 47 214 233 887 1069 1075
|
270 |
+
-----------------------------------------------LRAAYNNVGISPDNAMSVANFDNVGFSYSANALSnaglKPGGTVTVDGLPHTWPLTEvGEPDNVIASGQTVNVQaAAGATKLAFLGSAANGtASGTLTITYTDGSTQQAQIGFSDWTLGgggqQPSFGNRIAVTSSYRNSVNGSSQGLNTYVFATEpIALDPSKQVRSVTLPSSvsgGTLHVF------------------
|
271 |
+
>UniRef100_UPI002092D67B_79912/ 144 0.259 1.979E-34 47 215 233 906 1090 1096
|
272 |
+
-----------------------------------------------IEWYQNNAGISDDGKAGTANFDGGGWSYSAQALAAaglkPGQPVAWKGFTFTWPNrTPGQLDNVQANGQVVDLPaaPKGASQLAFLGAAGNGDaSSTVKITYTDGSTATAKLELSDWALGadayPPKFGNEIVAKTPYRNTSDGGTQKLNIYVFAATpIALDASKQVQSVTLntPSSgGSLHVFA-----------------
|
273 |
+
>UniRef100_A0A9E9MNL7_2944128/ 144 0.265 1.979E-34 47 222 233 934 1125 1423
|
274 |
+
-----------------------------------------------LLAAFDNRGISDDTAVAAGNFDGGGRSYSAQALAAagltAGQPVTAGGISYGWPlPAPGYPDNAIAAGQAITLdaPSGTAQVGVLGAASNGPSQGIATLTYADGSTDRYWLGLSDWTlnggSAHPSYGNLVAARTTYRNCAgcSGGRDSVDTDIFATSLPADPAKTLTSLTLPsgtTRGQLHVFAIGTSTSA----------
|
275 |
+
>UniRef100_A0A6B2VD27_2706027/ 143 0.471 2.704E-34 42 219 233 6 183 195
|
276 |
+
------------------------------------------FLPVRLDKHWNNRAVSAVGETGTGRFNVWRNSFPAEHLPAPGGELVVGDVPYRFPQTTADGDNIRCAGQYLPLPEGRYDWIHLLASGERRVESELALHFADGEVDFEAVRVSDFWAAPARFGEREAARTPVMHYPHHVQQGVAAVLWSQRVPVTRIATLHGVRLPRHIALHVLALTLE-------------
|
277 |
+
>UniRef100_A0A3R9DPL9_2267691/ 143 0.289 2.704E-34 40 227 233 48 236 783
|
278 |
+
----------------------------------------PPPVPVTLDAWFTNDGIDSASATG-GDFDGSGYTFPAEHLPA-GQTTTVGGVPFKLGSaAAGAKNNIAATGQRVDLPKGRYFVAYFLVAGSYGtAGGTATVHYADGTTSTGSLSGPDWYTGTG------ALVSPFRYAPGGVVDNNPVSLATGQVWVDPAREAVAVTLPTTANpapnvasLHVFALTLQPVAVGRSA-----
|
279 |
+
>UniRef100_UPI0021528536_2945988/ 143 0.284 2.704E-34 26 227 233 40 236 783
|
280 |
+
--------------------------PDTPIVPPP------PPVPVALDTWFSNDGIDSASATG-GDFDGSGYTFPAEHLPA-GRTTNVSGVPFTLGSaAAGAKNNIAATGQTIDLPKGRYFVAYFLVAASYGtTGGTATVHYADGSTSTGSLSGPDWYTGTG------ALVSPFRYAPGGVVDNNPVSLATGQVWVDPARDAVALTLPTTANpapnvasLHVFALTLQPVAVGRSA-----
|
281 |
+
>UniRef100_UPI00210A4B97_68230/ 143 0.387 3.696E-34 43 223 233 17 198 199
|
282 |
+
-------------------------------------------HPVAIEDFLDSVGVEPNSRSGLGAFNIWGNAFPAEALPC-GTLTELDRVPFRFPPADGvRPDHLRCRGQRIPLPAGRADWIHVLGAAERRTEDDVLLEYADGSARRQWLRLSDfWPQTGPRFGERLAFRTGSMLYPRHEQLTMSPSIWHQRIPVAVSDGLRAIVLPHNPAMHLFALTLVEAAA---------
|
283 |
+
>UniRef100_UPI001FB83ACA_1213861/ 143 0.250 3.696E-34 23 217 233 831 1054 1057
|
284 |
+
-----------------------RAEPDTPeatyqvAVTFSGPGGSLGSATLqilvappgSLRASFNNIGISPDNAMSVANFDNVGFSYSANALAnvgvKPGAAITVDGLAHTWPiTGLGEPDNVVAAGQTIVVTaPAGATKLALLGSAANGSaSGTLTVTYTDGTTQQAAVGFSDWTLGgggqTPSFGNRIAAASQYRNSVNGNTQGITTYVFAtAPIGLDATKQIRSVTLPgsvSGGSLHVFALT---------------
|
285 |
+
>UniRef100_A0A399GYA9_2259644/ 143 0.377 5.051E-34 38 223 233 12 198 199
|
286 |
+
--------------------------------------TVGAAHPVAIEDYLDSVGVEPNSRSGRGAFNIWGNAFPAEELPC-GNLTELDRVPFRFPAADGkRPDHLRCRGQRIPLPAVRADWIHVLGAAERRTEDSVLVEYADGTVRRQWLRLSDfWPQTGPRFGERLAFRTTAMLYPRHEQNTMSPSIWHQRVPLAVADGLSALVLPHNPAMHLFALTLVTEAA---------
|
287 |
+
>UniRef100_UPI0015643D0C_2698649/ 143 0.455 5.051E-34 39 217 233 18 196 206
|
288 |
+
---------------------------------------GSRFHLVDLDLHRDNVGVTSSSATGYGAFNVWGNSLPAEELP-SGRAIQVEDVPFTFPaTGGGRPDNVRCAGQHITVEPGQYDWLYLLTAAERRVEDEIAFHYADGAVDFEPLRVSDFWAAPAVFGETSAFTTLTMHYPYHVQANVSAMVWCQRVPLVRGAALAAMRLPRNPAVHIFAAT---------------
|
289 |
+
>UniRef100_UPI00234B0C2D_2944250/ 142 0.465 6.903E-34 45 218 233 0 173 179
|
290 |
+
---------------------------------------------MDISGHRNNAAVSAATETKAGEFNVWGNSFSADYLPAGKSLVHVDGVPFEFPPVCEGPDNVRCAGQFIRVPEGRYDWIHVLAASERRCEDTVELSFADGFVDAEALRVSDFWAAPAWFGEVKAFESLVMHYPHHVQRGIPAVMWAQRVPVTRRAGLTGILLPRNVAIHLFAVTL--------------
|
291 |
+
>UniRef100_UPI000A54E855_1710355/ 142 0.464 6.903E-34 45 222 233 3 182 196
|
292 |
+
---------------------------------------------IDLSPLLDNVGTSTTADTSAGRFNVWGNSFAAEDLPAPGSVVMVDAVPFTIPPvGTGAPDNVRCAGQYLALPGGPatADWIHVLAAGERRVEDEIALHFADGSVDFEPVRVSDFWAAPPAFGETRAFAT-LMNYPIHTQFGVPASVWSQRVPVTRRAPVVGLGLPRNLALHIFAVTLQPVA----------
|
293 |
+
>UniRef100_UPI001CFA9BF6_146923/ 142 0.387 6.903E-34 43 223 233 17 198 199
|
294 |
+
-------------------------------------------HPVAIEDFLDSVGVEPHSRTGRGAFNIWGNAFPAEELPC-GTLIEHDRVPFRFPPADGEwPDHLRCRGQRIPLPAARADWIHVLGAAERRTEDSVLLEYADGSVRRQWLRLSDfWPQTGPRFGERLAFRTSAMLYPRHEQRTMSPSVWHQRVPVAVPDGLRALVLPHNPAMHLFALTLVEEQA---------
|
295 |
+
>UniRef100_UPI0022AC71CD_47760/ 142 0.284 6.903E-34 31 219 233 18 209 585
|
296 |
+
-------------------------------VTVPAAAApGAEDRPLPLERLFDNTAVSDDTRPGEADFDGSGGSLSAQDLTAagwtPGRSLTVQGARLTWPrRVPGRPDNVRADGQSVRV-RGHGDALAFLVAGTGGGdeSGTGTVRYRDGSRSSYRLTASDWRGGPLA---TKAVALPHVNTP-GGQLTEKARLYVVTVPLVRGRSVDSVELPHNSDLHVFALSVR-------------
|
297 |
+
>UniRef100_UPI0007C6D0A7_105422/ 142 0.283 6.903E-34 47 216 233 1003 1180 1184
|
298 |
+
-----------------------------------------------LAAAFDNVGVTDDSATAAGNLDGSGSSYSAQALAAagvtPGTALSHGGVSFSWPGaASGRPDNVVAQGQPIDLT-GSGTTLGFLAAGTSGAGaGTGEIVYTDGSTQSYTVTVPDWYATPPA-GSDVAIATAYRNRAGNVQQVHATNVYYTGVPLTAGKTVAMVVLpaptPASGALHVFAL----------------
|
299 |
+
>UniRef100_A0A5J4L056_2607529/ 142 0.254 9.435E-34 31 217 233 145 346 349
|
300 |
+
-------------------------------VKLPISAlTVVVAKPGNLLGLFNNAGISND-GQGNADFDGDGYSYSAQQLAAsgykPGATVTVNGTQYLWPNvAPATFDNVQVAGQTIQTPdakAGATHLTFLGSATNGPSSGNVTITYTDGSTQTAQLGFSDWTlgagNSQPSYGNVVAVKTSYRNAGSG-QDQVGTYVFAsAPIALNASKQVASITFPssvDQGALHIFALT---------------
|
301 |
+
>UniRef100_A0A5P9PZG3_2653857/ 142 0.291 9.435E-34 34 221 233 769 953 954
|
302 |
+
----------------------------------PGGAS----VPVDLTAFLTNDAITSEFYLGDGDFDGAGNTYPAAQLPQTG-KVVDDGIEFVFVNGhEGTPNNVVPGGGPIALPEGAYGVLHLLGASDNGNtNSTMTITYTDGSTAELPLKLTDW-RASAAFGESEAITTSQLHAKDGPK-PVKLAIFHQAIELDPARSPKSLTLPATakPRPHLFAITLEKG-----------
|
303 |
+
>UniRef100_A0A7K2QB93_2690325/ 141 0.385 1.289E-33 43 227 233 5 191 193
|
304 |
+
-------------------------------------------TVVNLSAHFNNRGIQPPETSGDYGFNIWRNTFPAEELPEPGSLVDLVGAVFEFPaRESAAGDNVRCRGQLVELPVGSWDWIGLVGAAERRTEDEVELHYADGTVGREWLRMSDfWPQTGAYFGEPRAFATGSMRYPRHTHLHHAPSIWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
|
305 |
+
>UniRef100_UPI001E589C24_1915/ 141 0.271 1.289E-33 36 225 233 45 234 785
|
306 |
+
------------------------------------WVSVPDPVPVPLDALFDNDGIDTATARG-GDFDGSGYTFPGEELPA--GRIDVDGISFLFPsSAAGAGNNVVALGQRVDLPKGRYmSALFLTAGSYGDASGTATVHYADGSSTTAALGGADWYAAGGP------LSAAYRYGPDGTKDEHSVGIGVSEVWTDPRREAVALTLPKTnpvevgkTSLHVFALSLQPAAQGR-------
|
307 |
+
>UniRef100_UPI00167AA00B_33897/ 141 0.259 1.289E-33 33 220 233 770 960 961
|
308 |
+
---------------------------------LPEPAQSGTVTRIDLTGFFDNDGITTEMYYGDGDFDGSGRTYPMAQLPQTGET-TDNGIAFLFTNgSEGTTNNVVVAGQEITVPRGAYAKLHVLGAGDTAAvTVPAVVRYADGTSETARLTLTNWLASGPENGETEAVTTSQIHTPTGPV-ATKAAVFHQVIELDASRQLTTITLtaPDgTARAHVFALSLEQ------------
|
309 |
+
>UniRef100_A0A1H5IBI8_1855349/ 141 0.259 1.289E-33 47 216 233 906 1086 1090
|
310 |
+
-----------------------------------------------LLAAYSNTGISDDSgDHDEADFDGGGWSYSRQALAAagltPGTEATVDGLAYTWPNAPaGRPDNATATAQTIELPTPAATLSFLGSATNGNQSAPATVTYTDGTTGTLDLSFTDWTigggGGTVQYGNTIVARTAYRNVSGADKDPVATYVFATRpFTAPAGKTVRSVRLPENSDLHVFAL----------------
|
311 |
+
>UniRef100_UPI001FFEE4A7_2620835/ 141 0.276 1.762E-33 15 220 233 754 956 962
|
312 |
+
---------------IAPVKVTTPAPPWGLPPLPPG----GDMVTVDLEPFYTNDGITNEFYLGDGDFDGTGRTYPSGALPQNG-SLTNDGVPFRFTNGhEGTRNNIIAAGQTVELPEGNYRRLHILGASDNGNTDsTVTLHYTDGTATPVKFALTDWL-ASAAFGESEALRTNQIHTRTG-PAPLRAAVFHQVLAADSTRRLRAITLSANakPRSHVFAVTVEK------------
|
313 |
+
>UniRef100_A0A919K8V0_1050105/ 140 0.292 3.291E-33 33 224 233 4 198 752
|
314 |
+
---------------------------------VPGGARAAiftDPVPVPLDDLFDNNGIGVAA--GDANLDGSGHGFPAAGMPT--GSVTVDGVPYRIAatSAAGQNDNVVALGQSVAVPAGQYVAGFLLAASAYGSaGGTVTVHYADGTTSSAEVSAPDWFS-----GDAGAVTAPYRYGPGGT-DQHAVALYAVQVWMDPARTAQSITLPSTaapaegvSSLHVFAFSLQPAVAV--------
|
315 |
+
>UniRef100_UPI001A8F7A74_2814588/ 140 0.287 3.291E-33 34 220 233 762 945 946
|
316 |
+
----------------------------------PGGAS----VPVDLTAFLGNDAITSEFYLGDGDFDGAGNTYPAAQLPQTG-KVVDDGIEFTFVNGhEGTPNNVVPGGGPIALPSGAFRALHLLGASDNGNTNTkMTITYTDGSTAELPLRLTDWRN-SAAFGESEAITTSQLHAKDGPK-PVKLAIFHQTIELDPARSPKALTLPaaTRPRPHLFAITLEK------------
|
317 |
+
>UniRef100_A0A7C3IRJ5_2026780/ 140 0.248 3.291E-33 41 219 233 54 244 1084
|
318 |
+
-----------------------------------------GQVPLKLEALFDNDAIADAQRRSDGNFdcpdhaaDIPGSVFPAENLPATGSKFSFDNVSFLFPSKErGDFNNVSCNGQRIEVPPGRYKALHVVGTSENGSfRDRLSLAYKEG-PAEADLALKDWCQ-KPTEGDRVAFEAPCRYTWSSekrtmVREEVQPRLWLQRIALDPQKTLEAIALPYNRRMHVFAATLE-------------
|
319 |
+
>UniRef100_UPI0019308D5B_2282478/ 140 0.250 4.497E-33 52 217 233 916 1093 1096
|
320 |
+
----------------------------------------------------NNVAVTDDANTNLGDFDGGRASLSAQALAsvgvRPGGAVDFSGLRFAWPDaAPGTPDNVVASGQVLKLAGSGGRLGFLVSSTWGPASGTGIVRYTDGTRQPFALSSPDWY-GAPKPGGVAAVVVPYQNRPNNQRQNTPATIYFAEVALQAGKQVRSVELpnvsaaarPNTPALHVFAVT---------------
|
321 |
+
>UniRef100_A0A401ZWU8_2014871/ 140 0.252 4.497E-33 31 217 233 752 959 1369
|
322 |
+
-------------------------------VTLPATAGPGtmhifavSTKYVPQAAVYNNIGTSDDSAPATGIFDGT-NSYSAQALAAvnitPGATVSYNGVNFIWPNvMVGTPNNYAAKGQMIPVnPVPNATTLAVLGSStDGNSTGTAVITYTDGTTQSFPLGLSDWTLGggysTPAYGNKVVATTAYRNTPSGQQSTDKPTILYADVTLNyPGKTVQSLTLPatmTGGQLHVFAVS---------------
|
323 |
+
>UniRef100_A0A5S5C606_582686/ 140 0.276 4.497E-33 42 228 233 2516 2703 2715
|
324 |
+
------------------------------------------YAVVPLEAYMNDQGFGSLA-SGRANLTGMGtYFLPDDAITVEG--FRVDDRPFGIADlIDGQNDNISCHGQIIRLPQGRYDEIALLGCSEWGNYiEPCTAVYKDGTKEVFRLRFTDWT-GTPRFGEQIAWEGRiaERNEIRTFVIDQPGRIFMQTYALDGRKEAATLQLPYSPGMHIFAITLRQSQAALLPD----
|
325 |
+
>UniRef100_UPI0021AB612D_2867961/ 139 0.422 6.145E-33 52 223 233 1 175 183
|
326 |
+
----------------------------------------------------DNTGITRPDDLGAGAFNVWSNTFPAQHLPpaDPDGTVDVGGVPFRFPAQLTGPDNLRCAGQRLDAPKGRYDWIYLLAAGERRTEDVLHLHFADGTTDPEWLRVPDfWPQTPPHFGFTTGITFPVMHYPRHIQRDMSPSVWRVRVPVPREAELTGIHLPDNPAIHVFALTLCGPAA---------
|
327 |
+
>UniRef100_UPI001C9D367C_1827978/ 139 0.403 6.145E-33 43 227 233 5 195 197
|
328 |
+
-------------------------------------------TVIDLSAHYDNRGIQPPDEPGEFGFNIWRNTFPAEELPEPGSLVEVAGTAFVFPPrPTAGGDNIRCRGQLVELPdgaaGGRYDWIGLVGAAERRTEDEVELHYRDGSVSRAWLRMSDfWPQTAAYFDEPLAFRTASMRYPRHTHRHHAPALWQQRIAVVRPEPLAAVRLPDNPAMHVFAMTAVVDEESRLA-----
|
329 |
+
>UniRef100_UPI00099FEB85_1841249/ 139 0.280 6.145E-33 33 222 233 765 957 958
|
330 |
+
---------------------------------LPELPQTGQGTQVDLVGVFDNDAITTEMYYGDGDFDGTGRTYPMVQLPQTGQT-EDDGITFAFTNgSEGSNNNVIAAGQKIDVPAGGYANLHVLGAGDSGNVTiPAVATYADGSTAKADIRLTAWLSG-PAYGETEAVRTSQIHARSGPV-GTKAAIFHQKVALDPAKELSSITLtaPASGtaRAHVFALTLEKKS----------
|
331 |
+
>UniRef100_A0A401ZJF4_1936993/ 139 0.245 6.145E-33 46 216 233 922 1106 1110
|
332 |
+
----------------------------------------------DLLGLFNNAGISND-GQGNADFDGDGYSYSEQQLTaagyAPGATVTVNGIQYSWPNVPaAAYDNVQVAGQTIQTPDAKagATQLTFLGSATNGPSvGNITITYTDGSTQTAQLGFSDWTLGagtsQPSYGNVVAVKTPYRNAGAGT-DNVGTYIFAsAPIALNTSKQVASITFPsslNQGALHVFAL----------------
|
333 |
+
>UniRef100_UPI00048B7B40_311244/ 139 0.270 8.397E-33 41 220 233 1 180 183
|
334 |
+
-----------------------------------------DYYKINLNSYYNNNAFTYEESSSKGDLTSFGSSYPAEYLPNEHE-ITVNGIPFIFPTKENKFNNIELENQSIIVPDDSYSTIYILGSSENGSYkEYMELRQNNDLKSRNIFALTDWISNYPMFMEQVAFECGYVHTKDSSIHSLKPKIWIQTLHLDPATEFNTIRLPDNPCIHIFSLTLQK------------
|
335 |
+
>UniRef100_A0A229H6X1_1945643/ 139 0.271 8.397E-33 31 220 233 762 950 953
|
336 |
+
-------------------------------PALPETGAA---TPVDLAGHFDNDAITTEMYYGDGDFDGTGRTYPMAQLPQTGQT-EDDGITFSFTNgSEGSNNNVIAAGQKISVPAGGYARLHVLGSGDTADvTVPAELGYADGSTAKVDVKLTAWLSG-PKYGETEAVRTSQIHTRTG-PLGTKAAIFHQKVALDPAKELATVTLgaPSgTARAHVFALTLEK------------
|
337 |
+
>UniRef100_A0A4D4LI39_68280/ 139 0.269 8.397E-33 33 220 233 763 952 955
|
338 |
+
---------------------------------LPELPQAGAVTQVDLTGHFDNDAITTEMYYGDGDFDGTGRTYPMAQLPQTGQT-EDDGITFAFTNgSEGSNNNVIAAGQKVTVTAGGYTKLHVLGAGDTGNvSVPAEATYADGSTGKLTIQLTAWMSG-PAYGETEAVRTSQIHTRTG-PLGTKAAIFHQVVELDPGKELSAITLgaPSgTARAHIFALSLEK------------
|
339 |
+
>UniRef100_UPI0016620FE8_1896314/ 139 0.259 8.397E-33 48 217 233 961 1136 1139
|
340 |
+
------------------------------------------------AAAYNNKGISADSDPGTGTLDPAGYSFSATQLAAvgytPGATVTAGGLPYTWPDTrPGQPDNVAAAGQVIRV-QGRGARLGLLGTGiSSTHAGTVTVTYTDGTSTDLAVVLPDWYSNAASGNSQLAVTTANWNRPPGDTLGdHAVSLYTTGGALDPAKTVATVTLPNDSGFHVFALS---------------
|
341 |
+
>UniRef100_A0A919QKH3_1070424/ 139 0.459 1.147E-32 38 221 233 10 193 209
|
342 |
+
--------------------------------------TAEGHHLLDLAGHRNNVGATAPDHLGAGAFNIWGNSFPAGELP-SGRPVLVDGVPFDLPvCGAAAPDNVRADGQFIEVAPARYDWLYVLAAAERRVEDEIAFHFTDGSVDFEPLRLSDFWAAPAVFGESAAVVTEVMHYPLHVQADVPAMLWCQRVPVTRRAVLRAARLPRNPAVHVFAATLREA-----------
|
343 |
+
>UniRef100_A0A5J4KKR6_2607529/ 139 0.250 1.147E-32 32 217 233 166 359 362
|
344 |
+
--------------------------------TLP-KTTGGQMHIFDFSyrvGPFNNIGGTIDEDLQQiQNFDGQHNGYSYDALSAaglPKGSVTINGVNFNWrSAADGNADNYQASGQVVPVtPVAQAKTLAFLGASTGGaASGTATITYTDGSQQTFTLGLTDWCAPTVAYNNRVAAAATYRRTPHG-NQTIKTSVYYTDVALQSGKTIKSVTLPTNGQIHIFDIS---------------
|
345 |
+
>UniRef100_E2PX58_1901/ 139 0.288 1.147E-32 33 221 233 751 941 944
|
346 |
+
---------------------------------LPPLGPTGGVTPVDLAAHFDNDAITTEMFFGDGDFDGTGRTYPMAQLPQTGRT-TDDQITFSFANgSEGSKNNVVAAGQRVAVEPGSYARLHVLGSGDTGNvTVPAVLSYADGTAATVPVRLTGWLSG-PAYGETEAVRTSQIHTRGGPV-GTKSAIFHQRVPVDPGRRLVSVTLGRpsgTARAHVFALSLEAA-----------
|
347 |
+
>UniRef100_UPI001C0E2853_2842201/ 139 0.268 1.147E-32 6 220 233 745 952 955
|
348 |
+
------PDFR-----IAPVQVTTPAEPWG-LPELPQAGAVTQ---VDLTGHFDNDGITTEMYYGDGDFDGTGRTYPMAQLPQTGQT-EDDGITFAFTNgSEGSDNNVIAAGQKVAVPAGGYTKLHVLGAGDTGNvSVPAVATYADGSTGPLTIQLTGWMSG-PAYGETEAVRTSQIHTRTG-PLGTKAAIFHQVVEVDPAKELSAITLtaPTgTARAHVFALSLEK------------
|
349 |
+
>UniRef100_D2PSN6_479435/ 138 0.394 1.568E-32 12 218 233 11 216 225
|
350 |
+
------------MVAVRPYSGPVTVGPDS-VTEPPPESPHVGCRPVDLAPHRNNVGSTPASGTRNGAFNIWGNSFPAEELPPPG-LCAVDQVVYDFPPTePGTADNVRAAGQFVEVPAGRYDWLYVLGAAERRVEDELAFHFADGAVDFEQLRLSDFWAAPAWFGETQVRATRSMHYPFHVQAGVPAMLWSQRVPVTRRSVLSAVRLPRNPAVHLFAATL--------------
|
351 |
+
>UniRef100_A0A1H6EW60_1144553/ 138 0.264 1.568E-32 52 215 233 201 378 384
|
352 |
+
----------------------------------------------------NNAGVGDDTEPGQADFDNGGWSYSAQALAAgarPGGTVTWKDHTFTWPNrKPGEWDNVQAAGQTVDLTaPAGAGTLALLGAGASGDIETgVTITYTDGSTQETKVGFSDWALArdayPPRFGNEIVLRTPYRLDGGGGRQDINVYVFAVTpIRLDPAKQVKSLTLarPAGAAtAHIFA-----------------
|
353 |
+
>UniRef100_UPI00143AD3F7_2720023/ 138 0.262 1.568E-32 52 215 233 905 1083 1089
|
354 |
+
----------------------------------------------------NNAGIGDDDEAGLANFDGVGWSYSAQALAAagarPGGTVTWKGYDFTWPDrEPGEWDNVQASGQTVDLaAPAGAKTLALLGAGASGDiQTDVTITYTDGSTQQVKVGFSDWALArdayPPRFGNEIVLRTPYRLDGGGGRQDINVYVFAVTpIELDASKQVKSLTLakPTGAAtAHVFA-----------------
|
355 |
+
>UniRef100_A0A2T5BN31_2135609/ 138 0.460 2.142E-32 45 219 233 17 190 194
|
356 |
+
---------------------------------------------VDLTPYRNNIALTTDKTLHRGMLNVWGNSLPAEVLPHDG--LTVGGIPFRgVPGGGAEPDNVRCAGQYLELPRTTADWLHLLATSERRCEETAFVHYASGAADPEWVRVSDFLPARAHFGEVLAARSDALHYPHHRQENLGGRLWAVRIPVTRREPVCGLRLPDNPALHVFALSLE-------------
|
357 |
+
>UniRef100_UPI001915634B_2778368/ 137 0.250 5.464E-32 31 216 233 907 1107 1111
|
358 |
+
-------------------------------VKLPISAlTVVVAKPGNLLGLFNNAGISND-GQGNADFDGDGYSYSAQQLNAagytPGATVTVNGIQYTWPNVAvATFDNVQVAGQTIQTPDakvGATHLTFLGSATNGPSSGNVTITYTDGSTQTAQLGFSDWTlgagNSQPSYGNVVAVKTPYRN-AGPGRDQVATYVFAsAPISLNASKQVASITFPasvDQGALHVFAL----------------
|
359 |
+
>UniRef100_A0A8J3HZW2_2778364/ 137 0.241 5.464E-32 51 222 233 775 958 1149
|
360 |
+
---------------------------------------------------YNNVATSDDGSPAGGSFDGI-NSYSSQATQSlgliPGASVNVYSTTFIWPAVnPSYKNNYVAQGQVLPIkPVNNAGTLAFLGsSSYGPSSGTLTVAYTDGSTETFTLGFSDWTlaggKASPSFNNLVAMSMPYRNTPHG-KQNINTYIFYSEIPLAVDKTVQSITLPsdvKNGQLHVFAISTRTGS----------
|
361 |
+
>UniRef100_UPI001EF7C8F6_2807632/ 136 0.486 1.393E-31 38 218 233 15 194 208
|
362 |
+
--------------------------------------GSPLFRTVDLTGHLNNKAVTTESRKGEGRLNVWRNTIPAQHFSAAGSRVEVDGIPYLLAP-ESAYDNVRCAGQYVTVPEGRYDWIRILATGERRAETEIAVHFADGHIDFEALRVSDFWHAPPRFGESVAYRTPVMHYPHHVQERVNAGVFSTRVPVSRQAAVCALRLPRHVGVHVFALTL--------------
|
363 |
+
>UniRef100_A0A938MH69_2026780/ 136 0.231 1.393E-31 40 221 233 21 217 906
|
364 |
+
----------------------------------------PQQLPLNLVELFDNDGISSEKNRKDGNFDcpdhpahVPGSTYPAEFLPESGAIFTappLPDVSFLFPNkADGEKNNFSCAGHKFEPPVNAYAALYILGTAENGKQeGEIGLNFEDG-QLNLPLKFSDWCEP-AQFGEVEAIAVPFRYswqerGGRMEKEDITCRLWVQKIPLPEKRRLESFVLPYNARMHVFAITLVAA-----------
|
365 |
+
>UniRef100_A0A8J3HZW2_2778364/ 136 0.232 1.393E-31 51 215 233 965 1144 1149
|
366 |
+
---------------------------------------------------YNNMGTSNDNTPGSGNFDGGHMSYSAQALQskgyKPGSVVVFNGTSFVWPaSTPGTVDNYVAQGQVIPVNSVyNANYIAFLGSASHGPlQGNVLVTYTDGTTETVSLGFSDWTLGagkmKPSYGNKVALTTTYRNGPKG-KQNVATYMFYCEATLgALNKQVQSITLPTLPagpgQLHIFA-----------------
|
367 |
+
>UniRef100_A0A1B2HKE2_1586287/ 135 0.257 1.903E-31 52 215 233 910 1080 1085
|
368 |
+
----------------------------------------------------NNVGASDDA-TGDGDFDGGGYSYSRQALAtaglVPGQTGTVDGLTFTWPGSPaGRPDNVTANKQKLTL---SGTKLAFLGSAANGARtRTATVTFTDGTTAPVEIGFSDWTLGgggaAPSYGNVVVANTPYRNQLGGGSEKVATHIFATkTYVAPEGKVLQSVVLPEDVNLHVFA-----------------
|
369 |
+
>UniRef100_A0A7G8KM49_2763006/ 135 0.264 2.600E-31 15 223 233 138 363 747
|
370 |
+
---------------VGPTAAPAgRAEPTAAPAVVPpavgegAPATGRAGRPVPLERLFDNVGTSDDADPGAADLDGAGNSLSARDLAAagwtPGRALTLDATRLEWPrSAPGRPDNVRANGQHVAL-SGTGDTLTFLVTGSSpgrvgpGAGGSGTVHYRDGSRSTYTLTAPDWRGGPLA---TKAVALPHHNTPAGQR-REAVRLYAVSVPLARGAAVASLTLPADPgpdaDLHVFALAVRPPAA---------
|
371 |
+
>UniRef100_A0A2T0TAZ4_84725/ 135 0.262 2.600E-31 52 216 233 920 1092 1096
|
372 |
+
----------------------------------------------------NNIGASDDAGSEVADFDGGGYSYSRQALAAaglsSGATGTVDGLTFTWPNSPNdRADNVVTNGQSVDVTGTKLS--FIGAAADGGRTAAATVTYTDGTTGTVDLGFSDWTLGgggqNPSYGNVVVAKTPYRNLLGGGNEQVVTNIFATkTFTAPEGKVLKSVQLPTNEGLHVFAI----------------
|
373 |
+
>UniRef100_S5TLH7_1366598/ 134 0.453 3.552E-31 41 220 233 38 216 219
|
374 |
+
-----------------------------------------EYFTVDLSGHLDNIGLTMPDQLAAGALNVWGNSLPAGALPA--GAVEVGGVPFVTAGGDGsRPDNVRCAGQLLDLPPVAGSWLHVLATSERRCEEELHVHYADGAVDPEWLRVSDFWPAAAHFGEVAAARTGAMHYPHHIQGDLGGQIWATRVPATRGGTLAALRLPDNPALHLFALTVET------------
|
375 |
+
>UniRef100_UPI00218069CC_1654476/ 134 0.250 6.629E-31 52 216 233 920 1092 1096
|
376 |
+
----------------------------------------------------NNIGVSDDKGSEEADFDGGGYSYSRQALAAagltSGGTGTVDGLAYTWPNSPeDRADNVIAAGQSIDV---SGTKLSFLGAAAGGARtAAGTVTFTDGTSAPIDLGFSDWTlgggSQNPAYGNVVVAKTPYRNQIGGGSEQVVTNIFATkTYTAPDGKVIKSVKLPTNGGLHVFAI----------------
|
377 |
+
>UniRef100_UPI001EF8F640_2911966/ 133 0.269 9.055E-31 43 217 233 5 184 188
|
378 |
+
-------------------------------------------VCVDLSKLFNNKGVSWIGLGIAGSFDHSGISVPGEILP-DGITVTWCDIPFQFPKTSSiENDHICCEEQTIEVDPNSYRQIYVAGFSlYGDYSDRVWIHYEDGSSEDKEFGLSDWYSRTEAgnglkHSEQVAMMIPY-YYENGIKIQAMRGIWIQRLLIDPNKKLHSIRLPDNPYMFIFAIT---------------
|
379 |
+
>UniRef100_UPI001A9FE9C4_2530381/ 133 0.413 9.055E-31 45 232 233 3 192 221
|
380 |
+
---------------------------------------------IDISPYLDNVGATTAGRTATGRLNVWNNSFAAENLPPGGSNVMVGGVPYVMPPFGGiDPDNIRCAGQYLELpgPGRAVDWLHLLATGERRVEDEIAVHFADGSVDFEPVRVSDFWAAPAAFGEVPAFET-LMHYPAHTQFGVPASVWSQRVPVTRRLPLIGIGLPVNAALHVFAVTLQPPDGTRRPAAVST
|
381 |
+
>UniRef100_A0A193C823_31958/ 133 0.259 9.055E-31 21 221 233 734 943 945
|
382 |
+
---------------------PRGKRPDTTIAPVPVSVPAPPWglpalppagdlVPVDLAAVLNSDGVTSEFYLGDGDFDGGGNTYPAAQLPQTGQ-VTDDGVPFLFVNgSEGTPNNVV--GATIPLPAGKYATLHVLGAADtGNAVTTLKVSYVDGS-AEVPLRLTGW-RAAAAFGESEAITTNQLHARAG-VQSVKLAIFHQRVPLDPAREVVSVTLPSaaTPRPHVFAVTLEKG-----------
|
383 |
+
>UniRef100_A0A6G3RW39_2706073/ 132 0.402 1.690E-30 47 223 233 21 198 199
|
384 |
+
-----------------------------------------------IEDFLDSVGVEPSSRSGSGAFNIWGNAFPAGELPC-GTLTAHDRVPFRFPAADGvRPDHLRCRGQRIALPAGRTDWIHVLGAAERRTEDGVLLEYADGSVRRQWLRLSDfWPETGPRFGERLAYRTTAMLYPRHEQYTMSPSIWHQRVPVAVPDGLTALVLPHNPAMHLFALTLVTEAA---------
|
385 |
+
>UniRef100_A0A1B1MEJ8_1915/ 132 0.285 2.308E-30 50 216 233 931 1111 1115
|
386 |
+
--------------------------------------------------YLDNNGVSADDNDPSGDFDGGGGSYSAKALAdqklTPGATVDAGGFSFTWPKvGPGAPDNIVVGGgeQVLDVPGGATKLAILGSASNGPSTGTLTLTFTDGSTQQATVGFSDWTLGggaqKPSYGNVVAARTAYRLY-SGSTDDVDTYVFAtAPIAVPAGKRLASVTLPSStsgGRMHVFGL----------------
|
387 |
+
>UniRef100_A0A7X3GHH6_2682849/ 132 0.266 3.152E-30 43 222 233 343 522 523
|
388 |
+
-------------------------------------------VPIDISGICAIRGI--HNSLGTADVDGDGHSYSREGLPVDG-LLRIGDLSFVFPASQNEAdcDNTACDGQAIPIPPGHYHGISVLASSqYGGSADAFTVEYADGTSEQVQLGFADWWSRFPIAGEKVAWSA-NLNRPSLGKTEEIVHLFANEAPLSRtGSTAVRIVLPNLPNLHVFAVSMWKQA----------
|
389 |
+
>UniRef100_A0A7V2U765_2026780/ 132 0.279 3.152E-30 34 218 233 722 916 923
|
390 |
+
----------------------------------PPPAANVECVHLNaLAGHFNNDGISWRANPADGNFDfpsrASGASFIADLLPKKGALLAVpgnEGVTFRFPDKDDRLrNNVLCDGQRLQLarPYGSFEAAWFLGACHDGArSALLTIDYEDGKAQ-GELRLADWLS-RPAAGEIDVLRLSARHAGDGKEEARDCGLVAWRVPLDPSRKLMALTLPRERNMHVFAVTL--------------
|
391 |
+
>UniRef100_UPI00224096B5_202862/ 132 0.275 3.152E-30 57 221 233 867 1044 1047
|
392 |
+
---------------------------------------------------------TSTATRPVGDFNGYGGSFVAEELAedglVPGQDVTVDGIGYRWPDaAPGSPDNINAHGQTVTVHAAPGqSRLGLLGAGNEGaATGEVTVHYTDGTTRRASVTLGDWhldGATAPPPGNTAAATMPERQMAGGTPEKMTVYIWSTSVPVDPHRTVASVTLPERttgGQMHVFAVGVGEA-----------
|
393 |
+
>UniRef100_UPI00068FB531_1463857/ 132 0.264 3.152E-30 52 215 233 910 1093 1098
|
394 |
+
----------------------------------------------------NSKGVSDDAKP-QGNFDGEGWSYSAQALAAggavPGGTVSAGGFSFTWPDVqPGDPDNIQvtpgqpATGQVVAVqPAAGATKLSLLGsAAEGTAKGTVTLTYTDGTTQQADIGFSDWTlgggSQQPSFGNTVAVKTSYRDTLSSGPDPVGTDVFAtAPIALQAGKQLASVTLPSTVSggvMHVFA-----------------
|
395 |
+
>UniRef100_A0A7I8EN04_2717365/ 131 0.262 4.305E-30 46 223 233 230 407 968
|
396 |
+
----------------------------------------------PLPSLLNNQGIGSAA--GQANFDGSGYGYPSDQVPA-GGIIGLAGVSYLFPtHGSATKDNVVALGQSITLPQGHYQQASLLtASSYGPASGTVTVHYTDGSSSTATLNAPDWYN-----GTSDVINTTYRYTPTGT-DQHAVHIYVSQVWIDASRVAASLTLPqtalpaaNTASLHVFALTLQLPSA---------
|
397 |
+
>UniRef100_A0A5A5TI23_2014874/ 131 0.229 4.305E-30 49 215 233 103 283 1208
|
398 |
+
-------------------------------------------------PAYNNAGSSNDSNPNEGSFDG-NNSYSVQALQGtgllPGQNFTFSNVTFVWPNaAAGTPNNYLVNGQVIPVAPsiaGAANLGFLGASTNGNASGNATVTFTDATTQTFSLGLSDWTlggnpQSTPAFSNQIAATMTYRNTP-VDQQTITTFLFSAQLALPAGKTVQSVTLPtttSSGQMHIFA-----------------
|
399 |
+
>UniRef100_A0A3N1SWC4_2485152/ 131 0.418 5.880E-30 45 220 233 17 191 194
|
400 |
+
---------------------------------------------VDLAPHLNNVALTAMQNLGDGRLNVWGNSLQFETIPS--GRLSVDGVPFeTFNEVDSEPDNIRCDAQYLKLPESKADWLHLLTAAERRCEETVHIHYSSGAVDPEWIRVSDFWPARAHFGESLAATSPSMHYPHHRQGNLSGQLWAVRIPVTRREPVRALRLPDNPALHIFALTMET------------
|
401 |
+
>UniRef100_UPI0022597B98_2903819/ 131 0.269 5.880E-30 52 215 233 907 1088 1093
|
402 |
+
----------------------------------------------------NSTAISTDDDNPQANFDGEGWSYSAKALAAagatPGGTVSSGGFDFTWPKvAAGDPDNIEVAGdtpQVLNVPSAQgATKLSLLGsAAEGSASGTATLTYTDGTTQKADIGFSDWTLGggadKPSFGNTVAVHTPYRDVQGGGTDPVGTEIFAtAPVTLQAGKQLASVTLPSTTDggvLHVFA-----------------
|
403 |
+
>UniRef100_A0A6L5AS91_2575830/ 130 0.243 8.031E-30 42 219 233 2 182 190
|
404 |
+
------------------------------------------YYLVNLEEYYCNCGFSFINQRENGDFTGSGSSYPAEQLPSSEKVIKIHEVPFHFPSKeKGKFNNIEFNEQKIKVKEDSYQAIHVLGAADNGSFlEPISLIDQQGSVSKVMLGFTDWVDYEPKFNDLKAIVCKGVHSAQlGCLEGMQCTIWYQKVKVPTGVLFNEIRLGDNPGMHIFSLTLE-------------
|
405 |
+
>UniRef100_UPI000D13FE3E_1463889/ 130 0.241 8.031E-30 52 215 233 907 1088 1093
|
406 |
+
----------------------------------------------------NSTGISTDDDDPRANFDGEGWSYSAKALAAagvtPGGTVSSGGFDFDWPKvGAGDPDNIEVAGagpQVLNVPSGPAdTKLSLLGsAAEGSASGQVTLTYTDGTTQQAEIGFSDWTLGggtqNPSFGNTVAVHTTYRDVQGGGKDPVGTEIFAtAPITLQAGKQLASVTLPSATDggvIHVFA-----------------
|
407 |
+
>UniRef100_A0A543N991_405555/ 130 0.437 1.498E-29 45 219 233 17 190 194
|
408 |
+
---------------------------------------------VELQRYKNNIALTTLQTLHLGALNVWGNSLPSETMP--HEQITVDGVPFSVSPATGEaPDNIRCAGQYLELPETAADWLHLLATSERRCEETVHIHYSSGAADRERLRVSDFLPARSHFGELLAARSPAMHYPHHRQDNLSGQIWAVRVPVTRRETLRGFRLPDNPAIHIFALSTE-------------
|
409 |
+
>UniRef100_UPI001CB70AD5_1896314/ 130 0.250 1.498E-29 50 215 233 902 1085 1090
|
410 |
+
--------------------------------------------------YVNNNGISADDSNPAANFDGEGWSYSAAALAAagatPGGTVSSGGFDFTWPQvAPGAPDNIVVGGgdQVLDVskSAAGATTLSLLGsASEGPTTGTVTLTYTDGTTQQADIGFSDWTlgggSQQPSYGNVVAVHTAYRDVQGGGKDPVGTEIFStAPIALQAGRQLASVTLPSSTNggdMHIFS-----------------
|
411 |
+
>UniRef100_UPI002258EC57_2903886/ 130 0.263 1.498E-29 52 215 233 934 1115 1120
|
412 |
+
----------------------------------------------------NNTAISADDDNPQANFDGEGWSYSAKALAAagatPGSTVSANGFDFSWPEvTAGDPDNIEVAGstpQVLTVPPAPgATELSLLGsAAEGSASGTLTLTYTDGTTQQADIGFSDWTlgggSDKPSFGNTIAVHTDYRDVQGGGQDPIGTEVFAtAPIALQAGKQLASVTMPSSTKggvIHVFA-----------------
|
413 |
+
>UniRef100_UPI001940CD9B_113564/ 129 0.462 2.045E-29 60 231 233 1 174 176
|
414 |
+
------------------------------------------------------------SNTSSGRFNVWGNSFAAEDLPAGGTGVVVDAVTFTMPPTgTGAPDNVRCAGQYLDLGPQAdlADWLYLLAAGERRVEDEIALHFADGSVDFEPVRVSDFWAAPPAFGETKAFAT-LMHYPIHTQFGVPATIWCQRVPVTRRAPLTGIGLPHNVALHIFAATLQPAAARPAALEVT-
|
415 |
+
>UniRef100_A0A2T3VKX0_2135446/ 129 0.416 2.045E-29 36 219 233 3 186 198
|
416 |
+
------------------------------------AAGTGRHRVLDLTDHLDSTGFTRPDATDAGALNIWGNTFATGDLPAGHPHWLVDGVPFRTAEA-GGPDHLRCRGQYLEVPIGRYDWLHLLATAERRTEDPVLLHFADGAVDPEWLRVSDlWPGGQAHFGEAPALRGSRLHYPRHVQADMRPTLWAQRVAVPRRAPLVGLRLPENPALHVFAVTLE-------------
|
417 |
+
>UniRef100_A0A4S2RGE1_2563103/ 129 0.241 2.045E-29 52 215 233 907 1088 1093
|
418 |
+
----------------------------------------------------NSTGISADDENPQANFDGEGWSYSAEALAAagaePGGTVSSGGFDFAWPQVhAGDPDNIEVVGsgpQVLNVPSAQGdTKIAFLGsAAEGEASGTVTLTYTDGTTQQAEIGFSDWTLGggtqQPSFGNTVAVHTGYRDVQGGGKDPVGTELFAtAPITLQAGKRLASVTLPSNTEggiIHVFA-----------------
|
419 |
+
>UniRef100_UPI001CD27338_2877242/ 128 0.264 5.209E-29 52 215 233 900 1083 1088
|
420 |
+
----------------------------------------------------NSRGISDDTNPT-ANFDGEGWSYSAQALadagAAPGGTVSANGFDFTWPDvKTGDPDNIQvtpgapAPGQAVSVtPVSGATKLSLLGsAAEGSAEGTVTLTYTDGTTQQADVGFSDWTlsggADQPSFGNTVALKTTYRDVQGSTADPVGTDIFAtAPIALQAGKQLASVTLPSAVSggvMHVFA-----------------
|
421 |
+
>UniRef100_A0A4U0SM40_2571141/ 128 0.254 7.113E-29 50 216 233 920 1104 1108
|
422 |
+
--------------------------------------------------YLNNNGVSSDDDSPAANFDGGGYSYSAKALAAagvtSGSTVSSGDFRFTWPKVNaGDPDNIAvgGGGQVLDAPaaPAGASRLSLLGsASNGAASGTVTLTYTDGTTQEEQIGFSDWTLGggsrQPSYGNSVVVRTDHRDVSGGGTQTVETDLFStAPIVLASGEQLQSVTLPDTvsgGTIHVFAM----------------
|
423 |
+
>UniRef100_A0A1Q5MVC9_1703920/ 127 0.247 9.712E-29 52 215 233 915 1096 1101
|
424 |
+
----------------------------------------------------NSTAISTDDDNPQANFDGEGWSYSAEALaaagAAPGGTVSSGGFDFGWPEvAAGDPDNIEVAGtepQVLNVPSASGdTKLSLLGsAAEGSASGQATLTYTDGTTQQAEIGFSDWTLGggaqKPSFGNTVAVHTAYRDVQGGGTDPVGTEIFAtAPIALQAGKQLASVTLPSATEggvIHVFA-----------------
|
425 |
+
>UniRef100_A0A1C4K485_1839759/ 127 0.244 9.712E-29 50 215 233 916 1099 1104
|
426 |
+
--------------------------------------------------YVNNAGISADDQNPAANFDGGGWSYSAKALAAagavPGGTVSAAGFDFTWPQVqPGAPDNIVVGGgdQVLDVsaTSAGHTRLSLLGSADDGDtSGTVTLTYTDGTTQQAQIGFSDWTlgggAGQPSFGNITAVHTTYRDVMDGTTDPVGTDVFAtAPIDLQAGKQLASVTLPattSGGDMHVFA-----------------
|
427 |
+
>UniRef100_UPI001575013E_227866/ 126 0.233 2.472E-28 40 220 233 1 182 187
|
428 |
+
----------------------------------------GKYEMINLEELFNNKGISRMNNPQDANFTGFGSSYISENLPSSGSVLELYGIPFLFPNKDPlVVDNLEFMNQEIQIKEAYYSNIHVLGAADNGSFiEPIKLVGND--IKEVKIGLTDWIESSPKFNNKKGIICEGINsNKSGYTRTVKTNIWYSVVGIPSNNLFKSILLVDNPSMHIFSLTLEK------------
|
429 |
+
>UniRef100_UPI00068E7F42_1444769/ 126 0.349 2.472E-28 27 231 233 6 214 219
|
430 |
+
---------------------------QTAQTVQTAPTDQTAHKVLALEPFMNNQAATTPANLADGRLNVWRNSLPAQDAPLE---LLVDGVPLRTARLDGtGPDNVRCAGQRIEVPERRWDWLYLIGCGERRVRDVLTWHFSDGTVDRDHLALSDLWEGRSDFGEELALRTDVIHYPHHVQERIGITLWCQRVPVTSRKPLTAMSLPRNPAVHLFALTLvgrhlgAGADGEPLIQGAS-
|
431 |
+
>UniRef100_UPI000F0856B3_2483798/ 126 0.419 3.375E-28 25 221 233 2 194 202
|
432 |
+
-------------------------ETTTRVTTAGGSAG----TLVDLSGHRNNLSCTTTDSLREGRLNIWRNSLPQVEMPTAG-LLEIHGVGFSLPRFDGiNPDNVVCSGQHLACASGSYDWIYLLATSERRSEDQFSLHFADGSVDFESFRVSDFWHAEPAFGDRRAFGTTQMHYPFHVQPNLRGDVWFQRVPVTRMVPLSGVTLPDNVAIHLYAMTLIPS-----------
|
433 |
+
>UniRef100_UPI001890D6EE_2705253/ 126 0.288 3.375E-28 50 217 233 456 631 634
|
434 |
+
--------------------------------------------------ALNATVISDDDAATEGCFDRACDSFSADALAAGGATaggkVTVAGQSFDWlDNGSGRFDSITADGQTIDY-YGSGTTLGILGASSGGDtSGTITLTYSDGTTSTATLGFPSWVTGHPtAFGDTTAVTTLHRDTPHGPGQARAAHyvVSYAPVAITPGKSLVSITLPNKPALHVFSIT---------------
|
435 |
+
>UniRef100_UPI00203463BB_2888348/ 125 0.258 4.608E-28 52 215 233 907 1088 1093
|
436 |
+
----------------------------------------------------NSTAVSTDDDNPQANFDGEGWSYSAKALAAagatPGGTVSSGGFDFGWPKvAAGDPDNIEVAGaapQVLNVPSKEGStKLSLLGsAAEGSASGQATLTYTDGTTQQAEIGFSDWTLGggadKPAFGNTVAVHTAYRDVQGGGTDPVGTEIFAtAPIALQAGKQLASVTLPSSTDggvIHVFA-----------------
|
437 |
+
>UniRef100_A0A7W9IM19_1816182/ 125 0.437 6.291E-28 45 219 233 13 186 190
|
438 |
+
---------------------------------------------VDLSEHRNNVAFTTPVALDRGRLNVWGNSLPAGTLPS--GRHLLGDVLFDLPEADGRaPDNVRCAGQFVTLPRVRADWIHLVATSERRCEEFVHIHHSSGAVDVEWLRVSDFWSADPHFGESVAGRSADMHYPHHVQRGLIGLVWLVRVPVPRREEITGIRLPDNPALHVFGLTLE-------------
|
439 |
+
>UniRef100_A0A3N1SMM6_2485152/ 124 0.252 8.589E-28 52 215 233 930 1111 1116
|
440 |
+
----------------------------------------------------NSTAISADDDNPQANFDGEGWSYSAKALAAagatPGGKVSSGGFDFGWPEvAAGDPDNIEVAGtepQVVNVPAAPGdTKLSLLGsAAEGSASGQATLTYTDGTTQQAEIGFSDWTLGggadKPSFGNTVAVHTAYRDVQGGGTDPVGTEIFAtAPIALQAGKQLASVTLPSTTDggvIHVFA-----------------
|
441 |
+
>UniRef100_A0A7W3ZVE1_2213162/ 124 0.284 1.172E-27 34 223 233 32 231 614
|
442 |
+
----------------------------------PAALRAPAAEPRPLRSYFDNRAVSHADRPEAADFDGSGNSLPASGLTAagwtPGSRLLLDATPLTWPDrTPGEPDNVRADGQLVAVDGRGPALTMLLAAtSPHGPTDPVTatgrVHYRDGSSARYTVTAHDWRTGPAA---TSALSLPHHHTPDGVS-TRRARLYAVTVPVDPTRSVRALALPADPgpraDVHVFAVGLRAAAA---------
|
443 |
+
>UniRef100_UPI0005AAA057_405782/ 124 0.404 1.601E-27 47 218 233 8 177 187
|
444 |
+
-----------------------------------------------LEPFLNNQAATTPENLGDGRLNVWRNSLPAQSAPLE---IVVDGVPLRSARLDGRgPDNVLCAGQRIEVPGRRWDWLYVIGCGERRVRDTVTWHFRDGTVDRDHFALSDLWEGRSDFGEELALRTDVIHYPFHVQERIGITLWCQRVPVTSRKPLSAVSLPRNPAVHLFAMTL--------------
|
445 |
+
>UniRef100_UPI000FCC90E2_2487275/ 123 0.262 2.185E-27 52 216 233 914 1096 1100
|
446 |
+
----------------------------------------------------NNKGISADDTNPAANFDGGGYSYSAKALasagATPGATVTSGGFGFTWPKvKPGDPDNIAVSGgdQVLDAPAGsaNATRLSLLGsASNGAADGTMTLAYADGTTQEAQIGFSDWTLGggnrQPSYGNTIALRTAYRDISGGGSQQIATDVFAsAPITLAAGKQLKSVILPTavaGGTLHVFAL----------------
|
447 |
+
>UniRef100_A0A553ZJZ9_2595054/ 120 0.264 2.631E-26 43 219 233 38 221 606
|
448 |
+
-------------------------------------------HALPLTHLYDNTAVGGGSRPGAADFDGAGNALSAPDLSAagwtPGRALDLDAAPLTWPRAtAGRPDNVVANGQTVAV-GGTGPALTFLAAATGtgaPATGNGTVRYRDGSVSGYRLTAPDWRSGPPA---TKAVALPHLVTPHGTVDG-PARLYAVTVPLRPGRAVASVQLPRAAAsgaeLHVFALAVR-------------
|
449 |
+
>UniRef100_UPI00225B5EAE_2975715/ 119 0.387 6.686E-26 47 223 233 8 182 198
|
450 |
+
-----------------------------------------------LDPFLNNQAATTPENLTDGRLNVWRNSLPAQSAPLD---IVVDDVPLRSAPLDGRgPDNVLCSGQRIEVPERRWDWLYVIGCGERRVRDVVTWHFSNGTVDRDQLALSDLWEGRSDFGEDLALRTDVIHYPHHVQERIGITLWCQRVPVTSRKPLGAMSLPRNPAVHLFAMTLVGRQA---------
|
451 |
+
>UniRef100_UPI00147FFFB7_81569/ 119 0.342 9.123E-26 45 217 233 36 210 220
|
452 |
+
---------------------------------------------VPLLSYFNNIAFAGLNDLSKGQLNIWRNSYPQEAVANLPGYIDVGRVKFSFPQTDGEtPDNIRCDGQRIVLPVDQYDWIYMLCSAERRVEDEATLFFSDGSVDFVPFCVSDfWPGAKGRNGEIEALRFDSLNFPRHRQVRIEPAIWRSRIPVARETPLSAIRLPRNVAAHLFAVT---------------
|
453 |
+
>UniRef100_A0A1M5CPJ9_1206085/ 118 0.451 1.245E-25 44 218 233 10 181 187
|
454 |
+
--------------------------------------------PVGLDAIRDNVAMTGADELGAGGLNVWRNSLPAGTYP--GAPVEVDGVPFT-GSPHTGPDNVRCAGQTLPVEIGRWDWLWLLATGERRVEDEIAMLFTDGAVDLEAVRVSDFWAAPAAFGETVAFRSEVMHYPHHVQSRLPGTIWCQRVPITRRADLVAIRLPDNLALHVFAATL--------------
|
455 |
+
>UniRef100_UPI0003616C8A_239974/ 118 0.437 1.699E-25 45 219 233 17 190 194
|
456 |
+
---------------------------------------------VDLSTHRNNIALTTPETLRNGMLNVWGNSLPAETLRTDC--LRVGRVRFAGVRANGtEPDNVRCAGQYVDLPEIEADWIHLLATSERRCEEEVGVHYASGAATTEWVRVSDFLPARARFGELAAARSSALHYPHHRQEDLSGQVWAVRVPVTRWEAVRGLRLPDNPALHVFAISVE-------------
|
457 |
+
>UniRef100_UPI000A606C17_1911/ 117 0.427 2.317E-25 42 219 233 20 197 201
|
458 |
+
------------------------------------------QLPLRLDEFFNNVAFTGPETLGNGRLNVWGNSLPIGTLPT--GRIRVSGVEFDVSAPDGvAPDNVRCTGQLVHLPDGtAADWLHLLITSERRCEETVHLHYASGAVDPEWIRVSDFWPATGHFGEVLAARSAGMHYPHHIQRDLGGQVWSVRVPAVRREPLRSVRLPDNAALHLFAITVE-------------
|
459 |
+
>UniRef100_UPI0016035FF4_2448886/ 117 0.273 2.317E-25 19 223 233 17 231 611
|
460 |
+
-------------------SLPSLVPPAGAAAPGPTGVRAAAAEPRPLHRYFDNRAVSRPDRPDAADFDGAGNSLSASELTaagwAPGSRLLLDATPLTWPDrAPGEADNVRADGQLVTVTgRGPAVTLLLAATSPYGPAAPVTasgrVHYQDGSSTRYTVTAHDWRTGPAA---TSALSLPRHHAPDGVKAGR-ARLYVVTVPVDPTRTVRALALPADPgpdaDVHVFAVGLRASAA---------
|
461 |
+
>UniRef100_UPI0020C07DD2_2707767/ 117 0.398 3.162E-25 47 218 233 8 177 198
|
462 |
+
-----------------------------------------------LEPFLNNQAATTPDNLGEGRLNVWRNSIPAQ--PGPLELV-VDGVPLRSARLDGdGPDNLVCAGQRIEVPERRWDWLYVIGCGERRVRDVITWHFADGSVDRDQLALSDLWEGRSDHGEELALRTDVIHYPYHVQERIGITLWSQRVPITSRQPLTALSLPENPAVHLFAMTL--------------
|
463 |
+
>UniRef100_UPI000E22B0D5_871959/ 116 0.382 8.028E-25 45 218 233 6 177 181
|
464 |
+
---------------------------------------------LPLEPFLNNQAATTPANLADGRLNVWRNSIPAQ--PGPLERV-VDGVPLRSTRLDGnGPDNLVCSGQWIEVPRRRWDWLYVIGCGERRVRDYVTWHFADGSASRDRFELSDLWEGRSDYGEEFALRTDTIHYPQHVQERIGITLWCQRVPVTRRQPLCALSLPENPAVHLFAMTL--------------
|
465 |
+
>UniRef100_UPI00068A9E4F_1463917/ 115 0.376 1.095E-24 47 223 233 8 182 198
|
466 |
+
-----------------------------------------------LEPFLNNQAATTPDNLADGRLNVWRNSIPAQ--PGPLERV-VDGVPLRSARLDGnGPDNVLCAGQRIEVPERRWDWLYVIGCGERRVRDVITWHFSDGSVDRDHLTLSDLWEGRSDQGEELALRTDVIHYPYHVQERIGITLWCQRVPITSRQPLSAMSLPMNPAVHLFAMTLVGQSA---------
|
467 |
+
>UniRef100_UPI0003469B67_225762/ 114 0.426 3.791E-24 43 219 233 1 176 180
|
468 |
+
-------------------------------------------VPVDLSQHLNNIALTDFETLSNGRLNVWSNSLPMETLRADH--IQVGRVGFAGSKATGtEPDNIRCAGQYVDLPEMNADWIHILATSERRCEEEVGVHYTSGAVAGEWIRVSDFLPARAHFGEMAAARSFAMHYPHHRQEDLRGQVWCVRVPVTRWEAVRGLRLPDNPALHIFAASVE-------------
|
469 |
+
>UniRef100_V6K9D2_1352941/ 113 0.429 5.170E-24 91 221 233 0 134 140
|
470 |
+
-------------------------------------------------------------------------------------------MAFEFPaRATGRGDNIRCRGQLLPLPGVRADWLYLLGAAERRTEDEVELHYADGAVRTAWLRMSDfWPETAAWFGEPEAFRGSGLRYPRHTQDGHRPAIWQQRVPVTVPGELTALRLPDNPAMHVFALTavLEPG-----------
|
471 |
+
>UniRef100_A0A022MHX4_1883/ 112 0.369 1.311E-23 47 218 233 3 172 189
|
472 |
+
-----------------------------------------------LEPFLNNQAATTPDNLADGRLNIWRNSLPARSEPLE---AVVDGVPLRSAPLDGRgPDNVLCSGQRIAVPERRWDWLYVIGCGERRVRDVLTWHFTNGSVDRDHLALSDLWEGRSGYGEELALRTDVIHYPYHVQERIGITLWCQRVPITSRQPLGAMSLPKNPAVHLFAMTL--------------
|
473 |
+
>UniRef100_UPI001672C615_67368/ 111 0.384 4.533E-23 87 227 233 1 143 145
|
474 |
+
---------------------------------------------------------------------------------------ELEGTVFAFPaRDTGAGDNVRCRGQLVELPAGRWDWIGLVGAAERRTEDEVELHHADGTVRREWLRMSDfWPQTAPYFGEPLAFSTSGMRYPRHTHRHHAPSLWQQRVPVRVPAPLAAVRLPDNPAMHVFAMTVTADEESRLA-----
|
475 |
+
>UniRef100_UPI001C593FAE_2749826/ 109 0.259 1.149E-22 35 221 233 12 198 203
|
476 |
+
-----------------------------------GRRQSVRYSTVELKSHFNNIGFSYPDIY--GNFTGFGSSYPGDQFPhREDNIVNVKGIPFFLYNAKGRSNNIEFAGQSVEVLPDMYSALHVLGSSDNGSFSEYLKFYLKKSpVGQYKLELSNWVAGVPAYNEHEAYRCSALCAGQQVIETVKPVLWLQSVRFRQPIVFDEISLPDNMCMHLFSLTFEGA-----------
|
477 |
+
>UniRef100_A0A5B8M4I2_2599293/ 109 0.270 1.149E-22 52 232 233 890 1077 1311
|
478 |
+
----------------------------------------------------NTVGTASESDTGVGDFDAAGNSYSREQLAsvglAPGATGKVGSLHFTWPSSPeGAPDAVNPTGQTIDL-QGEVHSIAFIGAGiNGGASDTAVATLDDGSTMPVDFSFGDWVlptsDGSPAFGNSVVAKMSHRN---AVTQVQGAYLFATTpATAPDGRTIVSVQFPTDSKERVFAIATDVAPATNTATQLTT
|
479 |
+
>UniRef100_A0A838EJ16_2740538/ 109 0.237 2.135E-22 91 217 233 15 153 156
|
480 |
+
-------------------------------------------------------------------------------------------VVFTWPDVPaGAPDNYQAAGQVIPVQSmSNATILAFLGSATNGaSSGTATIKYTDGSTQDFTLGFSDWTlngnTNQPSYGNAISYTTSYRNNAHltNGKDTIQTYIFYSSVNLEAGKQVASVTLPTTvtgGQMHVFAVT---------------
|
481 |
+
>UniRef100_UPI000B595552_1501230/ 107 0.318 1.004E-21 42 219 233 3 182 196
|
482 |
+
------------------------------------------FHAMDLRPYFNNRGFTYESRYGEGRLTMGSSSFPAESIRF-GRMYRFGGIPFRY-QASEDGDNIETSGQAVALPwmPGKLDCVHALGvSANGDSFDCVSFVAGDRLLHTARLALSDFVSDRPAFGDRLAMTLPYMHMVSGRYAHVRPNLWICSIPYPgEAGAARALVFEDNPSMHIFAMTLE-------------
|
483 |
+
>UniRef100_A0A2T0PYA0_1144618/ 105 0.433 3.463E-21 70 218 233 3 150 155
|
484 |
+
----------------------------------------------------------------------WRNSLPAEEFPS--GELVIEGVPFHPPRPrRGADDHVTCDGQLLAVAEGGYDWLYLLACSERRVEEEIALHFTGGQVDFEPLRVSDFWAAVPFFGESAAFTSAVMHYPHHVQPRVPGTIWCQRVPVVRRAPLAAVRLPRNAAVHVFAATL--------------
|
485 |
+
>UniRef100_A0A838DPV7_2740538/ 105 0.260 3.463E-21 52 221 233 3 194 583
|
486 |
+
----------------------------------------------------NNIGItgTGYAALTAGGFDTAGDSYSAALLAAltpavtAGSTVTVNGVAFTWpgPDITTNYDNWkTAAAQTITFaPVSNATAVAFLGAASDGsSSGTATINYTTGVPQTFTLGFTDWKTGTPAFSNTLALTLSNYDTSAGVVKSPaaPVYVYYadsSSVTLDPTRIVASVTLPAAVTggglVHVFAAAIKTS-----------
|
487 |
+
>UniRef100_A0A081P3V2_1501230/ 104 0.296 6.429E-21 42 219 233 3 182 196
|
488 |
+
------------------------------------------FHAMDLRPYFNNRGFTYESRCGEGRLTMGSSSFPAESIRF-GRMYRFGGIPFRY-QTSEGGDNIETSGQTVALPwmPGALDCVHALGvSANGDSFDRVSFVAGDRLLHTARLALTDFVSDRPAFGDRLAMTLPCMHMVSGRYAHVRPNLWICSIPYPGEAGAApALVFEDNPSMHIFAMTLE-------------
|
489 |
+
>UniRef100_A0A2T6FTL9_189691/ 104 0.307 8.759E-21 42 219 233 3 182 196
|
490 |
+
------------------------------------------FHAMDLRPYFNNRGFTYESRCGEGRLTMGSSSFPAESVRF-GRMYRFGGIPFRY-QTTEDGDNIETSGQTVALPwmPGTLDCVHALGvSANGDSFDRVSFVAGDRLLHTARLALSDFVSDRPAFGDRLAMTLPYMHMVLGRYAHVRPNLWICSIPyLGEAGAARALVFEDNPSMHIFAMTLE-------------
|
491 |
+
>UniRef100_A0A838XCG5_1507437/ 103 0.269 1.626E-20 43 218 233 6 185 189
|
492 |
+
-------------------------------------------HIIELASIYNTKGVSIQDPKLVAQLDHIHSSIPGEIFP-SDEVISYQGIPFAFPLTEAvDNDVISCDGQIVEVNAQRsFQTLAFLGFSLFGDyQDKFTIHYADGVEEEVRFGLTNWKQyktGTPLFGEQIAMSLPY-YVENMILTELPRTVWLQKVTLQHSGHIKQVILPRNPYLMVIALTL--------------
|
493 |
+
>UniRef100_A0A060CMX9_487357/ 102 0.278 4.108E-20 44 153 233 73 186 187
|
494 |
+
--------------------------------------------YANLAAAYNNVGVTSGDDPKPGNFDGTGNSFNAELLAgqglTPGATVSANGYSFQWPNvAPGVADNVQTAGQLIKL-SGSGNTLAFLGSEAGDRTDTVTVHYTDGTTSTGTVGFP-------------------------------------------------------------------------------
|
495 |
+
>UniRef100_UPI0020C7B24C_2810306/ 102 0.256 4.108E-20 47 160 233 159 279 401
|
496 |
+
-----------------------------------------------LASAADNVGVTEQDDPGPGDIDGGGSSFIAERLAQkgvtPGAAVKANGFSFTWPDaAPGTPDNVTGKGQTIQVSGDeKGNALAFLGTGTSGSaEGTATVHYTDGTTAEAKLGFPNWAVCPP------------------------------------------------------------------------
|
497 |
+
>UniRef100_UPI000B9A8B31_1465/ 102 0.252 5.595E-20 43 218 233 6 185 189
|
498 |
+
-------------------------------------------HIIGLASIFNAKGVSIQDPTLAAQLDHVHSSIPGEIFP-SDEVISYQGIPFAFPLTETvENDVISCDGQIIEVNaPSSFQTLAFLGFSLFGDyQDKFTIQYVDGIEEEARFGLTNWKQyktGTPLFGEQIAMSLPY-YVENMILTELPRTIWLQKVALRHSEHIKQVILPRNPYLIVIALTL--------------
|
499 |
+
>UniRef100_UPI001942DCDF_926357/ 101 0.233 1.038E-19 52 175 233 739 875 1109
|
500 |
+
----------------------------------------------------DNHGISADRTEGQSDYDGWGSGYSRSALAgagvVPGNRFAAGGVSYVWPNTrPGQPDNVVAAGQRLTVaSPAGATRLGLLGSAEGqpaGAAGTLTVHYADGSTTPAEVGFSDWTlaggTATARSDNVVAARMPYRND---------------------------------------------------------
|
501 |
+
>UniRef100_A0A652KEP5_1827978/ 100 0.395 2.620E-19 90 227 233 2 145 147
|
502 |
+
------------------------------------------------------------------------------------------GTAFVFPPrPTAGGDNIRCRGQLVELPdgaaGGRYDWIGLVGAAERRTEDEVELHYRDGSVSRAWLRMSDfWPQTAAYFDEPLAFRTASMRYPRHTHRHHAPALWQQRIAVVRPEPLAAVRLPDNPAMHVFAMTAVVDEESRLA-----
|
503 |
+
>UniRef100_A0A931JBU5_114683/ 99 0.271 4.857E-19 75 220 233 0 147 150
|
504 |
+
---------------------------------------------------------------------------PMAQLPQTGQT-EDDGIAFAFTNgSEGSNNNVIAAGQKVTVAAGGYTKLHVLGAGDTGNvSVPAEAAYADGSTGKLTIQLTAWRSG-PAYGETEAVRTSQIHTRTG-PLGTKAAIFHQVVELDPAKELSAITLgaPSgTARAHIFALSLEK------------
|
505 |
+
>UniRef100_A0A942KCA5_2478917/ 97 0.304 2.270E-18 114 227 233 0 113 114
|
506 |
+
------------------------------------------------------------------------------------------------------------------MPEGRYSELLLLGASEQGSyQATVRFVYQDETSDELTLGLSDWCQ-LPRFGEAIAYEFIQRRGATGAMERITCRIYFQTLPLRPEAVLTRIVLPDRDTMHLFALTLRQAESEETP-----
|
507 |
+
>UniRef100_A0A838DN80_2740538/ 95 0.283 1.059E-17 51 157 233 41 160 458
|
508 |
+
---------------------------------------------------YNNVGItasTSTAAMALGNFDNAGDSYPSENLVAdgfiPGTIVTEYGIDFVWPNvAAGQPDNWKAAGQTIAVNAAPGDSvLAFLGAAtdtfNRGAAGTATIHYRDGSSQNFRLNFTDWLN---------------------------------------------------------------------------
|
509 |
+
>UniRef100_A0A973VFE2_1873460/ 94 0.295 1.441E-17 45 223 233 5 185 187
|
510 |
+
---------------------------------------------VDLSGLVDRIGATYDTDRDSGEFNPWGNSFPAEELPF-GGTTAVGGVPYALVDKPPRgPDHLEALGQVIDCAAtGPAHGLALLAAGEQGPQElRVHVHLDGGRRLSLPVTVPGWSVRPDAPMTPDQLRAGHLHYPGDYGLARlLPALWSRVLRL-PGVPVTAIELTANPLVHVFAVTLEIGAA---------
|
511 |
+
>UniRef100_A0A951HER9_2800791/ 92 0.273 1.241E-16 43 222 233 879 1050 1255
|
512 |
+
-------------------------------------------TPLLLDALFN--------SQSTGDFDTEKRGYDSASLPPPGLYhLGANHIPFRLTGHIGS-DNIACNGQVIQLPPsSRGKTLYLLGAAAPGDQgGLFSVEGTQGHLTRLPVRMNDWVLGGSLHNE-AAFTFERQHTAEGLR-AMTTHFWITSLQLPSDQRPVRLRLPYNTNLHLFAATLAAAS----------
|
513 |
+
>UniRef100_UPI001EF5A1EE_1795630/ 89 0.271 7.834E-16 31 142 233 879 995 1009
|
514 |
+
-------------------------------VTAPGrcAIQTATSCAVDLSGAYNNDGVATLTDPGQGNFDGTGLSFAANLLPAPGAT-TIGGATYQAPPTTGtAKNFVKSTGQAITLPSGNYTSLDIVAAdngSTGSAAATAVVTYSD------------------------------------------------------------------------------------------
|
515 |
+
>UniRef100_UPI001EF302F3_2706031/ 85 0.282 2.280E-14 82 220 233 8 147 527
|
516 |
+
----------------------------------------------------------------------------------PGAELTVAGAPLRLPDpAPGAPDNVRADGQRVRVT-GRGDALAFLVTATGGrALGRGSVEYADGHRSSFRVEAPDWRTGTLA---TSAVALPRISTTDGPRKARAA-LYVVTVPLRHDAAVRAVTLPRDPlgpaSLHVFALGVRP------------
|
517 |
+
>UniRef100_UPI001AD6B2B7_67304/ 81 0.293 3.568E-13 35 125 233 42 130 243
|
518 |
+
-----------------------------------GWVPVPDPVPVPLDSLYDNDGIDTATARG-GGFDGSGYTFPGEELPA--GRVEVDGVPFDFPsSTAGTKNNVVALGQRVDLPRGRYLSAVFL-----------------------------------------------------------------------------------------------------------
|
519 |
+
>UniRef100_UPI001C8AAAD7_1490222/ 71 0.311 1.299E-09 47 118 233 906 982 993
|
520 |
+
-----------------------------------------------IEWYQNNAGISDDGKAGQANFDGGGWSYSAQALAAeglkAGQPVAWNGFTFTWPNrRPGQLDNVQASGQVVDLPSAP------------------------------------------------------------------------------------------------------------------
|
521 |
+
>UniRef100_A0A2N5XH94_2593676/ 70 0.286 3.213E-09 16 120 233 30 143 151
|
522 |
+
----------------APVSATATASPGTARDASPGSGvapTSADPSPLPLHRLFDNTGTAPDGpvAPGQGGLDGAGNALSRDDLAaagwAPGSRLTLDGTPLTWPDSrPGQPDNVVADGQAVR-PDGHGD----------------------------------------------------------------------------------------------------------------
|
523 |
+
>UniRef100_A0A4Y8YB55_1883/ 61 0.420 3.184E-06 155 221 233 4 72 78
|
524 |
+
-----------------------------------------------------------------------------------------------------------------------------------------------------------WPETAAWFGEPEAFRGSGLRYPRHTQDGHRPAIWQQRVPVTVPGELTALRLPDNPAMHVFALTavLEPG-----------
|
525 |
+
>UniRef100_A0A942KBR5_2478917/ 60 0.275 4.289E-06 34 109 233 642 720 721
|
526 |
+
----------------------------------PVSFEEGVFVALDLGGAFNNDAFSDPSNP-RGNFDnrsgVLGATYPAERAPAENSIVEVSGTLFRFPPTSADANNIALKG---------------------------------------------------------------------------------------------------------------------------
|
527 |
+
>UniRef100_UPI001F0BFC40_2203210/ 57 0.268 4.622E-05 47 108 233 130 196 287
|
528 |
+
-----------------------------------------------LAAAYNSVGVTDESNTAPGDFDGGGNSFSAQKLAdvglSPGAAVTALGAELTWPDvPPGVKDNVSSR----------------------------------------------------------------------------------------------------------------------------
|
529 |
+
>UniRef100_UPI000A7FA51B_860235/ 46 0.307 1.287E-01 33 84 233 723 774 827
|
530 |
+
---------------------------------LPPLPDQGTTVTVDLTAAMDNDAFTNEFHMGDGDFDGTGNTYPAAQLPQTGQ----------------------------------------------------------------------------------------------------------------------------------------------------
|
531 |
+
>UniRef100_A0A1U9KA27_1471761/ 44 0.254 5.502E-01 170 222 233 0 57 146
|
532 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------MPYRNSPSG-QDGLATYVYYTYVPLDhPDKMVRSVTLPdadtiDKGRIHIFDIAIKYAA----------
|
533 |
+
>UniRef100_UPI001915FEBF_2778368/ 44 0.318 9.823E-01 69 128 233 10 78 79
|
534 |
+
---------------------------------------------------------------------GCGESYSAQALlqapphIAPGNAVVVNGVTLTWPNvAAGSQDNYKTKGQTVPVTPvnGATTQAFLEGSS--------------------------------------------------------------------------------------------------------
|
535 |
+
>UniRef100_A0A2A3HR12_1938860/ 151 0.439 5.047E-37 35 231 233 6 203 212
|
536 |
+
-----------------------------------PQASAPRYRVVELADHRNNRAATRVHTTAAGGFNVWRNSFPAEHLPPGGSQVEVGGVPFSFPPVGEGDDNVRCDGQFIAVPAGRYDWVHLLAAAERRTEDTVELHYADGSVDTEWLRVSDFWAAPAWFGELPAYRTPVMHYPYHVQPGVSAHLWAQRVPVPRRTELAGLRLPRNIAVHVFAATAqEPPPGTAGLPAPD-
|
537 |
+
>UniRef100_C7PY52_479433/ 147 0.267 2.151E-35 47 230 233 1074 1269 1548
|
538 |
+
-----------------------------------------------LPAAFNNDAITNDSNRGGADLDGAGASFSAQALAsvgvTPGAPLVHDGLTFTWPDRQvGQSDNVVAAGQTIDI-SGSGSTLGLLGTSTWGaSSGSGTIAYTDGSTQPYTIAFGDWANGTPPTGGDVAIRAPYGNQP-GNQTGWAATIDYFPITLDATKTVQSITLPpgsaqphgGTPAMHIFAMSIKSDQLSVTAPTA--
|
539 |
+
>UniRef100_A0A1B4ZDD3_1213862/ 141 0.403 1.250E-33 45 228 233 26 210 211
|
540 |
+
---------------------------------------------LDLTAFADNVGVTSPDRLSEGAFNIWGNTFPADELPK-GGPVDIHGIPFRFPAvGTGQPDNVRCAGQFIDVPVGRYDWIHVLAAAERRTEDFVRLHYTDGAVDPEWLRVSDfWPETASRFGESAAVSCTRLHYPRHIQRSMGPTLWRQRVAVPREQDLSAIRLPDNPAIHIFAMTLAPATQPETTQ----
|
541 |
+
>UniRef100_A0A4R0HF04_1124743/ 133 0.394 1.199E-30 25 218 233 35 228 237
|
542 |
+
-------------------------MPDQMPDQMPATSVQARCRAVNLAPHRNNVGSTPATDTRGGAFNIWGNSFPAEELPAPGQFV-VDQVAYDFPPTgRGTADNVRAAGQFIEVPSGRYDWLYVLGAAERRVEDELAFHFADGSVDFEQLRLSDFWAAPGWFGETQVRATRSMHYPFHVQAGVPAMLWSQRVPVTRRAALAAVRLPRNPAVHLFAATL--------------
|
543 |
+
>UniRef100_A0A2T6FTL9_189691/ 98 0.302 1.187E-18 42 219 233 3 182 196
|
544 |
+
------------------------------------------FHAMDLRPYFNNRGFTYESRCGEGRLTMGSSSFPAESV-RFGRMYRFGGIPFRYQTTEDG-DNIETSGQTVALPwmPGTLDCVHALGVSANGDSfDRVSFVAGDRLLHTARLALSDFVSDRPAFGDRLAMTLPYMHMVLGRYAHVRPNLWICSIPyLGEAGAARALVFEDNPSMHIFAMTLE-------------
|
examples/7wux/msa/2/non_pairing.a3m
ADDED
@@ -0,0 +1,436 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
>query
|
2 |
+
MGSSHHHHHHSQDPNSTTTAPPVELWTRDLGSCLHGTLATALIRDGHDPVTVLGAPWEFRRRPGAWSSEEYFFFAEPDSLAGRLALYHPFESTWHRSDGDGVDDLREALAAGVLPIAAVDNFHLPFRPAFHDVHAAHLLVVYRITETEVYVSDAQPPAFQGAIPLADFLASWGSLNPPDDADVFFSASPSGRRWLRTRMTGPVPEPDRHWVGRVIRENVARYRQEPPADTQTGLPGLRRYLDELCALTPGTNAASEALSELYVISWNIQAQSGLHAEFLRAHSVKWRIPELAEAAAGVDAVAHGWTGVRMTGAHSRVWQRHRPAELRGHATALVRRLEAALDLLELAADAVS
|
3 |
+
>B4XYC1 250 1.00 4.608E-69 19 351 352 4 336 337
|
4 |
+
-------------------APPVELWTRDLGSCLHGTLATALIRDGHDPVTVLGAPWEFRRRPGAWSSEEYFFFAEPDSLAGRLALYHPFESTWHRSDGDGVDDLREALAAGVLPIAAVDNFHLPFRPAFHDVHAAHLLVVYRITETEVYVSDAQPPAFQGAIPLADFLASWGSLNPPDDADVFFSASPSGRRWLRTRMTGPVPEPDRHWVGRVIRENVARYRQEPPADTQTGLPGLRRYLDELCALTPGTNAASEALSELYVISWNIQAQSGLHAEFLRAHSVKWRIPELAEAAAGVDAVAHGWTGVRMTGAHSRVWQRHRPAELRGHATALVRRLEAALDLLELAADAVS
|
5 |
+
>SRR5215472_11685424 196 0.515 1.144E-50 1 346 352 63 412 423
|
6 |
+
-GHSRAGRDAGDGAVTAVSIEPLELWTRDLGSCLQAVFATLLLREGQDPVTVLGSAWDFRYEPGDWRSEEFYYPCAGRPLAEALAPLHPLTSVWHRapaDGPDPLRPLRAQLAAGRLPIAAVDNFHLPFRPAFGDVHAAHLLVVAGIDDERgqVLVSDAQPPEFQGPIPVADFLRAWGSANPEDVQDAFFSNSPLDRRWLEVRFTGEFPKPTRAWLAEVLARNVACLRWPDAGQLWSGLPGVERYVDRVLSQ-ACGPGGSAVLEELYVLSWGLQAHADLHAEFLRRHGVAQDLPALREAAEVVDRVAHAWTGFRMTGAHGRLDRPDFGGDLARHGGVLVRAHHEAASALELA-----
|
7 |
+
>3300005562.a:Ga0058697_10000188_20 192 0.458 4.730E-49 22 350 352 9 336 338
|
8 |
+
----------------------VRQWYRDPLSCLQSTLATVLLGAGVEPLPVLGLAWEFLFKPGDVRREEFYYPCRfEGDVARSLAPYHPIRSSWWSPAPgeDPLAELARRVEDGELPVAGVDNYHLPFRPAFRDVHAAHLVVVYAIDreRDEVGVSDAQPPAFRGSIPTEDFLAAWASPNRADAEDAFFSDSEIGRRCLSVEIEGPIAPLDPVRLEAALAENLARF----GAGGWSGLAGLRRYLDDLS--ARAFAGERRPLEELYPFGWGMQAQTYLHGELLREVGADWSFPELAEAGRAVQEVASAWTGLRMTGAHALAAPAAAAAELRRHGNRLRRRYETALERVAQAVESL-
|
9 |
+
>SRR6266536_566964 191 0.460 8.792E-49 21 346 352 170 497 519
|
10 |
+
---------------------PVRLWYRDLVSCLQATFATVLLHAGRHPLAALGAAWGFYCWQGEVTTEEFYYGCRHGSLGASLAPHHPVASRWHAGPDGSdpLAPLRDQVGAARLPIAVGDNYHLPFRPAWHDVHAAHLLVVYGFGDGTVEVSDAMPPAFQGPIPVEDFLRSWGSTNPRDEQDDFFSNAAIGRRWLEVTLGEPWPVEDRPWLRSVLQSNLDDLAGGGDglAGGLLGLPGLRGFLDELLARAGRGDSSVLL--ELYVFGWGVQAQASLHGELLRERGRAWDLPALSEAGRAVESVAHAWTGLRITGAHGRRDPAAVAADLARHGRAVEHAYRHAADLVALA-----
|
11 |
+
>B5HTY9 190 0.432 1.199E-48 20 350 352 14 349 350
|
12 |
+
--------------------PEPELWYRDLISCLQSTFGSVLARAGADPLAVLGAGWRFLHLPGDVRSEEFYYPCPADesggtDLGAALAPHHELHARWWQpaDEDDLWREVRETLAEDRLVIAAVDNFYLPFRPAYQDVHAAHLVIVYGLDETRgvVHVSDAMPPAFRGAVPIEDFLRSWGSANPSDVQDAFFSDSGIGRRCLDVRLDAPAGPLTPELLGGFLRTDVDGFTTATPA--RTGLAGYDEFAAELLDRCRAQDAG--ALRELYPFGWGMQAQASLHGELLRRCGSRWDDPALAAAGRAVESVAHAWTGLRFTGAHGLADPRAAAPDIARHVTRLRGAYACAVDAVGAAAGRL-
|
13 |
+
>A0A1G8BDD9 184 0.361 1.702E-46 22 350 352 7 341 344
|
14 |
+
----------------------LDFWFHDLCSCLQDCFGTLLLRHGQDPVAVMGAAWEFHHAPGPVATEEFYHPAPRPTLGDNLMPHHPVRATWreQEDVESSWQDIRASIIDGRPAIAAVDNFHMPIRPAYGDVHAAHLMVVWGFDDEagEVYVLESTPPQYSGPVSLADFQRARSSANdsRPDTRDYFFAGAGIRGRWIDVTVDGAFPAVEREWVADVVTANARGFAEPAPGPGWSGLTGLTTWLEGVCDRADDIEDAGVALAELYTAGWAAQSAAALHADFLRDMGNRFGCDPLVHAGRQVDRLANQWTPLRILGAHGSTTGHRHTDQLRDRVRRFTAGHRDAVARLEAAAAAL-
|
15 |
+
>GraSoiStandDraft_9_1057307.scaffolds.fasta_scaffold4049564_1 183 0.356 4.309E-46 20 350 352 5 338 339
|
16 |
+
--------------------PGIRPWRHDLTHCLHTTFGVLVGFYGLDPLHVLGAGWGFGYRLDDVRREEYYFPCLDGSLLAGLAPHHGLDSHWHEPEDAAhgWEQVRACVAAGQPVAVAADNFHLPFRPAFSDVHTNHLLAVYGFDDEegTVYVADPVPPRFDGPITVDALTNARDSNNPItHERDMFFTANPIANRWLTVELTGPQPTFDRDFVRRVLLSNLDGFARGKVEGPTMrGVAGLREF---LTACLPRIADEPQRIDEVFIVAGVTLAITGLHADFLADSGRRLGLPALVELGREVDRVAHHWTALRIGVANSRSAPIDDLPALLRRGRNLVADQERVLDRMAQTAAQL-
|
17 |
+
>SRR5581483_8883662 181 0.405 2.024E-45 21 319 352 13 314 353
|
18 |
+
---------------------PMAHWHHDLCSCLQCCMASVMHYYGRDPILTLGAVWDFYYSPEDLRKEEFYSPCRWSGLAESLLPYHPVTSRWHQPDDPevGWLQVKEVVQNGSPVIVAVDNFYLPFRPAYQDIHAGHLILVYGFDEetDQVYVLDSMPPAFIGPIALTDLKASRSSLNPADERDYFFSNAPVANRWLELNIEAPFPQFTEKWVMDVINANLRRFTTPCDGSALSGMSGLARYF-DLLDQNMANPAGHHALEELYVLGWSIQAATALHADFLMQVGKQLNWPRLGSIGRQVAHLAHHWTALRMLGAHGRAHPR--------------------------------
|
19 |
+
>K0K7T6 179 0.411 6.974E-45 21 349 352 4 332 334
|
20 |
+
---------------------QVEQWYRDPVSCVHATLAEVVRHAGAEPLEVLGLGWEFRHLPGDVRPEEYYWPCRvPGDLAGSVLPHHRVRSVWRTaPEPDPLTALTGPLAVGRLPVLAVDNYHLPFRPAYHDVHAAHLILVRDLDLDRgvALVSDAMPPAHRGELPVADLLRAWRSTMPPDEQDVFFSGRGGQARWLEVIVEAVPPPLTPEALRSALRANVDGFQSQGP--ERTGLAGFGEFLAEVVDRSAAGEAAAP--AEVYTFGWSMQAQSAVHGELLRTCGMRWAEPELAEAGRRVEQVAHHWTALRVTGAHGRTDPVAAAPGLHRHGERLRRSYEQAVESLALASAA--
|
21 |
+
>A0A1L7NQD0 179 0.423 1.294E-44 20 350 352 14 344 345
|
22 |
+
--------------------PEPELWYRDLISCLQATCGSVLAREGVDPLDVLGAGWQFLHVPGDVRPEEFYYPCSEGELGRALAPHHDLGARWWQPADENdvWREVREHLADDRLVIAAVDNFHLPFRPAYGDVHAAHLVVIYGLDETRgvVYVSDAMPPAFRGAVPIEEFLRSWGSVNPTDVQDAFFSNSKIGRRCLDVSLGTRRGPLTPELLGGYLRTDIEGFTTETSA--RTGLAGYDAFVGELMERCRAGDA--DALRELYPFGWAMQAQASLHGELLRRRGRDWDDAVLAGAGRAVETVAHAWTGLRFTGAHGHGDPRAAAPDIAHHATLLRAAYARAVDAVGAAAARL-
|
23 |
+
>SRR5919197_4477474 179 0.452 1.294E-44 21 350 352 224 552 553
|
24 |
+
---------------------PPRQWYRDPISCLHSTLAAVVGYEGADPLEVLGLSFEFLYKPGDVRPEEFYFPCRfGDDLARSIAPYPPVRSEWWRaTGEDPLAELAARISNGELPIVAVDNYHLPFRPAYHDVHAAHLVVVYGIDAGRreVSVSDAMPPAFQGAIADGDFLRSWSSPNPRDDQDAFFSDARIDRRCLSVRFETPIPPLDGARLKDALESNSQRLSAG---EGWSGLPGLRRYLDELVGRADAGERRP--LEELYPFGWAMQAQAYVHGELLRVCGADWSVPELREAGRAVESVAYAWTGLWITGAHGLSAPAEATSGLRRHAGRLRRRYEEALEAVEQAVEAL-
|
25 |
+
>SRR5215211_2955382 175 0.442 1.534E-43 31 350 352 1 330 348
|
26 |
+
-------------------------------SCLQATFATLLLHRGHDPLEVLGAHWEFRYRPGDVRPEEFYFPERvAGDLGASLAPHHPVSSRWASGPAErPLDGIAAELRAGRLVIAAVDNFHLPFRPAFGDVHAAHLLVVYGIDERHGQVHlsDAMPPAFAGPIPVEDFMRSWSSQNPMDAQDAFFSDAGIGRRYLRTTVGEPSRDLDPRHVLACNRRLFDGGAQSAgaqpagaqPAGDWTGRSGRARYLDQLGRRARAAD--TRALEEVYPFGWGMQAQASLHGELLGRWGRQHAVPRLREAGRLVEAVAHGWTGLRMTAAHGRTDPVAAAPDLARHASRLRHAYDLAHEALAEAEAAL-
|
27 |
+
>A0A1M5CPU0 173 0.391 7.186E-43 24 343 352 7 328 364
|
28 |
+
------------------------GWYRDPLSCLHVTLAVLLERAGRDPLDALSRDFGFRWIPGDVRGEEFYWPVdEPDDPVAGMAPSAGIRSRWRTAAADPLAALASALAAGRPPIVAVDNFHLPFRPAYHDVHAAHLVVVTALDvaGGRAFVVDHMPPAHAGWLPVEHLLAAWGSVNPPDAQDDFFSGEPIERRWLEVDLGPGLAPLTAADAAASAVENCARFgPLLGAPAPSSGRADLHALAA--AAVAAARDADGGALAEVYTFGWSMQAQAAVHGELLRRTGAAAGDLVLAAAGRKVEAVAHVWTGLRVTAAHGRLDPAAVAPAVGRHAARLVTAYETAFAAL--------
|
29 |
+
>GraSoi013_1_40cm_1032412.scaffolds.fasta_scaffold160357_1 172 0.404 1.814E-42 20 332 352 5 320 339
|
30 |
+
--------------------PGIGHWRHDLTHCLHTTMGVLLGFYGLDPLHALGAGWGFAYPAGDVRREEYYFPTVDRSLLGSLAPHHPLSSSWHEPAGaeQGWHDVREAVSGGVPVAVAVDNFHLPFRPAYSDVHTNHLLAVYGFDDDRheAYVADPVPPRFQGPIPLAALAAARDSANPiRHDRDLFFTANPLANRWLTIHLDGPQPALDLDFVRAAMLRNVAGLADGTAAGaVLRGLAGQRAF---LESALADLDAGEHRVDEVFVVAGAILAVTGLHADFLGAAARRLDRPALAELGREVDRVAHHWTALRIGVANARSRPGAEVPALQARAAAL-------------------
|
31 |
+
>SRR5919197_654015 172 0.393 2.471E-42 42 347 352 1 309 311
|
32 |
+
------------------------------------------AHRQLDPLVVLGSGWTFAYSPGEWAPAEFFYPALDGSLAATLAPHQPLSIVWREpgSPEEAEQELVAALAGGRPSIVAVDNYWLPFRPAFRDVHAAHLVVVYGHDAERgsFAVLDAVPPAFRGWISRETLARARGSDNPAADDEAFFRGAPIANRWLDVDVDGAVSEVEPEWLGRVVAGNLAHMLGPSGETTWTGLAGVRRYVEWLAERAAAEDGEP-ALKEAYGFGWAPQASAALHGAFLHAAGLRLDAPGLREAARTVDLVAHAWTPLRVGAGHAVSDPPAAAGYLARRGRALVTAYEAAIDRLRELA----
|
33 |
+
>A0A1C4Y1U0 170 0.438 1.156E-41 25 350 352 39 368 369
|
34 |
+
-------------------------WYRDPLSCLQTTFAAVVDAAGGDPLEVLGAGWEFRHLPGDVRTEEFYHPSRhPEDLGRSIAPHHHVRSRWTTPaaDRDPLSELADEIAAGRLPIAAVDNFHLPFRPAYHDVHAAHLVVVYGVDlaRDLVLVSDAQPPAFTGPIRAEDFLAAWGSVNPADDQDAFFSATRIDRRYLRVEI-GDSRPLDRDGLRAALRANVDGFTAAPASDGgaWTGLAGLRRYLDEVVAAAAAGD--SATVRDVYPFGWSMQAQACLHGELLRTVGVDRDLPCLREAGRAVEAVGHTWSGLRVTAAHGWPAPQAAAESLEHHGARLQQRYQCAVEAVDRAVGRL-
|
35 |
+
>SRR5919197_910194 169 0.457 2.916E-41 22 346 352 151 470 475
|
36 |
+
----------------------IRQWYRDPVSCLQSTLATVLLAADTEPLPVLGLRWEFLFVPVDVRPEEFYYPCRfADDLARSLAPYHPLRSSWWSPAPEedPLAELARRVEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDreKEEVGVSDAQPPAFRGSIPAKDFLAAWGSANPADTEDAFFSDSEIGRRCLSVEIEGPIPPLDAERLRTALEENLAGFVT--------GLADLGRYVDDLE--ARALADERRPLEELYAFGWGMQAQTYLHGELLRETGAGWSVPELAEAGRAVHEVASAWTGLRMTGAHALGARAAAAADLRRHGNRLRRRYEAALEHVAQA-----
|
37 |
+
>SRR5919204_1883335 168 0.472 3.969E-41 22 323 352 124 424 427
|
38 |
+
----------------------VRQWYRDPLSCLQSTLATVLLAADAEPLPVLGLAWDFLFVPGDVRPEEFYYPCRfEGDVARSLAPYHPVRSSWWSPalDEDPLAELARRVEDGELPVAAVDNYHLPFRPAFHDVHAAHLVVVYAVDreRDEVHVSDATPPAFQGAIAAADFLASWSSANPADDEDAFFSDSRIGPRCLAVEIDGPLPPLDRERLRAALCENLARF----GAVGWSGLAGLRRYVDDLTARAQAGERRP--LEELYAFGWGMQAQTYLHGELLRETGAEWSAPELAEAGRAVQATASAWTGLRVTGAHGPAAPGDPGP----------------------------
|
39 |
+
>SRR5919202_365103 167 0.449 1.362E-40 22 350 352 157 480 481
|
40 |
+
----------------------VRQWYRDPVSCLQSTLATVLLAAGAEPLPVLGLRWEFLFVPGDVRPEEFYYPCRfEGDVARSLAPYHPLRSSWWSPaaGEDPLAELARRVEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDRERgeVAVSDAQPPAFRGAIPAEDFLAASGSPNPADAEDAFFSDSEIGRRCLSVEIDGPIPSLDAERLRAALAENLT--------DFATSLAELGRYVDDLE--ARALDGERRPLEELYAFGWGMQAQTYLHGELLREMGAGGSVRELAEAGRAVQEVASAWTGLRMTGAHGLAAPAAAAADLRRHGNRLRRRYEAALEHVTRAVESL-
|
41 |
+
>A0A2A2D0D6 158 0.369 8.748E-38 22 348 352 88 416 422
|
42 |
+
----------------------VRPWRHDLGGCLHGCLATLLEHRGVPALPVLGAAWTFRHFPGGVRREEYYYPcADGESLLTALAPHHPVRSVWHEPADAAtgWEQVRDAVAAGTPVAVAVDNYHLPFRPAYRDVHSNHLVVVHGYDEErgTVRVLDAVPPAFHGDITLAELTASRDSGNeLVHERDMFFTGVHIDNRWLSLEldaAPGDFPALDRAEVARVLALNLAHFAAPPDGDACTGLAGQQAF---LEGTVKRLTAGEDIRDELFVAAGAALACTAVHADWLALAGRTLDLPGLVEQARSVERVAHHWSAVRIMAALTRDGGLTP----RRLAGRVHALLKDHESALTALDD---
|
43 |
+
>688.fasta_scaffold2401073_1 156 0.357 7.519E-37 22 308 352 11 304 347
|
44 |
+
----------------------VVSWRHDLVGCLWTSAATILAFHSAPVLPTLGAAWGFRHQPDDLRREEYYYPCKPGvSLYEAIAPYHPVRSRWHEptDAEQGWIQVRDMVLSGEPVVVAADNFYLPFRPAYQDVHTNHLIVLYGFDTERgsATVLDAVPPRFNGEITITEFTAARDSGNPqLHDRDMFFTANPLGNRWLEVEvETAAFPPFDLDMIKYVIRRNLDGFAASPDGGVRDGYSGMAGQAAYLDDLAGRLEVGENVRDELFLVAGAVLANTALHADWLALAAGVIRIPALAEAGRDVERVAHHWTAIR-------------------------------------------
|
45 |
+
>SRR5580658_171216 156 0.357 7.519E-37 22 308 352 56 349 392
|
46 |
+
----------------------VVSWRHDLVGCLWTSAATILAFHSAPVLPTLGAAWGFRHQPDDLRREEYYYPCKPGvSLYEAIAPYHPVRSRWHEptDAEQGWIQVRDMVLSGEPVVVAADNFYLPFRPAYQDVHTNHLIVLYGFDTERgsATVLDAVPPRFNGEITITEFTAARDSGNPqLHDRDMFFTANPLGNRWLEVEvETAAFPPFDLDMIKYVIRRNLDGFAASPDGGVRDGYSGMAGQAAYLDDLAGRLEVGENVRDELFLVAGAVLANTALHADWLALAAGVIRIPALAEAGRDVERVAHHWTAIR-------------------------------------------
|
47 |
+
>SRR3569833_1969934 149 0.341 1.019E-34 22 308 352 167 457 480
|
48 |
+
----------------------VRPWRHDLAGCLHACAGTLLDHQDIPPLDALGAHWGFYYPPGDFRQEEYYFPCRPGtSLLGSLAPYHPISSRWHlpADAEEGWSDVREQRVAGWPTAVGVDNFWLPLRPAHQDVHANHLVIVYGFDDenETVRVMDTVPPRFDGDLPLSVLTAARGSgTEAHHARDMFFADSAIAHRWLEISVNhARRRPPDRPTIAAYLCRNLAGFAAPDDENDHDGLAGLESFLRDMETRLSRGDEIAD-----ELFVVAGAALadTALHADWLAEAGRAVGIPSLTEAGRSVERIAHHWTALR-------------------------------------------
|
49 |
+
>SRR6266536_1184334 149 0.466 1.881E-34 38 286 352 2 252 493
|
50 |
+
--------------------------------------ATVLLHAGREPLAALGAAWGFYCRPGEVTTEEFYYGCRHGSLGASLAPHHPVASRWHAGPDGSdpLAPLRDQVGAARLPIAVVDNYHLPFRPAWHDVHAAHLLVVYGFGDGTVEVSDAMPPAFQGPIPVEDFLRSWGSTNPRDEQDDFFSNAAIGRRWLEVTLGEPWPVEDRPWLRSVLQSNLDDLAGGGDglAGGLLGLPGLRGFLDELLARAGRGDSSALL--ELYVFGWGVQAQASLHGELLRERGRAWG-----------------------------------------------------------------
|
51 |
+
>A0A022MLG6 147 0.352 4.714E-34 23 339 352 23 341 358
|
52 |
+
-----------------------PQWYEDPLSCLQTTLGSILLEHGLRPVEVLGRACEFAFAPDDVMCEEFYRPAqSSRGVAADLCPYH--DVESAWTSGKDADDLIGLIEEHAAVIVAVDNYHLPFRPAYHDVHAAHLVVVpawrrTGGGQLEFYVSDAQPPGFQGWLAAEHLVESWTSGNPTDTQDVFFSSREIGGRVLTVKVRPEPGPLTDDQVVRALRGNLDRWDTgtaGPAEGVWTGRSGLRRFVERLAEHSAD----PARLRPAYTFGWAMQAQAYLHGRFAQECALRTRPGHLAEVAASADRVVSAWSNVRLLSAHAAHTP-GAPELIRRRGEELSHAYEQL------------
|
53 |
+
>SRR6185503_6371475 140 0.318 1.158E-31 25 293 352 68 340 342
|
54 |
+
-------------------------WRHDLVGCLWTCAASILDFYDVPALETLGAAWTFRHCPQDVRREEYYYPCPEGtSLYEAIAPYHPIRSVWHvpADAEEGWQQVRDQVADGTPVVVAVDNFYLPFRPAYQDVHTNHLSIVYGYDEqaGTVRVLDAVPPRFDGDIRIDELTAARNSANPeLHERDMFFTNRPIANRWLEIELdAAAFPPFTLDTVRSTLRRNLQGFYAAASEVEYLGIQGEQEYLASQANWLDKGDDIRDGLFLAAGAAL---ANTALHAEWLALAGRQFYQPRLAEL----------------------------------------------------------
|
55 |
+
>4772|scaffold_11782_c1_3|+1184|00 137 0.296 1.329E-30 22 308 352 7 327 369
|
56 |
+
----------------------VRPWRHDLAGCLHACLGTLLDHAGRRPLEVLGAGWRFYYRLGDLRAEEYYHPCPdGRSLVASVAPGQGISSRWHRpaDAEQGWQQVREQVLAGVPVAVAVDNFELPFRPAYRDVHSNHLVVVYGFDDErgTVRVLDAIPPFFDGDLPLGVLAAARDSGNRsSHERDMFFADNPIGNRWLELVADGAPSAEPRspagylAANLAALRAGASGNPdssagpnsgagpnssaqpnsdaepdRGAEPNSYAGRDGIARFLTDMSDRLADGHSIADELFVVSGVALAGTA---VHADWVGDTGRRLGLPGWPELARRLDRLAHHWSAVR-------------------------------------------
|
57 |
+
>SRR5258708_5638915 137 0.508 1.803E-30 104 347 352 13 257 264
|
58 |
+
--------------------------------------------------------------------------------------------------------VRQARGAGRRPTAAVDNSPLPFRPAFGDVHAAHLLVVNGVDDERGLVHvsDAQPPEFQGPIPVADFLRAWGSTNPEDTQDAFFSNSPLDRRWLEVRFTGDFPVADRGWLAGVMAENVRRLRRPEPGPLWSGLAGVERYVERVLEQ-AAGPGRSDALEELYVLSWGLQAQADLHAQFLRGFGVAEDLPAMREASCLVDGVAHAWTGFRMTAAHGRLDRPDFGQELRWHGRRLVRAHDEAVDAIELAL----
|
59 |
+
>GraSoi2013_115cm_1033766.scaffolds.fasta_scaffold685313_1 134 0.764 1.521E-29 115 305 352 0 190 191
|
60 |
+
-------------------------------------------------------------------------------------------------------------------IAAVDNFHLPFRPAFHDVHAAHLLVVYRITDTDVYVSDAQPPAFQGPIPIADFLRSWDSANPAHDADVFFSSSPSDRRWLHARMRGAGPRTDRGWLAQVIRDNVARFRAGSPDGIETGVEGLRRYLDELCTHEPGTQAADAALSELYVISWNIQAQTGLHAEFLRAQGLHWRIPELAEAAARVDAVAHGWT----------------------------------------------
|
61 |
+
>ERR671931_1062055 130 0.480 2.348E-28 21 243 352 108 326 335
|
62 |
+
---------------------PPRQWYRDPISCLHSTLAAVVGYEGADPLEVLGLSFEFLYKPGDVRPEEFYFPCRfGDDLARSIAPYHPVRSEWWRaTGEDPLAELAARISNGELPIVAVDNYHLPFRPAYHDVHAAHLVVVYGIDreRDEVGVSDAQPPAFRGSVPAEDFLAAWGSANPADTEDAFFSDSEIGRRCLSVEIEGPIPPLDAERLRTALEENLAGFVT--------GLADLGRYVDDL------------------------------------------------------------------------------------------------------------
|
63 |
+
>SRR5439155_7605724 130 0.404 3.181E-28 24 201 352 9 191 192
|
64 |
+
------------------------FWFHDLCSCLHDCIATVLIYQDQDPTLTLGASWEFYYSPADVRREEFYHPLPRPTLAESMMPFHPVRSSWHasDDPDAAWSDVKALVADGQPVIVAVDNFYVPFRPAYGDVHAAHLIVVFGFDDatDEVYVLDSTPPTKRGPMPMSEFLRARYSDNPVSGeRDFFFAGAPIANRWLQLEIGTP------------------------------------------------------------------------------------------------------------------------------------------------------
|
65 |
+
>SRR2546423_1207057 128 0.432 1.071E-27 22 266 352 174 413 417
|
66 |
+
----------------------VSQWYRDPVSCLQSTLATVLLAAGAEPLPVLGLAWEFLFIPGDVRPEEFYYPCRfEGDLARSLAPHHPLRSSWWSPAPgeDPLAELARRVERGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDRERgeVGVSDAQPPAFLGSLPAEDFLAAWSSANPADAEDAFFSDAEIGRRCLAVEIEEPIPPLDGERLRAALAENLACFVT--------GLADLRRYVDDLE--ARALAGERPPLEGLYALGLG-------------------------------------------------------------------------------------
|
67 |
+
>SRR5215471_7738639 128 0.443 1.451E-27 21 193 352 168 343 344
|
68 |
+
---------------------PTRAWRHPLGGCLFAAVGALLWHRGLDPLEALGAAWRFRYRLGDVRREEFYYPCGEDSLVEALAPHHPVRSRWHspSSPAAAWPEVRERLLAGVPVAVVVDNFHVPFRPAFGDVHSSHLVVVTGFDEGTGTVVDAVPPGFQGPLPLAQLALARGSDNRPrHARDMFFAGDPIGARW--------------------------------------------------------------------------------------------------------------------------------------------------------------
|
69 |
+
>SRR5919198_860872 128 0.456 1.966E-27 115 344 352 0 226 920
|
70 |
+
-------------------------------------------------------------------------------------------------------------------VAGGDTRLLPFRPAYHDVHAAHLVVVYGIDAGRreVSVSDAMPPAFQSAIADGDLLRSWSSPNPRDDQDAFFSDARIDRRCLSVRFETPIPPLDGARLKDALESNAQRLSAG---EGWSGLPGLRRYLDEVVGRAGAGERRP--LEELYPFGWAMQAQAYVHGELLRVCGADWSVPELREAGRAVESVAYAWTGLRMTGAHGLSAPAEATSGLRRHAGRLRRRYEEALEAVE-------
|
71 |
+
>ERR1700691_1894743 126 0.322 8.949E-27 4 235 352 55 293 308
|
72 |
+
----HRHRTSGMSRPATALIEEVVSWRHDLVGCLWTSAGTVLGFHGAPVLETLGAAWGFHHFPDDLRREEYYYPCPPGmSLYEAIAPYHPVSSRWHEPADAAqgWAQVRDIVLSGSPVVVAADNFYLPFRPAYQDIHTNHMVVVYGFDEasGTARVLDAVPPRFDGDITMEELTAARDSTNPvLHERDMFFTNRPIANRWLEITVdTAAFPPFNLEPARPTLRRNLPGSAAPAPAEAYAGTAG--------------------------------------------------------------------------------------------------------------------
|
73 |
+
>DeetaT_8_FD_contig_21_3401058_length_202_multi_3_in_0_out_0_1 122 0.400 1.364E-25 24 203 352 9 193 195
|
74 |
+
------------------------FWFHDLCSCLHDCIATVLIYQDQDPTLTLGASWEFYYSPADVRREEFYHPLPRPTLAESMMPFHPVRSSWHasDDPDAAWSDVKALVADGQPVIVAVDNFYVPFRPAYGDVHAAHLIVVFGFDDatDEVYVLDSTPPTKRGPMPISEFLRAIYSDNPVSGeRDFFFADAPSASRSRHLEIGTPSP----------------------------------------------------------------------------------------------------------------------------------------------------
|
75 |
+
>SRR6266704_4620549 119 0.447 1.527E-24 22 189 352 1 172 175
|
76 |
+
----------------------VRMWRHDLGHCLHATMGVLLGCYGADPLSVLGAAWGFGYRSGDLRREEYYYPLGSESLLGVMAPHHPVSSRWHRPPDaeTAWTEVREAVASGRAVAVAVDNFHLPFRPAYRDVHTNHLLTVYGFDDerDEVLLADPVPPRFQGAITRAEFAAARRSVNPEDHDRDLFFTRNP------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
77 |
+
>EndMetStandDraft_4_1072995.scaffolds.fasta_scaffold2302776_1 118 0.370 3.774E-24 108 327 352 0 209 217
|
78 |
+
------------------------------------------------------------------------------------------------------------LASGVPVAVAADNFFLPFRPAYRDVHSNHLLLLYGFDDDagHVYVVDPVPPSYQGPIPLETLSLARGSVNPvRHDRDMFFTANPIANRWLTVRVGPVQPVMDRAFVARAIEANVA----------GFGLPGLRAFlAAALAKLPDDGT----VIDEIFLVAGPLLAVTGLHAEFLDRAGQAFGVSALRELGRRVDRIAHHLSALRIAVASARHDRAAAVPGLRR------------------------
|
79 |
+
>ERR687887_509227 117 0.470 6.896E-24 22 233 352 115 331 332
|
80 |
+
----------------------LRQWYRDPISCLQSTLATVLLAADEEPLPVLGLAWEFLFVPGDVRPEEFYYPCRfEGDVARSLAPYHPLRSSWWSPAPEedPLAELARRAEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDrrREEVHVSDATPPAFQGAIAAADFLASWSSTNPADDEDAFFSDSRIGQRCLAVEIDEPLPPFDAERLRTALETNLARFAAEEDDAGRGGL----------------------------------------------------------------------------------------------------------------------
|
81 |
+
>SoiMetStandDraft_5_1073268.scaffolds.fasta_scaffold824170_1 116 0.332 1.702E-23 78 328 352 3 256 278
|
82 |
+
------------------------------------------------------------------------------SLLGSLAPYHPISSRWHlpAGAEQGWSDVREQLVEGRPTAVGVDNFWLPFRPAHQDVHANHLVIVYGFDDEnkTVRVMDTVPPRFDGDLPLSVLTAARGSGNeAHHDRDMFFADSAIAHRWLEISVNhARLRPPDRPTIAAYLCRNLAGFAAPDDENDHDGLVGLESFLRDMETRLSRGDEIAD-----ELFVVAGAALadTALHADWLAEAGRAVGIPPLTEAGRSVERIAHHWTALRIIAALTRKGDVSTARLRRRH-----------------------
|
83 |
+
>SRR5262245_57182801 115 0.435 2.300E-23 31 219 352 2 194 228
|
84 |
+
-------------------------------SCLQATVGSVLAYWGHDPLEILGAGWDFTYIPGEVPFEEFYFPSPPGiDLGQSLAPHHPLSVRWATGVrDDPLGELAGAIEAGQLPIVALDKYHLPFRPAYHDVHAAYLLVVYGVDKRRglVLVSDSTPPHFTGPIAAEPFLAAWQSRNPADEQDAFFSESEIGMRYLVVEPRDRIPRLTQRQLGTALRANAA------------------------------------------------------------------------------------------------------------------------------------
|
85 |
+
>SRR3954469_16970431 113 0.377 1.034E-22 22 243 352 47 268 283
|
86 |
+
----------------------VRQWYRDPVSCLQSTIATLLIEAGWDPVETLGLGWQFPYLPGDVRAEDFYWPCRvPGDPVASMLAHHQVSSSWrQETGSDPLAALEAALDRGQLPVVAVDNFYLPFRPAYHDVHSAHLIVVFDVDrDGTVGISDAMPPAFQGELAVEDLLRSWMSAN-PRENEAFFSGAHIGGRWLDVRLGPAPPRLDPSTLAGAL--LADAVAFRKTDVAGAGLAGLADLSAAI------------------------------------------------------------------------------------------------------------
|
87 |
+
>SRR5262249_26821897 112 0.471 2.547E-22 21 158 352 6 147 149
|
88 |
+
---------------------PIDHWRHDLGHCLHTTAAVLLAYHRLDPVEVLGAGWGFRYPPGDVRREEYYFPGHPDDLFSGLAPSHGVSSRWrpREGAEDGWRQVRAAHADGAPVAVAVDNFHLPFRPAYRDVHTNHLLAVYGFDDDTgqVFVADPVPPSF-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
89 |
+
>SRR5688572_1544331 111 0.420 4.641E-22 56 264 352 0 211 212
|
90 |
+
--------------------------------------------------------WEFLYKPDDVTAEEFYYPCRVDgDIARSVAPYHEITSRWWraESGGSALDELAGCLNAGALPIAVVDNFHLPFRPAYRDVHAAHLLVVYGVDQQRgeVWVSDAMPPAFSCPIATEEFLASWSSENPLHAGDAFFSGSRIDRRCLLMDVRSQRPPLNRHVAHAALRSNLARFARADDPAGWNGRAGLGRFLQQLLERARAAD--PEALAETYRFG---------------------------------------------------------------------------------------
|
91 |
+
>MGYP000725067659 108 0.373 5.102E-21 115 341 352 6 228 245
|
92 |
+
-------------------------------------------------------------------------------------------------------------------IVAVNNYHLPFRPAYHDVHAAHLFVLnrLGTPGGVARVHDPQPPAYRGPLSREVLDIARASLRVGDESDPFIAGANPNWRWLEVRVEGPQPSPSLTWVHAIMVENLDALLRQ-----TQGPAALATFLDSLPNRVR--EHGPRALREIYVLGWPAQAEAGLHARFLASVAARHRKPRLAETARAVDHVANGWTGLRVSAAHASTLSAPtAPADVFRQGATLLRRWRRCMD----------
|
93 |
+
>SRR5262249_47284095 106 0.432 3.067E-20 25 167 352 10 157 159
|
94 |
+
-------------------------WRHELAGCLHACAGALLGFHGIEPLEALGAAWGFHYAPGDARREEYYFPCRPGWsLLRSLAPYHPVGSTWHHPAGdaEGWAEVRSAVASGRPAAVAVDNFHLPFRPAYRDVHSNHLVLVYGFDDDRrlAWVLDAVPPRFDGPIGLHQL----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
95 |
+
>SRR5690242_2363108 104 0.414 1.364E-19 22 147 352 18 145 148
|
96 |
+
----------------------VDHWRHDLTGCLHQSMALLLAHEGLDPLEVLGAAWGFYYDPRDLRREEYYFPTPYESLLSGLAPFHPVRSRWHlpADAAAGWAEVRAAVADGRPVVVAVDNYELPFRPAYQDVHTNHLVTVVGFDDER------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
97 |
+
>SRR5262245_28891133 103 0.492 3.334E-19 21 144 352 23 148 156
|
98 |
+
---------------------PVTVWRHDLAGCLHACAATLLAHHGVDPLDAVGAAWGFTYVPGDVRREEYYFPQAGRSLLESLAPYHPVRSEWHRpgSAEEGRRQLLSQLAEGRLVAVAVDNFHLPFRPAFGDVHTNHMVVVYGYD---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
99 |
+
>SRR5215468_4727865 99 0.449 4.843E-18 21 164 352 181 329 330
|
100 |
+
---------------------PVNAWRHDLVGCLWTSAATMLACHGAPVVETLGAAWGFRHLPGDLRREEYYYPTPPGvSLYQAIAPYHPVRSVWHEpaDAGEGWTQVRDEVLAGRPTVVAADNFYLPFRPAYRDVHTNHLVVVHRLDEaaGTVGVLDAVPPHFDGDITL-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
101 |
+
>SRR5215467_9451402 98 0.413 1.179E-17 161 337 352 0 176 179
|
102 |
+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------PITVADFLRAWSSANPPDPQDVFFSDTRIGRRFMTVTAGEPFPRLDAPMLRRILAANLAGFSGEDASDqaGWTGLPGLKLYLMSLLDACAR--PDPAQLASAYPLGWGMQAQASLHAELLRRWGTQAGLPELREAARLVETAAHAWTGLRMTAAHGRGDPRACAGELARHAAGLRRSYE--------------
|
103 |
+
>ERR1700761_2897183 97 0.376 2.869E-17 96 294 352 0 202 203
|
104 |
+
------------------------------------------------------------------------------------------------TSGKDADDLIGLIEEHAAVIVAVDNYHLPFRPAYHDVHAAHLVVVpawrrTGGGQLEFYVSDAQPPGFQGWLAAEHLVESWTSGNPTDTQDVFFSSREIGGRVLTVKVRPEPGPLTDDQVVRALRGNLDRWDTgtaGPADRVWTGRSGVRRVAERLDEHSAD----PARLRPAYTFGWAMQAQAYLHGRFAQECALRTRPGHLAEVA---------------------------------------------------------
|
105 |
+
>SRR3954453_9001413 95 0.398 9.369E-17 25 153 352 6 138 139
|
106 |
+
-------------------------WRHDLGHCLQTTMGVLLQHHGLDPLQGLGAAWGFHHIPGEVRREEYYFPLHRGGLLESMAPYHPVRSRWHrpEDAGSGWQQVRAAVLAGSPVAVGADNFLLPVRPAYRDVHTNPLLTVYGFDDERdlVLIADP------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
107 |
+
>SRR5213080_3547182 95 0.373 9.369E-17 133 295 352 0 164 167
|
108 |
+
-------------------------------------------------------------------------------------------------------------------------------------HAAHLIVVFGFDDatDEVYVLDSTPPTKRGPMPMSEFLRARYSDNPVSGeRDFFFAGAPIANRWLQLEIGTPSPELTREWVTEVIATNLRRFREPDASPGWAGMAGLTHYLHSICART-LGADAGDALQELYTVGWTAQGSTALHADFLMLAGRRLGWDRLVEVGR--------------------------------------------------------
|
109 |
+
>SRR6266536_4448340 93 0.430 7.400E-16 95 227 352 3 139 147
|
110 |
+
-----------------------------------------------------------------------------------------------RDAAEVWAQVRERLAAGVPVAVAVDNFHLPFRPAYQDVHTNHLVVVHGCDErrGTVRVLDAVPPGFDGAIGLEQLTAARSSANPvRHDRDLFFTDSPIANRWLEVEVArERLPRFGPAGVSSGIQGNLAGFATPPPP----------------------------------------------------------------------------------------------------------------------------
|
111 |
+
>SRR5215216_3959313 92 0.600 9.936E-16 20 139 352 6 123 124
|
112 |
+
--------------------PPVQLWCRDLVSCLQATFATVLLHHGHDPVEVLGAPWQFRYRPGDVRTEEFYFPERAGDLGAAIAPYHPVSSRW--CTGRSLDELDAQLRAGRLVIAAVDNFHLPFRPAFCDVHAAHLLV--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
113 |
+
>SRR5947209_19054265 92 0.427 9.936E-16 106 229 352 0 123 137
|
114 |
+
----------------------------------------------------------------------------------------------------------AAIDAGRPPIVAVDNYHLPFRPAYHDVHAAHLVVVRGYDGDTVQVLDPMPPAFDGPLAGSVLATSRAAATVTDPTDPFFAGSSLRRRWLEVYPTGRQPAWSWAWVQQTLRDNLAALAGPARDDS--------------------------------------------------------------------------------------------------------------------------
|
115 |
+
>ERR671931_2548281 89 0.317 7.793E-15 136 349 352 2 215 219
|
116 |
+
----------------------------------------------------------------------------------------------------------------------------------------HLLTVYGFDDEarHALVADAVPPRYQGPLALSDFVAARGSSNPiRHDRDLFFTANPIAHRWLEVDVPARMPALDAEFLRFVVESNLRGFGARGADDAYEGIEGAHRFLVDALARLGRGEAVVDEV--FIVSGTALAA-AGVHADYLLLAADRFGSFALLEAARAVERVASHWAALRIALAELRDVSAPRLEGIRRRGEALVRDHRSALARLEHALEA--
|
117 |
+
>SRR5205809_3347963 88 0.503 1.880E-14 69 216 352 3 154 156
|
118 |
+
---------------------------------------------------------------------EFYFPCSDDDLGRVLAPHHTLEIvtrRAPDDDARALSMLGERLEAGALPIAAVDNLHLPFRPAFGDLHAAHLLVVFGVdtDADAVWVSDAIPPAFQGPIPSATFLRSWGSANPPDGQDPFFSSSPLERRWYDVRLVD-APADNADWIARAIAD---------------------------------------------------------------------------------------------------------------------------------------
|
119 |
+
>MGYP000871191451 88 0.370 2.521E-14 21 186 352 11 179 182
|
120 |
+
---------------------PLPGWYDDLLSCLQTTIGVSVQAHGWDPVQALAAGWRFSLPAAPVEPVEFYHPA-GDQIGERLCLHHPVQLRWHHPASRAEADagIAAATATGSHSIVAVNNYHLPFRPAYHDVHAAHLFVLnrLGTPGGVARVHDPQPPAYRGPLSREVLDIARASLRVGDESDPFFAG---------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
121 |
+
>ERR1051325_7439640 87 0.568 3.380E-14 110 239 352 0 128 129
|
122 |
+
--------------------------------------------------------------------------------------------------------------AGELPILAVDNYHLPFRPAYHDVHAAHLLVVYGIDAGRgeVFVSDAMPPAFQGAIACDDLLRSWSSPNPRDDQDAFFSDARIDRRGLSVRFEAAAPPLDRDGLERGLRGNAERL---AAEDEWSGLPGLRRY----------------------------------------------------------------------------------------------------------------
|
123 |
+
>SRR5438874_1021933 87 0.443 4.531E-14 126 274 352 1 149 150
|
124 |
+
------------------------------------------------------------------------------------------------------------------------------RPAYRDVHAAHLVVVYGveRDRDEVLVSDAVPPAFRGAIPAETFLRAWSSANPRRDEDGFFTDARIDRRCLSVEVDGPFPRLDPDRACEALRANARGFRDRDDGVGWSGLAGIARYLDWLLERARTGGA--RALEELYPLGWAMQAQAYLH-----------------------------------------------------------------------------
|
125 |
+
>SRR2546423_1612834 86 0.731 8.139E-14 192 351 352 0 159 163
|
126 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------RWLRAAMRGPVPDTGLDWLGHVIRENVARYRQGSAGGTQTGVLGLRSYLDELYRAVPGTDAAAELLSELYVISWNIQAQAGLHAEFLRREGVRWRIHELAEAAAGVAAVAHGWTGVRMTGAHSRVWGRHRFDELRRHAVELVRRLDNALDQLELAAEVVS
|
127 |
+
>SRR5581483_10036454 86 0.401 1.091E-13 2 131 352 11 141 143
|
128 |
+
--CSRSRPSPTRRPAMRVELQPVRYWRHELGHCLHTAAAVLLAHRGLDPVGVLGAAWGFHY-PGDLRREEYYLPGAAGSLFTALAPYHGIRSVWHRpaDAEPGWQQVRAQVIAGNLVAVAADNYHLPFRPAYRD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
129 |
+
>CryGeyStandDraft_13_1057135.scaffolds.fasta_scaffold438374_1 86 0.343 1.091E-13 183 345 352 9 169 184
|
130 |
+
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------FFAGTEIGGRWIDVEIGEPFPKPTEEWVGTVVAENLRGFVEPPIGDGWAGLAGLRRYLADVGDRADDRRD--PVLGELYTVGWASQSVTALHADFLRSTATALGWPEGLEAARRVEHLASLWTPLRILTAHGSTNGIELGDRLRRRMRALIRHYELAIEEVER------
|
131 |
+
>GraSoiStandDraft_48_1057284.scaffolds.fasta_scaffold433089_1 83 0.408 1.128E-12 82 217 352 7 148 149
|
132 |
+
----------------------------------------------------------------------------------DLAPFHPVRAAWVgpSHPDEAWEQMRAALRDGRPLIAAVDNFHMPNRPAYGDVHAAHLVVVSGFDDDAgmVDVVESTPPAYRGPVPRECFLLAIGSRNDARSATqaVFFAGTEIGGRWIDVEIGEPFPKPTEEWVGTVVAEN--------------------------------------------------------------------------------------------------------------------------------------
|
133 |
+
>SRR5919197_3773559 81 0.555 2.704E-12 115 220 352 4 111 119
|
134 |
+
-------------------------------------------------------------------------------------------------------------------IVAVDNYHLPFRPAYHDVHAAHLVVVYGIDGSRreVSVSDAIPPAFQGAIAADDLLRSWASANPRDDQDAFFSDARIDRRSLSVRFDAPIPPLDLDRLEGALRSNAER-----------------------------------------------------------------------------------------------------------------------------------
|
135 |
+
>SRR5688572_26547208 80 0.366 8.655E-12 138 288 352 1 150 151
|
136 |
+
------------------------------------------------------------------------------------------------------------------------------------------LVVYDVDagAGTVGVSDAMPPAYAGPLASDDLLRAWWSANPQDAQDVFFSGEGIGGRWLDVRLGTPFGALTVERLRDAVRANLRELTEPSDPD-TAGLAGLARFAREVAGRARRGEAG--RLAEVYTFGWSMQAQSALHGELLRRCGSAWSLP---------------------------------------------------------------
|
137 |
+
>SRR5207248_1268395 79 0.514 1.157E-11 22 156 352 61 200 201
|
138 |
+
----------------------VRQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWELRFQPGRVGREEFAYPCRfEGDVARSLAPYHPIRSSWWSpaDDDDPLVELARRIEGGELPIAAVDNYHLPFRPAFHDVHAAHLVVVYAVDwqRDEVGVSDAQPP---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
139 |
+
>SRR5882724_12307322 79 0.377 1.547E-11 121 308 352 1 192 205
|
140 |
+
-------------------------------------------------------------------------------------------------------------------------YHLPFRPAFHDVHAAHLVVVPGWRRTThgaveFYVSDAQPPQFQGWLSAEHLMNSWTSGNTSDTQDVFFSSREIGGRVLTTKVRSPPDELTTGQVAQALQGNLDRWatgVAGPSDRVWTGRTGLCRFIERLQE----SCDDPEGLRSAYTFGWAMQAQAFLHGRFAQEFAHRSKQTVLLGVAASADRVVSAWSNVR-------------------------------------------
|
141 |
+
>SRR5687768_15959820 78 0.362 4.936E-11 148 296 352 0 146 147
|
142 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------VYLADAMPPVFHGHMPTENFLRAWTSVNPNDEQDAFFSDTAIDRRYLTVELGRRFPKFDPGFLHRVLDANLAGFAVEDDPAGWGGLRGLDRFLDDLVTRSRAGDAAG--VAAAYPFGWGMQAQASLHGELLRGWGARYEVPEVSEAGRL-------------------------------------------------------
|
143 |
+
>SRR4051794_15511058 76 0.471 1.177E-10 114 245 352 0 139 143
|
144 |
+
------------------------------------------------------------------------------------------------------------------VIVAVDNYYLPFRPAYQDVHAAHLVVVPAWRRTpgggvEFYVSDAQPPAFQGWLSAEHLVASWTSGNPSDTQDVFFSSREIGGRVLTTKVRQRPEEPTTEQVVRALRGNLDRWDNgvaGPTDRLWTGRSGLRRFVDRLHE----------------------------------------------------------------------------------------------------------
|
145 |
+
>SRR3712207_6013657 75 0.379 2.801E-10 141 295 352 0 156 162
|
146 |
+
---------------------------------------------------------------------------------------------------------------------------------------------YGFDEeaDEVYVLDSTPPLHKGPITMQDFLAARNSSNPVSGeRDFFFAGAPIANRWLHLEVQSSFPELTRDWVAEVIATNLRRFEASSDGPTFSGMDGLARYLRGVCER-AAGAEGGRALDEMYTVSWVSQAAAGLHADFLMEAGRRLHWYELAEVGR--------------------------------------------------------
|
147 |
+
>SRR5919199_5445869 75 0.276 3.740E-10 90 221 352 4 123 149
|
148 |
+
------------------------------------------------------------------------------------------RWRHEADPGTAWREVRARIAGGRPVALAVDEFHLPFRPGYRHAHARRALVAYGFDDEAelVFVADPTPPRFQGSLALSALAAAREAEDGQP--------------WLELELAGPQRAFGPTVVGEALERNLRRF----------------------------------------------------------------------------------------------------------------------------------
|
149 |
+
>SRR3982074_821911 74 0.432 4.992E-10 24 131 352 26 136 138
|
150 |
+
------------------------PWRHDLATCLQSCMATLLDANGVAALDVLGANWSFYHRPAALRRAEYYFPCREGvSLLASLAPYHPVRSQWHEpaDAEQGWSQVRAAVAVGRPVAVAVDNYELPFRPAYHD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
151 |
+
>SRR5436305_665040 74 0.325 4.992E-10 150 348 352 6 204 208
|
152 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------VSGPVPARSVGCIPRDQLTPARDSGNRsKHERDMFFADQPIGNRWLEAEIdAEHYPAFDRGSIRYAIRRNIDGFTEPGPlgAHRYAGLAGLRAFLDD---SAAKLAAGGQVADEVFVAAGAVLAATALHADWLARAARMLGTIELAEAARQVARVAHHWTAVRIMAALSRDGQVTAPRLARRAANLLADQ-ERALTALGDVLD---
|
153 |
+
>SRR5437870_817573 74 0.401 6.662E-10 53 175 352 1 127 131
|
154 |
+
-----------------------------------------------------GASWEFFYAAEDVRSEEFYHPAPRPTLGASMMPFHPVSTAWHESTDgeAALAEIEAVIAGGRPVIAAVDNFYMPIRPAFGDVHAAHLVVVTGFDEGTgeVVVLEETPPLYHGPIAVSDFLSARGSGN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
155 |
+
>9576|scaffold_589529_c1_1|-3|11 74 0.299 6.662E-10 102 225 352 3 129 140
|
156 |
+
------------------------------------------------------------------------------------------------------DDAFDCVAQGHAVVVAVDSFELPYRPAWRRVNSGRSLIVTAIDRTTGVaqVIDGWMPHYTGSVALADLARARASSVPQDlRREPLYAGTPLRRRWWRIALASEIPHGGRDNVADALAQLTAQATEAP------------------------------------------------------------------------------------------------------------------------------
|
157 |
+
>SRR5919199_1533857 74 0.376 6.662E-10 196 341 352 9 148 191
|
158 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------VGGGAPTPPLDSERLEAALAENLDGFAGT----GWSGLAGLARYVDALET--RALAGERLPLEELYAFGWGMQAQTYLHGELLRETGAAWSVAELAEAGRAVQEVASAWTGLRMTGAHALGAPAAAAADLRRHGNRLRRRYEAALE----------
|
159 |
+
>SRR5581483_6019569 73 0.452 1.582E-09 25 127 352 2 107 108
|
160 |
+
-------------------------WRHELSNCLHSCIGVALTRHGFDALVVLGSRWQFYYRPGHLRREEYFWPCPPGvSLAETLMPYHPVSSRWHqpEDAGEGWQQVRDRILAGEPALVAVDNFWLPFRP--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
161 |
+
>SRR4029453_12411764 73 0.453 1.582E-09 20 136 352 87 205 207
|
162 |
+
--------------------PEPELWSRALISCLQATFGSLLVRVGADPLVVLGAAWRFLHLPGDVRFDEFYYPCHDGDLGAALAPHHELRSRWWQPADEDdvWREVRESLADDRLVIAAVDNFPLPFRPAYGDVHAAH-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
163 |
+
>SRR5688572_10121371 72 0.428 2.110E-09 126 239 352 1 126 131
|
164 |
+
------------------------------------------------------------------------------------------------------------------------------RPAYHDVHAAHLIVVGAIDPAAGTadVSDAMPPAYAGPLALDDLLRAWRSTNPQDGQDVFFSGADIGGRWLDVRFDLPFPDLTADRLADMLRANVRGFVAASGADTapdgtaaetcWSGLAGLRRF----------------------------------------------------------------------------------------------------------------
|
165 |
+
>SRR5690348_7358098 72 0.400 2.815E-09 25 123 352 41 140 152
|
166 |
+
-------------------------WRHELTGCLHVAMGVLLKHRGHDPLEVLGASWGFYHDPADLRREEYYFPARRGSLLADLAPYHPVSSRWHaATGAEAWSRVRDTIAAGQPVAVAADNYYL------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
167 |
+
>SRR5258708_3518620 72 0.299 3.753E-09 52 194 352 71 215 216
|
168 |
+
----------------------------------------------------LGARWGFAPPRQEAWGElcEYSLPLGSNPYAATLAARTGLQIAQHSGIAAA--DLSGHLASGAPAIVAVDSFYLPYRPAFGRVHSQRTILVRqGQGRTDLRVEDPWPPTYHGSLGAAHLERARRSTVPLDRRaEPVFAGRPIDFEWW-------------------------------------------------------------------------------------------------------------------------------------------------------------
|
169 |
+
>SRR5437870_3049837 71 0.420 6.672E-09 20 124 352 36 142 144
|
170 |
+
--------------------PEPELWYRDLISCLQATFGSLLLRVGADPLAVLGAGWRFLHLPGDVRSDEFYYPCHDADLGAALAPHHDLRSRWWQPADEDdvWREVRESLGDDRPVIAAVDNFHLP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
171 |
+
>DeetaT_8_FD_contig_31_513410_length_211_multi_4_in_0_out_0_1 71 0.352 6.672E-09 115 239 352 28 180 181
|
172 |
+
-------------------------------------------------------------------------------------------------------------------VIQADVFHLPFRPAYHDVHAAHTVIVHGFDDEagTVSVLDSMPPAFDGTVSQEdlanargsgnpseeadpffggtisqrDLANARGSGNPFEEADAFFGGTPIAGRWLRFEVVGEMPSLDREWVGRVVTGNLERFAAATDGPALSGLAGVDRF----------------------------------------------------------------------------------------------------------------
|
173 |
+
>SRR5438105_1867171 70 0.453 1.185E-08 210 350 352 1 140 141
|
174 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LRHVLRENVRALRAPVTGSEWFGLAGLERFVDRLLTR-AAGPDRRTALEEQYVLSWGLQAQADLHAAFLLAAGGRWGLPCLSEAGCLVDRVAHAWTGLRMTGAHGRLDRPEFGLELRWHAGCLVRAHEEAVDAIELALDEL-
|
175 |
+
>ERR1700687_5501718 70 0.429 1.580E-08 40 156 352 8 127 128
|
176 |
+
----------------------------------------VLQRHGWEPARALGAGWRFVAAKNPVEPVEFYHPA-GEMLADHLCLHHPVLLRWHQPAHDAAAhcDIGESLAHGTAPIVAVNNFHLPFRPAYHDVHAAHLVVVTGYDEhhDNYQILDLMPP---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
177 |
+
>SRR5262249_51622373 70 0.418 1.580E-08 20 115 352 51 148 149
|
178 |
+
--------------------LPVEGWYHDLCSCLQVDAACVLSAFGQEPLAVLGAGWGFQFRPGDWEPVEFYCPAPDGDLARALAPHHRLCCRWHHPDGPeaALAALVEALRAGLPAI--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
179 |
+
>12671|scaffold1085376_1|-94|01 69 0.310 2.105E-08 169 350 352 0 184 190
|
180 |
+
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------AARGSDNPvRHGRDLFFTDNPIGNRWLTIETADVMPAFDQDFVAAAVGRNLADFgdtsTSGASTVSYAGAEGQRRFLGDIADRLAAGDEG--AVDEAFVVAGPVLAQTALHASWLSRAARDFAEPGLAEAARRVERVAHHWSAIRIVVASSRAAPAAAAGPLRRRATALADDHWRALDDIARIIGAV-
|
181 |
+
>SRR4051794_8300831 69 0.350 2.804E-08 4 116 352 6 119 120
|
182 |
+
----HCRQEYGERAPMNVALSPMTFWYHDLCSCLHNTIATVLHYHHQEPTQTLGAVWDFYYAPAQFHKEEYFFPSRHPTLAENLLPYHPISADWRdSSAADSWPAVREAVARGTPAIV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
183 |
+
>SRR5277367_2659287 68 0.500 6.627E-08 21 123 352 29 136 137
|
184 |
+
---------------------PVQHWCRDLVSCLLATLATVLLRAGHAPLEVLGAHWEFRYRPGDVRTEEFYYPcAVPGDLAASLAPHHPLSSSWREPgagaGTEALAEISDELAAGRLVIAAVDNFYL------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
185 |
+
>SRR5579872_5282090 67 0.350 1.175E-07 136 246 352 1 114 115
|
186 |
+
----------------------------------------------------------------------------------------------------------------------------------------HIILVYGVDQHRglVWLCDPTPPGFNGPLPIGDFQQARNSANPADVQDAFFSASEIRGRYLRIVADPGLAPLSPLELGAALKANLAAFGADSPADgPWTGNIGLRRYLDSVTSH---------------------------------------------------------------------------------------------------------
|
187 |
+
>SRR5205809_7151332 66 0.289 2.082E-07 104 209 352 0 106 107
|
188 |
+
--------------------------------------------------------------------------------------------------------LREYLHAGRTAIVAVDSYYLPYRPAFGHVHSSRTILVRRGDSQAAEVEDVWSPGYSGPLGWDHLERARYSCMPYAAlTEPVFSGCPLSGEWFAVDLEPVMLPDPTVW----------------------------------------------------------------------------------------------------------------------------------------------
|
189 |
+
>SRR5919204_5113640 66 0.452 2.082E-07 21 124 352 194 299 301
|
190 |
+
---------------------PVRLWCRDTISCLHATLATVADHHGWSPLSAVGPLWEFRYFPGDDRGEEFYYPLCGRPPGEALMPYHPvsIEWRAAKSARQGLAELRVALAEDVLPIVAVDNYHLP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
191 |
+
>SRR5690242_5973043 66 0.305 2.771E-07 69 200 352 4 135 139
|
192 |
+
---------------------------------------------------------------------EYSLPGGVLSFSERIASRTGVKIVQQDGAHSS--SLYSFLATGQTAIAVVDSFHLPYRPAFGRVHSHRTILVRqGLDPTDVLVEDEWPPAYHGPVPVRSLEAARYSAVPLDPvREPVFAGGKIRGEWFHLEMDG-------------------------------------------------------------------------------------------------------------------------------------------------------
|
193 |
+
>SRR5882724_5907471 66 0.394 2.771E-07 20 121 352 93 196 197
|
194 |
+
--------------------PEPELWCRDLISCLQATFGTLLARLGADPLAVLGAGWRFLHLPGDVRFDEFYYPCPDADLGAALAPHHELRARWWQPADEDdvWREVRESLVDDRPVIAAVDNF--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
195 |
+
>SRR5919198_2188180 65 0.556 3.688E-07 115 193 352 5 83 99
|
196 |
+
-------------------------------------------------------------------------------------------------------------------IVAVDNYHLPFRPAFRDVHAGHLVVLCAVEDDAVVVSDALSPAFQGRVALADFARSWESANGARGESGFFSGSPIGGAL--------------------------------------------------------------------------------------------------------------------------------------------------------------
|
197 |
+
>SRR4051794_17814528 65 0.418 3.688E-07 103 226 352 0 128 136
|
198 |
+
-------------------------------------------------------------------------------------------------------ELIELIEAHASVIVAVDNYHLPFRPAYHDVHAAHLIVVpawrRTVDGDvEFYVSDAQPPRFQGWLSEEHVMNACTSGNPADHQDVFFSSQAIGGRVLTVQVRSQPAELTVDRFTEAIAGNVQRWITSEG-----------------------------------------------------------------------------------------------------------------------------
|
199 |
+
>ERR1700744_2712043 65 0.342 4.908E-07 21 124 352 5 108 109
|
200 |
+
---------------------PVP-WYRPAINCLAAGVGTVLATAGHDPLDILGSGWDFTHVPGQLPFEEFYWSVTEAsDLGSRLAPHHRVTVEWSDDDAGDLSGLMSELEQDRIPVAAVDKYFLP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
201 |
+
>ERR1051325_11437074 64 0.327 1.156E-06 62 179 352 4 120 127
|
202 |
+
--------------------------------------------------------------PLRSQLVEYSLPFGPSSFIEALLFRTGLRICSHTGPDP--NNLRRYLAEGGEAIIAVDSFLLPYRPAFGRVHSHRTIIVRkGQNGDEVWVEDAWPPSYEGSLPLAVIENAQHSQVSLDP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
203 |
+
>SRR6266404_4313130 63 0.313 1.537E-06 100 197 352 2 100 112
|
204 |
+
----------------------------------------------------------------------------------------------------SESELRQHLALSEPAIVAVDSFYLPYRPAFGKVHSHRTLLVVSSREDLVWVEDDWPPCYQGPLQWSCLQRARYGDVPLQPRFePVFAGGTLSGIWFSVR----------------------------------------------------------------------------------------------------------------------------------------------------------
|
205 |
+
>MGYP000377277604 63 0.341 2.044E-06 100 175 352 8 89 135
|
206 |
+
----------------------------------------------------------------------------------------------------SVDDLRAMLDAGKPVIIGLDSDDLygaGDAPFTDDVVAGHAVVITGIDDETglVYINDPGFPDGAGVaIPLETFEDAWQDAD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
207 |
+
>SRR5690242_18701050 63 0.616 2.044E-06 1 105 352 51 157 158
|
208 |
+
-GDRHRRAPRGRRAGMIETAPQVQLRPRDLRRCLHGSLATALLHDGHDPVTVLGAPWEFRRRAGAWTTEEYYYLPEPDSLAKRLAPYHPFESTWHrgDGPGDPLDELR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
209 |
+
>DeetaT_20_FD_contig_41_1501015_length_280_multi_3_in_0_out_0_1 63 0.335 2.044E-06 188 348 352 0 159 163
|
210 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PIGNRWLEAEIdAEHYPAFDRGSIRYAIRRNIDGFTEPGPlgAHSYAGLAGLRAFLDD---SAAKLAAGGQVADEVFVAAGAVLAATALHADWLARAARVLGTIELAEAARQVERVAHHWTAVRIMAALSRDGQVTAPRLARRAANLLADQ-ERALTALGDVLD---
|
211 |
+
>SRR5256714_1899555 63 0.418 2.044E-06 232 348 352 2 116 218
|
212 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GPAALRRYVDDLE--ARALAGERRPLEELYAFGWGMQAQTYLHGELLRETGAGWSVAELAEAGRGVQEVASAWTGLRMTGAHGLGAPAVAAADLRPHGNPLRRRHQEALQRVGRAGG---
|
213 |
+
>SRR4051794_37091874 63 0.333 2.718E-06 189 323 352 1 133 134
|
214 |
+
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------IDRRCLLMHVQSVQPPLQRDAVGAALRSNLARFARSDAPEGWNGRAGIDRFLEQLVERARAGD--RQALSDAYRFGWSMQAQSYLHGELLRTKGREWQVAALSEAGRVVESVASAWSGLRVTAAHGRNAPAGVAA----------------------------
|
215 |
+
>SRR5215218_1112994 62 0.319 3.614E-06 204 344 352 0 139 145
|
216 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PLTREWLTEVIATNLRRFREPDAGAGWAGMAGLTHYLQSICER-SLGPDAGDALQELYTVGWTAQGSTALHADFLMLAGRQLGWDRLVEVGRHVDRLANEWTALRMFGAHGFTRPAEIAERLRRRTIIFLSNYEQTLQLLE-------
|
217 |
+
>SRR5215216_6411204 61 0.281 8.489E-06 101 171 352 27 93 106
|
218 |
+
-----------------------------------------------------------------------------------------------------LEQLIAHLETGLPAIAFVNTAHLSHW----NHETGHAVVVIGMDEQSVFIHDPAIDEPAKAIPIPEFEAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
219 |
+
>MGYP000919097685 60 0.303 1.499E-05 100 171 352 5 83 138
|
220 |
+
----------------------------------------------------------------------------------------------------TVDDLRAALDAGEPVIVGIDSadvYSGGGGPFDPGMESGHAVVVTGIDngpPGVLYINDPGFPdGAGVEIPLELFEDAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
221 |
+
>SRR5205814_1734864 60 0.336 1.992E-05 136 243 352 0 112 114
|
222 |
+
----------------------------------------------------------------------------------------------------------------------------------------HLVTVVGFDDERgeAFVIDPVPPRFSGAIPVEVLTAARSSGNPiVGERDLFFTGNPIGNRWLDIEVEADVPAHDLDFVREVVACNRDAFEATSESGSPvrTGLSGERRFLDSI------------------------------------------------------------------------------------------------------------
|
223 |
+
>ERR1700722_17685023 59 0.402 2.647E-05 25 118 352 15 111 112
|
224 |
+
-------------------------WCRDLISCLQSTFAAVLDRAGEDPLAVLGAGWQFRQIPGDVRPEEFYYPCSgEGDLGAALAPRHALHSRWWQpaDPDDPWQEIRAELDEDRLVIAAV-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
225 |
+
>SRR5690348_3972261 59 0.380 2.647E-05 19 115 352 22 121 122
|
226 |
+
-------------------APPLPRWYRDYVSCLQSTMATLLLAAGEDPVDTLGLGWEFLHIEGDVRAEEFYWPCRkPGDVAGSLLPHHAATSRWLQaDADDPLAPLEAALAEGRlPVL--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
227 |
+
>MGYP000822340748 59 0.274 2.647E-05 113 200 352 49 137 210
|
228 |
+
-----------------------------------------------------------------------------------------------------------------PIIVGADTYFLPYSANHKKRHAKHTLILCGFDltRNVVYVIDWYsPWFYKGEIDIEIFLQARNSKNEKD--DSFYSGTPIKNNWAYIEKIP-------------------------------------------------------------------------------------------------------------------------------------------------------
|
229 |
+
>SRR5215831_14536786 59 0.487 3.516E-05 230 350 352 10 129 138
|
230 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------WSGLRGVERYVDHVLSKAP-GPDGLAVLEELYVLSWGLQAQADLHAEFLRRHGVAQDLPALREAAAVVDRVAHAWTGFRMTGAHGRLDRPDFGGELAWHGSVLVRAHHEAADALELALEEL-
|
231 |
+
>OM-RGC.v1.007693089 59 0.259 4.671E-05 101 171 352 0 76 148
|
232 |
+
-----------------------------------------------------------------------------------------------------LDELRTLLDADTPVIIGLDAddlYGTGDSPFADDLVSGHAVVITGIDDEAglVYINDPGFPDGAGVaISISEFEDAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
233 |
+
>SRR6185436_5620841 58 0.392 6.204E-05 136 244 352 2 113 117
|
234 |
+
----------------------------------------------------------------------------------------------------------------------------------------HLVVTYGFDEeaDEVYVLDSTPPVFQGAIAVRDFLAARGSTVPREGeQDFFFSGTPIANRWLHLEVGADFPELTREWVSEVVAAHVRGFRATRETASGAALSGLAGLGRYLR-----------------------------------------------------------------------------------------------------------
|
235 |
+
>SRR5262245_38567583 58 0.340 8.239E-05 158 290 352 1 132 133
|
236 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------FQGPIPLAALTAARDSANPvRHERDLFFTANPLANRWLTIHLHGPQPVLDLDLVRRALLGNATGLLDGAADgDRLRGLAGQRWF---LQSLLADLAAGAHRVDEVFIVAGALLATTGLHADFLAAAASRLDRPEL-------------------------------------------------------------
|
237 |
+
>3300027819.a:Ga0209514_10000001_1266 58 0.188 8.239E-05 94 171 352 222 311 431
|
238 |
+
----------------------------------------------------------------------------------------------FSSVSEAIRQLEIVINSGRPIMVHLDSYYVKeefaktsefWKNNVGDSHSSHFMVVTGYNESHVYINDPTEPNlsiKNVEVPIEIFKEAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
239 |
+
>SRR5688500_5242141 57 0.301 1.094E-04 100 181 352 29 110 113
|
240 |
+
----------------------------------------------------------------------------------------------------SLGKLRQALTSGPVLVGPLDMSQLTYMPNHERlVGADHFVVLYGIDHEHVFLHDPAGFAY-VALPLNDFRRAWRAENIEYRPD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
241 |
+
>SRR4051794_22847671 57 0.238 1.453E-04 100 175 352 10 96 106
|
242 |
+
----------------------------------------------------------------------------------------------------SLDGIKSEIAQGRPVIVLVDNSRyirseggkqVPY-PSGQGFEAPHIVVVTGYDADNVYLNDPlaiTVSGKDFRVPTASFQAAAGAKG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
243 |
+
>SRR5512142_3069760 56 0.355 3.400E-04 180 286 352 2 106 108
|
244 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------RDYFFAGSQIGCRWIDVKLGDPFPKPTTAWVGEVIAENLRGFVEPSVPDRRIGLAGLRWYLTDLSERVDSED--GNVLGELYTVGWAAQSVTALHADFLRQTASAFG-----------------------------------------------------------------
|
245 |
+
>SRR5215510_68064 55 0.326 4.513E-04 148 245 352 4 107 115
|
246 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------IYVCDSMPPRFRGPIPITDFINAWSSINQRTENDEVFSGAGIDRRWLSVELGAAFPAVDAAFLCRTLAANREQWHRTTPPqsaserDSWTGAQGLWQFLDALLE----------------------------------------------------------------------------------------------------------
|
247 |
+
>SRR5215472_923646 55 0.569 4.513E-04 21 99 352 76 154 157
|
248 |
+
---------------------PVQLWTRDLGSCLQAVFATLLLRERQDPVTVLGAAWDFRYEPGDWRSEEFYYPCAGRPLAEALAPLHPLTSTWHRAPTD------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
249 |
+
>SRR5262249_44885629 55 0.361 5.989E-04 26 116 352 65 158 160
|
250 |
+
--------------------------RHDLAGCLPACAASLLGARGLDPLETLGASWSSLHARGEVRRVEYSSPSPPGaSLLGALAPYHPTRSRWHEpaDAEQGWAQVRAAVAAGTPVAV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
251 |
+
>MGYP000859423510 55 0.312 7.949E-04 95 171 352 82 161 324
|
252 |
+
-----------------------------------------------------------------------------------------------PSGDNGLYDVLQIVNDGVPCIVNVDGYFLSYHPLFKTEHENHAAVLYGYNMNlhTIYISDYMPPYfFNGEISISEFLEAR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
253 |
+
>SRR5580693_9259363 53 0.469 1.857E-03 21 100 352 23 103 111
|
254 |
+
---------------------PIQHWCRDLVSCLQATFATVLAHAGHDPLDALGAHWEFRYRPGDVRTEEFYYPcAVPGDLAASLAPHHPLSSSWRQPAEGT-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
255 |
+
>SRR5690349_8663926 53 0.305 1.857E-03 92 173 352 44 126 150
|
256 |
+
--------------------------------------------------------------------------------------------TSFPDFRQASDYLKELLARRKLVFVWGDEYYLPYRkEAFHAIHSTHSFVVTDYDGENkaYYVEDWD--GLYGYLPAAHVEAAFDS----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
257 |
+
>SRR6266851_2378427 53 0.386 2.464E-03 127 224 352 0 100 101
|
258 |
+
-------------------------------------------------------------------------------------------------------------------------------PAFGDVHTNHLLSVFGLDEERgcAYVVDDIPPAFRGWIPIAMLDAARGSANrAEHGRDRFFTNEAIRWRWLEIRCSAQLPRLSDEAIDAVLGQNLAGFRQP-------------------------------------------------------------------------------------------------------------------------------
|
259 |
+
>SaaInlV_150m_DNA_4_1039716.scaffolds.fasta_scaffold159517_1 53 0.262 2.464E-03 85 175 352 0 99 177
|
260 |
+
-------------------------------------------------------------------------------------EHYGVD--THQGAGAGIESLMSELVQGHAVIVAVDADDM-WNPGslfrglFGEDGADHAVVVTGLdlsdpDHPQVYINDPgDPNGAGKAYPLEQFLAAWSDSG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
261 |
+
>SRR5919198_383436 53 0.382 2.464E-03 22 113 352 254 347 351
|
262 |
+
----------------------VEPWYDDLASCLHSTLASVVAQRQLDPLVVLGSGWTFAYSPGEWAPAEFFYPALDGSLAATLAPHqpLSIAWREPGSPEEAEQELVAALAGGGP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
263 |
+
>SRR5436853_446621 53 0.283 3.268E-03 136 238 352 0 97 123
|
264 |
+
----------------------------------------------------------------------------------------------------------------------------------------HLLTVYGFDDDagTALLADPVPPSFAGPVPRSLLAAARDSDNPvLHERDMFFTANPIANRWLDVRVGPVQPAFGPAFVRDAVAAAR--------PDPPSAAASLRR-----------------------------------------------------------------------------------------------------------------
|
265 |
+
>SRR5215204_5524451 52 0.283 4.335E-03 103 162 352 1 59 117
|
266 |
+
-------------------------------------------------------------------------------------------------------HLVDKLDKGMPVCLPVDIYFLPFTSHFQRLHMAHYVNVFGYNDTQYYIISPYY-RYQGWV---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
267 |
+
>SRR3954452_13515641 52 0.349 4.335E-03 219 341 352 0 119 120
|
268 |
+
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGMLAISTETAWTGMNGLDRYLTRLT--VRAEEKDADALADVYPFGWAMQAQAYMHAELLRACAVRWRRPVLGEAARRLAAVSHAWTGVPISAGHGGAD-TRVAETVERHCRRLRRCHEQAVE----------
|
269 |
+
>SRR5271163_123213 52 0.265 5.750E-03 100 175 352 2 79 120
|
270 |
+
----------------------------------------------------------------------------------------------------SVDDLQQYLDQGRSVIVSVDPDPI-WYPGQPDQGEGHAVMITAIDETTgmVTLDDTgSPQGNEEQVPISEFQQAWAEHD--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
271 |
+
>GraSoiStandDraft_4_1057263.scaffolds.fasta_scaffold8820481_1 52 0.270 5.750E-03 99 171 352 122 191 210
|
272 |
+
---------------------------------------------------------------------------------------------------PTLKTIRDFLRKGYIAIVNVNYYPLYRQPGY----SGHFVIVLSIDKRSVRLHDPGLPRKdNMRIPLRQFLAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
273 |
+
>SRR3990172_4137688 52 0.270 5.750E-03 99 171 352 128 197 216
|
274 |
+
---------------------------------------------------------------------------------------------------PTLKTIRDFLRKGYIAIVNVNYYPLYRQPGY----SGHFVIVLSIDKRSVRLHDPGLPRKdNMRIPLRQFLAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
275 |
+
>SRR5215467_2164624 51 0.316 7.626E-03 152 289 352 0 137 138
|
276 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------DASPPSFKGSIPIHALQAARGSENPAiHNRDLFFTNVPIANRWLEVEIGNTFPCITKQWVKEVLSTNTQRFAAPTSEVALSGTEGMVRYFGDF-GLDQVRERGTPAMDELVLVSVAFQEATALHADFLRAMGQRLDWRE--------------------------------------------------------------
|
277 |
+
>MGYP000895684420 51 0.301 7.626E-03 100 171 352 80 151 152
|
278 |
+
----------------------------------------------------------------------------------------------------SLARLRGLVARGPVVVGPLDMSKLTYIPGHEHlVGADHFVLVHDVTDEEVFMHDPGGSPY-VSLPIADFLAAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
279 |
+
>SRR5919197_1036140 51 0.415 7.626E-03 22 119 352 166 266 267
|
280 |
+
----------------------VRQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWEFRFQRGRVGREEFAYPCRfEGDVARSLAPYHPLRSSWWSPalDEDPLGELARRVEDGELPVAAVD----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
281 |
+
>2286|Ga0268252_1295275_1|+2|10 51 0.373 1.011E-02 274 348 352 2 76 87
|
282 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HGELLRWCGRRWIDAALAEAGRRVERVAHLWTGVRVVCAHLRDDPAPHGRTLARHTNRLQHAYEDAVEAVARAAG---
|
283 |
+
>SRR5947208_11376747 51 0.406 1.011E-02 226 343 352 2 118 122
|
284 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PARGRAGLAGLACYLDGICER-AAGPHGRAALDEIYTVGWQMQAATALHADFLRLAGRSLDWDELAEAGRQVDRLANAWTPLRIMGAHAGTRDLDVTGELRRRATRLVADHELTLELL--------
|
285 |
+
>ERR671937_651557 51 0.397 1.011E-02 22 111 352 66 158 160
|
286 |
+
----------------------VRQWYRDPVSCLQSTLATVVLAAGAEPLPVLGLGWEFLFEPGDVRPEEFYYPCRfEGDLARSLAPYHPIRSRWWspEPDEDPLAELARRVEAG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
287 |
+
>SRR5215472_1733542 51 0.384 1.341E-02 156 269 352 0 114 118
|
288 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------------PGFDGPMASRDFLLAWGSANPADTRDAFFSDSGIGRRCLITRVTGPgRGPVGRDGLAAALRANLNDLTAPlaTSQLPWTGLAGLHRYLDDLRQRAAAGD--QEVLADAYAFGWPMQA----------------------------------------------------------------------------------
|
289 |
+
>SRR4051812_10767532 51 0.393 1.341E-02 20 106 352 55 143 145
|
290 |
+
--------------------PEPELWYRDLISCLQATFGSLLVRDGADPLVVLGAGWRFLHLPGDVRFDEFYYPCRDGDLGAALAPHHDLHSRWWQPGDEGdvWREIRE-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
291 |
+
>MGYP000929587384 50 0.285 1.778E-02 115 172 352 1 63 84
|
292 |
+
-------------------------------------------------------------------------------------------------------------------IVPVDTALLPYWLTRSDvpeteRGTDHAVVVVAVDEQHVYVNDPDFAQAPQVVELGWFLDAWR-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
293 |
+
>SRR5713101_4123152 50 0.337 1.778E-02 52 140 352 9 92 103
|
294 |
+
----------------------------------------------------LGARWSFGFPRDDRELQEYTL---PDYLAAKLRTRTGIVIRSHSDPAGL--ELRRHLADEQPAAIAVDSFYLPYRPAFQRVHSSRTILV-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
295 |
+
>SRR5206468_3369128 50 0.358 1.778E-02 224 343 352 7 123 124
|
296 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GEPAPAYRGLAGLRGF---LTAVADALAEGRPASDEGFVVAGPILAITGLHADWLARAARMFDDPRLLEAARVVERVAHHWTAFRIAVATARADPPGAAESLRARGRALVDDHERALALL--------
|
297 |
+
>SRR5437879_4950898 50 0.301 2.358E-02 141 239 352 1 106 113
|
298 |
+
---------------------------------------------------------------------------------------------------------------------------------------------YGVDDGHklVWVLDAIPPAFQGPISRATFDAGRDSSNRsRQERDMFFADNPIANRWLELDIEPDRNADgalgDRDRLRQILEGNLRGFTQERKSGPFEGVRGVDRF----------------------------------------------------------------------------------------------------------------
|
299 |
+
>GraSoiStandDraft_27_1057306.scaffolds.fasta_scaffold00573_2 50 0.252 2.358E-02 101 171 352 0 77 154
|
300 |
+
-----------------------------------------------------------------------------------------------------LDDLKKFIDNGQPPIALVKYANLPDRVDKGST-GGQYVVVVGYDDaaQNIFINDPDMfpwnhaAGFQKAYPYQTWLSAW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
301 |
+
>A0A2H2XNW3 50 0.271 2.358E-02 101 180 352 125 201 208
|
302 |
+
-----------------------------------------------------------------------------------------------------IEDIRQLLACGYLIICLVNWCKLNAHIGYE----GHYVLVYDISETHIYLHDPGLPTYiSQALTITEFEQAWAAPTQRDRN---------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
303 |
+
>SRR3954466_6934608 49 0.329 3.126E-02 88 175 352 0 87 132
|
304 |
+
----------------------------------------------------------------------------------------GIPVSIENSD---VSGLIDALDSGKGIIVAVDSGEYWTGEATEDNAPDHAVVVAAIDEENgiVYLSDTGtPDGNMLAVPLDAFLDAWGDSG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
305 |
+
>ERR1700752_922602 49 0.304 3.126E-02 100 175 352 3 84 182
|
306 |
+
----------------------------------------------------------------------------------------------------TLTDLETYLDEGRSIILGVDSsdiWNEPEDPAEPVDQADHALVITAIDKEQGLVvlSDPgDPDGNQSIVPLSDFTEAWSDSG--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
307 |
+
>SRR2546423_3336499 49 0.373 3.126E-02 24 111 352 176 266 268
|
308 |
+
------------------------QWYRDPISCLQSTLATVLLAAGVEPLPVLGLAWEFLFRPGRIGREEFAYPCRfEGDVARSLAPYHPLRSHWWSPAPEedALAELARRVERG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
309 |
+
>SRR5947209_18303409 49 0.482 4.144E-02 20 75 352 68 123 125
|
310 |
+
--------------------PGVEFWRHDLSSCLQDCLATLLVQRRHDPVPALGAAWDFYYPPGDLRPEEYYYPCR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
311 |
+
>SRR6185503_18933790 49 0.450 4.144E-02 20 99 352 52 131 136
|
312 |
+
--------------------PEPELWYRDLISCLQATFGSLLLRAGADPLTVLGAGWRFLHVPGDVRSEEFYYPAEDADLGAALAPHHELHSRWWQPADE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
313 |
+
>ERR1700687_2983525 49 0.379 5.494E-02 232 318 352 22 106 109
|
314 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GLAALADLLADLASRLEREGAWP--LRDIYVLGWSAQAEASLHSRFLAGAARDLQRPDVAEAARWVDAVAHAWTGFRVAAAHGAAAP---------------------------------
|
315 |
+
>SRR5579862_9683268 49 0.357 5.494E-02 213 337 352 4 127 133
|
316 |
+
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ALKANLAAFSADSPADaPWTGNIGLRRYLDHVISHAEA--GHADAVRQVYPFAWSAQASAAMHGELLRTRGGQWRIPELTEAGRAVEAVASAWTPVRVLAAHGWQSPASIASSLRSYGIDLARQYR--------------
|
317 |
+
>SRR5215471_6261074 49 0.308 5.494E-02 197 327 352 0 128 147
|
318 |
+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------EVTGPARALDRVFLRRVLADNGRRFRSGSGAGPeLTGLAGLERF----CEWaAAALAAAQPVVDEVFVVAGVVLADTALHADYLAHAAARLGAARLLEAARRVERLAHHWTAVRIAVATARGAEPTAAPRLGR------------------------
|
319 |
+
>SRR5213076_2182558 48 0.551 7.282E-02 115 161 352 0 48 116
|
320 |
+
-------------------------------------------------------------------------------------------------------------------IVAVDNYHLPFRPAYGDVHTNHLVVVYGFDDEagEVHVLDSKPPRYRGP----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
321 |
+
>SRR5207237_9007299 48 0.259 7.282E-02 100 171 352 27 106 120
|
322 |
+
----------------------------------------------------------------------------------------------------GVDRLKAEVAAGNPVVVWITAGkYVQRTPvvasyngeTFKLVAGEHAVVVYGYDSGGVYIMDVSNGSFTYT-EWSSFLTRW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
323 |
+
>4958|scaffold10375_5|+5204|00 48 0.285 7.282E-02 95 175 352 87 169 196
|
324 |
+
-----------------------------------------------------------------------------------------------SSPDKAWEANKTAIKT-KPIIVLTDIYHLKFRNEYLKQHGAHFIILFYYDElsNSVGVLDWYePHFYKGEILLNDFLSARYSEN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
325 |
+
>EndMetStandDraft_8_1072994.scaffolds.fasta_scaffold5616288_1 48 0.285 7.282E-02 95 175 352 87 169 196
|
326 |
+
-----------------------------------------------------------------------------------------------SSPDKAWEANKTAIKT-KPIIVLTDIYHLKFRNEYLKQHGAHFIILFYYDElsNSVGVLDWYePHFYKGEILLNDFLSARYSEN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
327 |
+
>SRR5262249_50530649 48 0.330 7.282E-02 1 105 352 93 204 205
|
328 |
+
-GGGRGMTSDVTARTYMIIGPEPELWYRDLVSCLRATFGSLLARACADPLTVLGAGWRFLHLPGDVRSEEFYYPCPADesggaDLGAALAPHHGLHSRWWQpaDEDDLWREVR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
329 |
+
>SRR5437667_7529899 48 0.381 9.652E-02 235 344 352 0 108 114
|
330 |
+
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GLRDYLAGVAARARDARD-PGVMDELYVAGWAAQQSAGLHADFLAAAGCRLGRTRLAEAGREVDRLAHHWSDLRMLGAHCRAEPAAAASRIRRRAAQLVTDTERVLAQLE-------
|
331 |
+
>SRR2546430_2482901 48 0.346 9.652E-02 146 269 352 4 127 134
|
332 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------GKVRVLDEVPPRFDGEIELAELAAARHSVNPiVHGRDKFFTANPIAGRWLSVRATASPPytVDDRERISALLRANIERFDQ-SQADVYSGLSGQARFLAEGARRLDTDPAAKDEL--FVVAGTALAA----------------------------------------------------------------------------------
|
333 |
+
>SRR4030042_1077736 48 0.232 9.652E-02 117 172 352 7 59 215
|
334 |
+
---------------------------------------------------------------------------------------------------------------------WRDRTDVPWTETWDD---GHYMVLLGMDAANLYFEDPSLLGARGVIPRAEFVDRWH-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
335 |
+
>MGYP000240722769 48 0.333 1.279E-01 113 145 352 42 74 91
|
336 |
+
-----------------------------------------------------------------------------------------------------------------PIIVLVDVYYLPYRKEYHKYHASHAVLLVGYDE--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
337 |
+
>SRR5205814_4332124 48 0.359 1.279E-01 194 296 352 0 101 104
|
338 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------LQVSIGEDFPAPAPGWVDGVIRQNVERFRAGGDGGSFSGLAGLRDYLAGVAARARDARD-PGVMDELYVAGWAAQQSAGLHADFLAAAGCRLGRTRLAEAGRE-------------------------------------------------------
|
339 |
+
>SRR6266542_2363188 47 0.509 1.695E-01 21 73 352 49 101 105
|
340 |
+
---------------------PVRLWCRDLVSCLQATFATVLLHAGRDPLAALGAAWGFYYLPGDVTSEEFYYP--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
341 |
+
>SRR5688572_6867596 47 0.298 1.695E-01 49 150 352 41 139 141
|
342 |
+
-------------------------------------------------PELLGARWTFAlpHDERRAELTEYSLPFHPLGFVAAIDRRTGLRLQEGR-----AEELDGTVAGGTPVIAAVDSFHLPYRPAFGRVHSHRTVIVRRVLRDAVEV---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
343 |
+
>SRR3989344_2091767 47 0.222 2.247E-01 125 175 352 7 60 146
|
344 |
+
-----------------------------------------------------------------------------------------------------------------------------WVKSWEKVHSSHFMTVTGYDDDYVYINDPtdhDLTIKNMKVPNNNFLSAWSNGN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
345 |
+
>SRR5216683_1865760 47 0.315 2.247E-01 101 172 352 126 194 216
|
346 |
+
-----------------------------------------------------------------------------------------------------LDDLKRELSLGSLAIVNVNYFRL----LEKVGYAGHFVVVEAVDEEEVLLQNPGLPPIPnQRVAMATFLAAWH-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
347 |
+
>SRR3569832_2248283 46 0.377 2.977E-01 134 178 352 21 65 133
|
348 |
+
--------------------------------------------------------------------------------------------------------------------------------------GDHWFSVYGIDESGVRIYDPIPSKFTGTIPLEDFKRAWGGNALVA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
349 |
+
>SRR4051812_39536360 46 0.300 2.977E-01 104 172 352 70 137 138
|
350 |
+
--------------------------------------------------------------------------------------------------------VAAQLAAGLPPIALIpDWRALPAEQTY-ATGNAHAVVITGVTDSDVTFIDPW-PGKSFAMSTTRFEAAWS-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
351 |
+
>SRR5688572_24881951 46 0.294 2.977E-01 104 171 352 6 62 140
|
352 |
+
--------------------------------------------------------------------------------------------------------LEEALGAGLPPIVLIEVAPARW----------HYVVVVGVTDNAVHVHDPARAP-DLRIERAEFLSRW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
353 |
+
>SRR2546423_2718336 46 0.417 2.977E-01 22 99 352 74 152 162
|
354 |
+
----------------------IRQWYRDPISCLQSTLAAVLLAAGAEPLPVLGLAWEFRFQPGRVGREEFAYPCRfEGDLAGSLAPYHPLRSSWWSPAPE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
355 |
+
>SRR5688500_13065661 46 0.325 3.944E-01 271 313 352 2 44 130
|
356 |
+
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------RGLYADFLTEAAALLGDPRVAEAASGWRAAADLWEDLSDAVIP--------------------------------------
|
357 |
+
>SRR4051812_2527260 46 0.279 3.944E-01 178 313 352 4 132 135
|
358 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HDRDRFFTNQPIAHRWLDVGLESPRPRPSTASI---VRGNLAAFRKVSDGHIFRGSSGLAQFLDTAAGAIAGGKPPDELF---VVAGTALAAS-SLHAEWLSREGMRTGTPVWCELGRAVDRISHHWTALRIAAAR--------------------------------------
|
359 |
+
>SRR4051812_40660863 46 0.407 3.944E-01 1 76 352 78 153 154
|
360 |
+
-GGGRGMTSVVMARTYMITGPEPELWYRDLISCLQATFGSLLARAGADPLTVLGAGWRFLHLPGDVRSEEFYYPCPP-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
361 |
+
>SRR5207244_1963219 46 0.405 3.944E-01 22 99 352 88 166 171
|
362 |
+
----------------------IRQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWEFLFVPGDVRPEEFYYPCRfKGDPARSLAPYHPIRSHWWRPAEE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
363 |
+
>A0A2B0WVF8 46 0.236 3.944E-01 93 171 352 79 171 362
|
364 |
+
---------------------------------------------------------------------------------------------VYSDFGNGLQFIKECLNRQEVFIALGSTFFLPYSNDYlnpkfikshidihtDKYVTDHYLAINKLTEDSVFVQDPVPNKYMGEISLEEFHSFW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
365 |
+
>MGYP001339385609 46 0.295 5.226E-01 129 172 352 3 46 60
|
366 |
+
---------------------------------------------------------------------------------------------------------------------------------YWDQVTSHVVVVIGCDKQGVVINDPSLPDGGRAISWDVFLAAWA-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
367 |
+
>SRR6185295_5817841 46 0.388 5.226E-01 252 318 352 41 107 118
|
368 |
+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------DSAGVRQELYVLGWWMQAMTAVHARFLAEAGRALGRPELVEIARRVELVAHEWTGLRITAVHGPADD---------------------------------
|
369 |
+
>SRR4051812_38390578 46 0.280 5.226E-01 104 153 352 45 94 119
|
370 |
+
--------------------------------------------------------------------------------------------------------IRAELAASGTVIVVANTYHLPWSPAYQQSATPHWLLVTDTHDGLWHLVDP------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
371 |
+
>UPI000845F276 46 0.256 5.226E-01 100 169 352 85 156 167
|
372 |
+
----------------------------------------------------------------------------------------------------TFQDIASALRNGEVPIVLISTYRL------HRVRAPHWVVVTGFDRHYIYFHDPYegfyernkRQAQHISIPINEFLR--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
373 |
+
>MGYP001050378313 46 0.526 5.226E-01 87 160 352 109 178 179
|
374 |
+
---------------------------------------------------------------------------------------HGLRRESIDADDGAL------VARGELPIAAVDNFHLPFRPAFHDVHAAHLVVVYGVDRARGLVNvsDAMPPAFSG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
375 |
+
>ERR671931_602696 45 0.323 6.924E-01 203 304 352 0 98 99
|
376 |
+
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PALTRKWLHEVVEQNRERLL--ADDDPFCGLVALRAYMEDLLAYCGTPFGGRALVGA-YRLGWPMQAQADLHGELLRLAGARFDDAALCEAAAEVRGVGHAW-----------------------------------------------
|
377 |
+
>SRR5882724_1744776 45 0.295 6.924E-01 94 154 352 41 95 101
|
378 |
+
----------------------------------------------------------------------------------------------YRIGARGLDELRGDLADGAVPVVLVSMDYI------HKDPTAHWVVVTGVDDENVTVNDPW-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
379 |
+
>SRR6478752_1188215 45 0.400 6.924E-01 20 99 352 50 134 137
|
380 |
+
--------------------PEPELWYRDLISCLQATFGSLLARLGADPLAVLGAGWRFLHLPGDVRSEEFYYPCPADesggtDLGAALAPHHQLHSLWWQPADE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
381 |
+
>SRR6185437_13975043 45 0.282 6.924E-01 135 173 352 14 51 246
|
382 |
+
---------------------------------------------------------------------------------------------------------------------------------------DHYVVVTGVEDGIVRFHDPHGHPYA-TLPADDFIAAWAS----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
383 |
+
>SRR5215467_3611681 45 0.365 9.172E-01 255 347 352 9 101 110
|
384 |
+
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------HMLGELYTVGWAAQSVAALHADFLRRHAMAFGWSEGLEAARSVDQLANLWTPLRIFAAHASTNGIEVSDRLRQRMRVVVDHYELALEELDRAA----
|
385 |
+
>SRR6185312_7446242 45 0.341 9.172E-01 268 308 352 11 51 138
|
386 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGRLLYADFLDVASVVLDEPALERAAAGFREAGDRWSKLA-------------------------------------------
|
387 |
+
>SRR5205085_3472310 45 0.430 9.172E-01 22 99 352 58 136 150
|
388 |
+
----------------------VSQWYRDPISCLQSTLATVLLAAGAEPLPVLGLAWELRFQPGRVGREEFAYTCRfEGDLARSLAPYHPISSRWWSPAED------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
389 |
+
>SaaInl7_100m_RNA_FD_contig_21_4047958_length_241_multi_3_in_0_out_0_1 45 0.217 9.172E-01 85 171 352 72 172 360
|
390 |
+
-------------------------------------------------------------------------------------KIYTMERKFYRSLEEGLYGIHEHLSQNDFFISLGTTYYLPYSRDYKNPKyieshvksnsnkyvTDHYLSVYGLHNENIFINDPVPNKYIGAISLKDFSDFW------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
391 |
+
>SRR6185312_10924508 44 0.395 1.215E+00 232 327 352 5 95 127
|
392 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GMSGLARYFDLLRERTAA-PDGHQALEELYVLGWSLQASTALHADFLMKAGKQLDWYQLTTVGRQVARIAHHWTPLRMLGAHG----RARPDEIRE------------------------
|
393 |
+
>MGYP001029765960 44 0.279 1.215E+00 110 152 352 91 133 258
|
394 |
+
--------------------------------------------------------------------------------------------------------------QKQPIMAFVDAFDCDWLPFYRKHHIKHTIIVYSINKDDIYFYD-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
395 |
+
>18595|scaffold4953588_1|+69|01 44 0.323 1.609E+00 86 152 352 25 95 99
|
396 |
+
--------------------------------------------------------------------------------------RAGATVTLHETSGrvAAVKHLQEALAAGTPPLAWVDQAHMPYlqLPEVLKGHIGHLVAIAGQTGDSYLIDD-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
397 |
+
>SRR5215208_421555 44 0.275 1.609E+00 188 295 352 1 106 107
|
398 |
+
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------PIRNRWFEIEWSGAVPVLPSlGRVRCVVNRNLETFDRPPVDGWTFGQAGLRLMFDVLAG---SAAPGVRESDEMFVVAGAALAKTALHAEYLRQAGRRWGVPSLSEASR--------------------------------------------------------
|
399 |
+
>SRR5215213_5558053 44 0.298 1.609E+00 101 175 352 49 114 117
|
400 |
+
-----------------------------------------------------------------------------------------------------LDDLKEELARGLYPIVYLElvSGQLRYV---------HSVVVVEITDDQVQVLDPEIGERAFNI--EDFNRAWSAKN--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
401 |
+
>SRR5919202_3634501 44 0.400 1.609E+00 22 90 352 119 188 191
|
402 |
+
----------------------VAHWYRDPLSCLQSTLASVLLHAGAEPLPVLGLAWEFLFKPGEVPREEFAYPCRfEGDVARSLEPYHPIR---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
403 |
+
>MGYP000886274396 44 0.291 2.132E+00 101 148 352 21 68 69
|
404 |
+
-----------------------------------------------------------------------------------------------------WKQNLKKLQEGIPLIILVDLFYLPYTIYLGKEHAAHAIIVYEYCDNKI-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
405 |
+
>SRR5581483_1739624 44 0.382 2.132E+00 25 92 352 42 109 110
|
406 |
+
-------------------------WYRDLISCLQATFGTLVARLGADPLAVLGAGWRFLHLPGDVRFDEFYYPCPDGDLGAALAPHHELRAR-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
407 |
+
>ERR1700741_1246718 44 0.491 2.132E+00 20 78 352 52 110 120
|
408 |
+
--------------------PEPELWYRDLISCLQATFGSIVARAGADPLTVLGAGWRFLHLPGDVRSEEFYYPCPPDD---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
409 |
+
>SRR4051794_34888363 44 0.322 2.132E+00 4 62 352 75 133 134
|
410 |
+
----HAAAGRPGRRIAAMRLPRMEPWRHDLVGCLHTAIGSVMANHGFDPLVTLGASWGFYYRP-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
411 |
+
>5940|scaffold5088856_1|+3|11 43 0.366 2.824E+00 22 81 352 21 80 81
|
412 |
+
----------------------LKRWYADPMSCLQACLGTVLIFPGADPLETLGTSWEFRYVPGDVRREEFYYPCRFKDDPA------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
413 |
+
>ERR1700729_2025172 43 0.463 3.739E+00 23 63 352 65 105 111
|
414 |
+
-----------------------PMWYRDTVSCLPATIGSVLAYHGYDPLEVLGAGWDFTFIPG------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
415 |
+
>SRR5206468_2692918 42 0.328 4.952E+00 1 64 352 41 104 110
|
416 |
+
-GRAQPAGRRGRGARVKVELTPLSYWFHDLCSCLHDCLGTVLTYHGQSPIEVMGASWEFFHSPDD-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
417 |
+
>SRR3954452_12164045 42 0.481 4.952E+00 20 73 352 70 123 125
|
418 |
+
--------------------PEPQWWYRDLVSCLQATFGTVLARAGIDPLSVLGAGWRFLHLPGDVRSEEFYYP--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
419 |
+
>SRR5579862_4067062 42 0.341 4.952E+00 268 308 352 13 53 130
|
420 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GAGRPLYGDFLDVAASVLREPSLERAAAGFRDAGRRWTGLA-------------------------------------------
|
421 |
+
>ERR1051325_11233650 42 0.454 4.952E+00 21 75 352 72 126 131
|
422 |
+
---------------------PVRQWYRDPVSCLQSTLSTVLLDVGADPLAVLGLGFEFRYLPGEVRPEEFYYPGR------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
423 |
+
>ERR1700712_5093054 42 0.252 4.952E+00 98 174 352 75 169 193
|
424 |
+
--------------------------------------------------------------------------------------------------DQALNDLKAALAAGKATMVGVNNTALykdfPHQDQPgQTPAANHQVVVIGYDtaTDTVYIDDGGWPPDPedagrpeggqkMPVKLDTFLAAWKAD---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
425 |
+
>SRR3712207_5935252 42 0.292 6.558E+00 268 308 352 0 40 104
|
426 |
+
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------GNLRALYADFLEQAAARLHAPELVPVAGLFRAAAEAWQSVA-------------------------------------------
|
427 |
+
>SRR5579872_692970 42 0.280 6.558E+00 76 146 352 32 106 111
|
428 |
+
----------------------------------------------------------------------------PDVGIGKALELLGFAVaeRAHDDGADApFDDLRHALASGPVVLGPLDMGHLTYIPYHEALGGaDHFVLAYAMDEE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
429 |
+
>SRR2546430_4736840 42 0.270 6.558E+00 106 167 352 7 80 145
|
430 |
+
----------------------------------------------------------------------------------------------------------EALGGGDPVLVVGDAFHLPWVPYHGRQHLDHGFVIEGLEPGAmpvvAHVVDAYDNatqwghavPLATTLPLADL----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
431 |
+
>SRR5256885_6193825 42 0.359 8.684E+00 1 98 352 12 114 119
|
432 |
+
-GRSPDMTVDVPARTYMITGPEPEWWYRDLVSCLQATFGSLLIREGADPLSVLGAGWRFLHLPGEVRSEEFYYPCPTDesgaiDLGAALAPHHQLHSRRLPPAG-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
433 |
+
>SRR3954469_21777195 42 0.431 8.684E+00 20 77 352 32 89 121
|
434 |
+
--------------------PEPEWWYRDLISCLQATFGSVLANAGADPLAVLGAGWRFLHLPGEVRSEEFYYPCPAG----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
435 |
+
>SRR3546814_4326061 42 0.311 8.684E+00 100 160 352 66 120 151
|
436 |
+
----------------------------------------------------------------------------------------------------GVAELEERFRAGWLPMVLVSTFYVHG------DHVAHWVVVTGFGPDAVYINDPWVDRKGG-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
examples/7wux/msa/2/pairing.a3m
ADDED
The diff for this file is too large to render.
See raw diff
|
|
examples/dimer.fasta
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
>dimer
|
2 |
+
FPTIPLSRLFDNAMLRAHRLHQLAFDTYQEFEEAYIPKEQKYSFLQNPQTSLCFSESIPTPSNREETQQKSNLELLRISLLLIQSWLEPVQFLRSVFANSLVYGASDSNVYDLLKDLEERIQTLMGRLEDGSPRTGQIFKQTYSKFDTNSHNDDALLKNYGLLYCFRKDMDKVETFLRIVQCRSVEGSCGF:FSGSEATAAILSRAPWSLQSVNPGLKTNSSKEPKFTKCRSPERETFSCHWTDEVHHGTKNLGPIQLFYTRRNTQEWTQEWKECPDYVSAGENSCYFNSSFTSIWIPYCIKLTSNGGTVDEKCFSVDEIVQPDPPIALNWTLLNVSLTGIHADIQVRWEAPRNADIQKGWMVLEYELQYKEVNETKWKMMDPILTTSVPVYSLKVDKEYEVRVRSKQRNSGNYGEFSEVLYVTLPQMSQ
|
examples/example.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc30e4fc1073854069611a138dcec5e0aeefacbad5889784d4e97cf6f0bc1d9e
|
3 |
+
size 836
|
examples/example_with_msa.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de916e3746b17638c3e7f2409f4e0242f2e54dadeab39ae226aa582b1378ae4b
|
3 |
+
size 3367
|
examples/finetune_subset.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
6hvq
|
2 |
+
5mqc
|
3 |
+
5zin
|
4 |
+
3ew0
|
5 |
+
5akv
|
examples/ligands/7wux_smiles.smi
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Nc1ncnc2c1ncn2[C@@H]1O[C@H](CO[P@@](=O)(O)O[P@](=O)(O)OP(=O)(O)O)[C@@H](O)[C@H]1O
|
examples/ligands/compounds-3d-R.sdf
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mol_R
|
2 |
+
MOE2022 3D
|
3 |
+
|
4 |
+
55 59 0 0 1 0 0 0 0 0999 V2000
|
5 |
+
1.8430 3.9110 0.0060 O 0 0 0 0 0 0 0 0 0 0 0 0
|
6 |
+
1.0400 3.0050 0.1630 C 0 0 0 0 0 0 0 0 0 0 0 0
|
7 |
+
1.3790 1.8370 0.7780 N 0 0 0 0 0 0 0 0 0 0 0 0
|
8 |
+
2.6800 1.7040 1.4280 C 0 0 0 0 0 0 0 0 0 0 0 0
|
9 |
+
3.2700 2.6180 1.3520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
10 |
+
2.5860 1.4190 2.4760 H 0 0 0 0 0 0 0 0 0 0 0 0
|
11 |
+
3.2540 0.5870 0.5860 C 0 0 0 0 0 0 0 0 0 0 0 0
|
12 |
+
3.7280 0.8360 -0.7040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
13 |
+
3.6890 1.8370 -1.1060 H 0 0 0 0 0 0 0 0 0 0 0 0
|
14 |
+
4.2500 -0.1980 -1.4740 C 0 0 0 0 0 0 0 0 0 0 0 0
|
15 |
+
4.6150 -0.0020 -2.4720 H 0 0 0 0 0 0 0 0 0 0 0 0
|
16 |
+
4.2990 -1.4870 -0.9510 C 0 0 0 0 0 0 0 0 0 0 0 0
|
17 |
+
4.8160 -2.4870 -1.7410 O 0 0 0 0 0 0 0 0 0 0 0 0
|
18 |
+
4.8150 -3.8560 -1.2980 C 0 0 0 0 0 0 0 0 0 0 0 0
|
19 |
+
5.2650 -4.4870 -2.0640 H 0 0 0 0 0 0 0 0 0 0 0 0
|
20 |
+
3.7890 -4.1790 -1.1180 H 0 0 0 0 0 0 0 0 0 0 0 0
|
21 |
+
5.3890 -3.9390 -0.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
22 |
+
3.8390 -1.7460 0.3410 C 0 0 0 0 0 0 0 0 0 0 0 0
|
23 |
+
3.8840 -2.7470 0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
|
24 |
+
3.3220 -0.7060 1.1050 C 0 0 0 0 0 0 0 0 0 0 0 0
|
25 |
+
2.9690 -0.9010 2.1070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
26 |
+
0.1730 0.9630 0.9500 C 0 0 2 0 0 0 0 0 0 0 0 0
|
27 |
+
-0.0990 0.7160 2.4440 C 0 0 0 0 0 0 0 0 0 0 0 0
|
28 |
+
0.8190 0.5160 2.9970 H 0 0 0 0 0 0 0 0 0 0 0 0
|
29 |
+
-0.6260 1.5510 2.9050 H 0 0 0 0 0 0 0 0 0 0 0 0
|
30 |
+
-0.9990 -0.5510 2.3340 C 0 0 0 0 0 0 0 0 0 0 0 0
|
31 |
+
-2.0270 -0.2950 2.0750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
32 |
+
-0.9910 -1.1350 3.2540 H 0 0 0 0 0 0 0 0 0 0 0 0
|
33 |
+
-0.3240 -1.2490 1.2430 N 0 0 0 0 0 0 0 0 0 0 0 0
|
34 |
+
-0.4900 -2.6690 0.9440 C 0 0 0 0 0 0 0 0 0 0 0 0
|
35 |
+
0.2320 -3.0190 0.2070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
36 |
+
-0.4230 -3.2860 1.8400 H 0 0 0 0 0 0 0 0 0 0 0 0
|
37 |
+
-1.9000 -2.6260 0.3860 C 0 0 0 0 0 0 0 0 0 0 0 0
|
38 |
+
-2.1050 -2.4640 -0.9870 C 0 0 0 0 0 0 0 0 0 0 0 0
|
39 |
+
-1.2570 -2.3610 -1.6480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
40 |
+
-3.3950 -2.4360 -1.5060 C 0 0 0 0 0 0 0 0 0 0 0 0
|
41 |
+
-3.5580 -2.3090 -2.5660 H 0 0 0 0 0 0 0 0 0 0 0 0
|
42 |
+
-4.4710 -2.5730 -0.6380 C 0 0 0 0 0 0 0 0 0 0 0 0
|
43 |
+
-5.7290 -2.5430 -1.1380 F 0 0 0 0 0 0 0 0 0 0 0 0
|
44 |
+
-4.2930 -2.7400 0.7280 C 0 0 0 0 0 0 0 0 0 0 0 0
|
45 |
+
-5.1440 -2.8450 1.3840 H 0 0 0 0 0 0 0 0 0 0 0 0
|
46 |
+
-2.9990 -2.7680 1.2370 C 0 0 0 0 0 0 0 0 0 0 0 0
|
47 |
+
-2.8430 -2.9000 2.2980 H 0 0 0 0 0 0 0 0 0 0 0 0
|
48 |
+
0.2400 -0.4310 0.3720 C 0 0 0 0 0 0 0 0 0 0 0 0
|
49 |
+
0.6660 -0.7810 -0.7260 O 0 0 0 0 0 0 0 0 0 0 0 0
|
50 |
+
-0.9170 1.7860 0.2880 C 0 0 0 0 0 0 0 0 0 0 0 0
|
51 |
+
-2.2660 1.5010 0.0860 C 0 0 0 0 0 0 0 0 0 0 0 0
|
52 |
+
-2.6860 0.5710 0.4390 H 0 0 0 0 0 0 0 0 0 0 0 0
|
53 |
+
-3.0670 2.4300 -0.5770 C 0 0 0 0 0 0 0 0 0 0 0 0
|
54 |
+
-4.1150 2.2290 -0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
|
55 |
+
-2.4960 3.6230 -1.0220 C 0 0 0 0 0 0 0 0 0 0 0 0
|
56 |
+
-3.2780 4.5220 -1.6600 F 0 0 0 0 0 0 0 0 0 0 0 0
|
57 |
+
-1.1510 3.9170 -0.8290 C 0 0 0 0 0 0 0 0 0 0 0 0
|
58 |
+
-0.7280 4.8460 -1.1810 H 0 0 0 0 0 0 0 0 0 0 0 0
|
59 |
+
-0.3650 2.9730 -0.1680 C 0 0 0 0 0 0 0 0 0 0 0 0
|
60 |
+
1 2 2 0 0 0 0
|
61 |
+
2 3 1 0 0 0 0
|
62 |
+
2 55 1 0 0 0 0
|
63 |
+
3 4 1 0 0 0 0
|
64 |
+
3 22 1 0 0 0 0
|
65 |
+
4 5 1 0 0 0 0
|
66 |
+
4 6 1 0 0 0 0
|
67 |
+
4 7 1 0 0 0 0
|
68 |
+
7 8 1 0 0 0 0
|
69 |
+
7 20 2 0 0 0 0
|
70 |
+
8 9 1 0 0 0 0
|
71 |
+
8 10 2 0 0 0 0
|
72 |
+
10 11 1 0 0 0 0
|
73 |
+
10 12 1 0 0 0 0
|
74 |
+
12 13 1 0 0 0 0
|
75 |
+
12 18 2 0 0 0 0
|
76 |
+
13 14 1 0 0 0 0
|
77 |
+
14 15 1 0 0 0 0
|
78 |
+
14 16 1 0 0 0 0
|
79 |
+
14 17 1 0 0 0 0
|
80 |
+
18 19 1 0 0 0 0
|
81 |
+
18 20 1 0 0 0 0
|
82 |
+
20 21 1 0 0 0 0
|
83 |
+
22 23 1 0 0 0 0
|
84 |
+
22 44 1 0 0 0 0
|
85 |
+
22 46 1 0 0 0 0
|
86 |
+
23 24 1 0 0 0 0
|
87 |
+
23 25 1 0 0 0 0
|
88 |
+
23 26 1 0 0 0 0
|
89 |
+
26 27 1 0 0 0 0
|
90 |
+
26 28 1 0 0 0 0
|
91 |
+
26 29 1 0 0 0 0
|
92 |
+
29 30 1 0 0 0 0
|
93 |
+
29 44 1 0 0 0 0
|
94 |
+
30 31 1 0 0 0 0
|
95 |
+
30 32 1 0 0 0 0
|
96 |
+
30 33 1 0 0 0 0
|
97 |
+
33 34 1 0 0 0 0
|
98 |
+
33 42 2 0 0 0 0
|
99 |
+
34 35 1 0 0 0 0
|
100 |
+
34 36 2 0 0 0 0
|
101 |
+
36 37 1 0 0 0 0
|
102 |
+
36 38 1 0 0 0 0
|
103 |
+
38 39 1 0 0 0 0
|
104 |
+
38 40 2 0 0 0 0
|
105 |
+
40 41 1 0 0 0 0
|
106 |
+
40 42 1 0 0 0 0
|
107 |
+
42 43 1 0 0 0 0
|
108 |
+
44 45 2 0 0 0 0
|
109 |
+
46 47 1 0 0 0 0
|
110 |
+
46 55 2 0 0 0 0
|
111 |
+
47 48 1 0 0 0 0
|
112 |
+
47 49 2 0 0 0 0
|
113 |
+
49 50 1 0 0 0 0
|
114 |
+
49 51 1 0 0 0 0
|
115 |
+
51 52 1 0 0 0 0
|
116 |
+
51 53 2 0 0 0 0
|
117 |
+
53 54 1 0 0 0 0
|
118 |
+
53 55 1 0 0 0 0
|
119 |
+
M END
|
120 |
+
> <Name>
|
121 |
+
S
|
122 |
+
|
123 |
+
$$$$
|
examples/ligands/compounds-3d-RS.sdf
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
mol_R
|
2 |
+
MOE2022 3D
|
3 |
+
|
4 |
+
55 59 0 0 1 0 0 0 0 0999 V2000
|
5 |
+
1.8430 3.9110 0.0060 O 0 0 0 0 0 0 0 0 0 0 0 0
|
6 |
+
1.0400 3.0050 0.1630 C 0 0 0 0 0 0 0 0 0 0 0 0
|
7 |
+
1.3790 1.8370 0.7780 N 0 0 0 0 0 0 0 0 0 0 0 0
|
8 |
+
2.6800 1.7040 1.4280 C 0 0 0 0 0 0 0 0 0 0 0 0
|
9 |
+
3.2700 2.6180 1.3520 H 0 0 0 0 0 0 0 0 0 0 0 0
|
10 |
+
2.5860 1.4190 2.4760 H 0 0 0 0 0 0 0 0 0 0 0 0
|
11 |
+
3.2540 0.5870 0.5860 C 0 0 0 0 0 0 0 0 0 0 0 0
|
12 |
+
3.7280 0.8360 -0.7040 C 0 0 0 0 0 0 0 0 0 0 0 0
|
13 |
+
3.6890 1.8370 -1.1060 H 0 0 0 0 0 0 0 0 0 0 0 0
|
14 |
+
4.2500 -0.1980 -1.4740 C 0 0 0 0 0 0 0 0 0 0 0 0
|
15 |
+
4.6150 -0.0020 -2.4720 H 0 0 0 0 0 0 0 0 0 0 0 0
|
16 |
+
4.2990 -1.4870 -0.9510 C 0 0 0 0 0 0 0 0 0 0 0 0
|
17 |
+
4.8160 -2.4870 -1.7410 O 0 0 0 0 0 0 0 0 0 0 0 0
|
18 |
+
4.8150 -3.8560 -1.2980 C 0 0 0 0 0 0 0 0 0 0 0 0
|
19 |
+
5.2650 -4.4870 -2.0640 H 0 0 0 0 0 0 0 0 0 0 0 0
|
20 |
+
3.7890 -4.1790 -1.1180 H 0 0 0 0 0 0 0 0 0 0 0 0
|
21 |
+
5.3890 -3.9390 -0.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
22 |
+
3.8390 -1.7460 0.3410 C 0 0 0 0 0 0 0 0 0 0 0 0
|
23 |
+
3.8840 -2.7470 0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
|
24 |
+
3.3220 -0.7060 1.1050 C 0 0 0 0 0 0 0 0 0 0 0 0
|
25 |
+
2.9690 -0.9010 2.1070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
26 |
+
0.1730 0.9630 0.9500 C 0 0 2 0 0 0 0 0 0 0 0 0
|
27 |
+
-0.0990 0.7160 2.4440 C 0 0 0 0 0 0 0 0 0 0 0 0
|
28 |
+
0.8190 0.5160 2.9970 H 0 0 0 0 0 0 0 0 0 0 0 0
|
29 |
+
-0.6260 1.5510 2.9050 H 0 0 0 0 0 0 0 0 0 0 0 0
|
30 |
+
-0.9990 -0.5510 2.3340 C 0 0 0 0 0 0 0 0 0 0 0 0
|
31 |
+
-2.0270 -0.2950 2.0750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
32 |
+
-0.9910 -1.1350 3.2540 H 0 0 0 0 0 0 0 0 0 0 0 0
|
33 |
+
-0.3240 -1.2490 1.2430 N 0 0 0 0 0 0 0 0 0 0 0 0
|
34 |
+
-0.4900 -2.6690 0.9440 C 0 0 0 0 0 0 0 0 0 0 0 0
|
35 |
+
0.2320 -3.0190 0.2070 H 0 0 0 0 0 0 0 0 0 0 0 0
|
36 |
+
-0.4230 -3.2860 1.8400 H 0 0 0 0 0 0 0 0 0 0 0 0
|
37 |
+
-1.9000 -2.6260 0.3860 C 0 0 0 0 0 0 0 0 0 0 0 0
|
38 |
+
-2.1050 -2.4640 -0.9870 C 0 0 0 0 0 0 0 0 0 0 0 0
|
39 |
+
-1.2570 -2.3610 -1.6480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
40 |
+
-3.3950 -2.4360 -1.5060 C 0 0 0 0 0 0 0 0 0 0 0 0
|
41 |
+
-3.5580 -2.3090 -2.5660 H 0 0 0 0 0 0 0 0 0 0 0 0
|
42 |
+
-4.4710 -2.5730 -0.6380 C 0 0 0 0 0 0 0 0 0 0 0 0
|
43 |
+
-5.7290 -2.5430 -1.1380 F 0 0 0 0 0 0 0 0 0 0 0 0
|
44 |
+
-4.2930 -2.7400 0.7280 C 0 0 0 0 0 0 0 0 0 0 0 0
|
45 |
+
-5.1440 -2.8450 1.3840 H 0 0 0 0 0 0 0 0 0 0 0 0
|
46 |
+
-2.9990 -2.7680 1.2370 C 0 0 0 0 0 0 0 0 0 0 0 0
|
47 |
+
-2.8430 -2.9000 2.2980 H 0 0 0 0 0 0 0 0 0 0 0 0
|
48 |
+
0.2400 -0.4310 0.3720 C 0 0 0 0 0 0 0 0 0 0 0 0
|
49 |
+
0.6660 -0.7810 -0.7260 O 0 0 0 0 0 0 0 0 0 0 0 0
|
50 |
+
-0.9170 1.7860 0.2880 C 0 0 0 0 0 0 0 0 0 0 0 0
|
51 |
+
-2.2660 1.5010 0.0860 C 0 0 0 0 0 0 0 0 0 0 0 0
|
52 |
+
-2.6860 0.5710 0.4390 H 0 0 0 0 0 0 0 0 0 0 0 0
|
53 |
+
-3.0670 2.4300 -0.5770 C 0 0 0 0 0 0 0 0 0 0 0 0
|
54 |
+
-4.1150 2.2290 -0.7440 H 0 0 0 0 0 0 0 0 0 0 0 0
|
55 |
+
-2.4960 3.6230 -1.0220 C 0 0 0 0 0 0 0 0 0 0 0 0
|
56 |
+
-3.2780 4.5220 -1.6600 F 0 0 0 0 0 0 0 0 0 0 0 0
|
57 |
+
-1.1510 3.9170 -0.8290 C 0 0 0 0 0 0 0 0 0 0 0 0
|
58 |
+
-0.7280 4.8460 -1.1810 H 0 0 0 0 0 0 0 0 0 0 0 0
|
59 |
+
-0.3650 2.9730 -0.1680 C 0 0 0 0 0 0 0 0 0 0 0 0
|
60 |
+
1 2 2 0 0 0 0
|
61 |
+
2 3 1 0 0 0 0
|
62 |
+
2 55 1 0 0 0 0
|
63 |
+
3 4 1 0 0 0 0
|
64 |
+
3 22 1 0 0 0 0
|
65 |
+
4 5 1 0 0 0 0
|
66 |
+
4 6 1 0 0 0 0
|
67 |
+
4 7 1 0 0 0 0
|
68 |
+
7 8 1 0 0 0 0
|
69 |
+
7 20 2 0 0 0 0
|
70 |
+
8 9 1 0 0 0 0
|
71 |
+
8 10 2 0 0 0 0
|
72 |
+
10 11 1 0 0 0 0
|
73 |
+
10 12 1 0 0 0 0
|
74 |
+
12 13 1 0 0 0 0
|
75 |
+
12 18 2 0 0 0 0
|
76 |
+
13 14 1 0 0 0 0
|
77 |
+
14 15 1 0 0 0 0
|
78 |
+
14 16 1 0 0 0 0
|
79 |
+
14 17 1 0 0 0 0
|
80 |
+
18 19 1 0 0 0 0
|
81 |
+
18 20 1 0 0 0 0
|
82 |
+
20 21 1 0 0 0 0
|
83 |
+
22 23 1 0 0 0 0
|
84 |
+
22 44 1 0 0 0 0
|
85 |
+
22 46 1 0 0 0 0
|
86 |
+
23 24 1 0 0 0 0
|
87 |
+
23 25 1 0 0 0 0
|
88 |
+
23 26 1 0 0 0 0
|
89 |
+
26 27 1 0 0 0 0
|
90 |
+
26 28 1 0 0 0 0
|
91 |
+
26 29 1 0 0 0 0
|
92 |
+
29 30 1 0 0 0 0
|
93 |
+
29 44 1 0 0 0 0
|
94 |
+
30 31 1 0 0 0 0
|
95 |
+
30 32 1 0 0 0 0
|
96 |
+
30 33 1 0 0 0 0
|
97 |
+
33 34 1 0 0 0 0
|
98 |
+
33 42 2 0 0 0 0
|
99 |
+
34 35 1 0 0 0 0
|
100 |
+
34 36 2 0 0 0 0
|
101 |
+
36 37 1 0 0 0 0
|
102 |
+
36 38 1 0 0 0 0
|
103 |
+
38 39 1 0 0 0 0
|
104 |
+
38 40 2 0 0 0 0
|
105 |
+
40 41 1 0 0 0 0
|
106 |
+
40 42 1 0 0 0 0
|
107 |
+
42 43 1 0 0 0 0
|
108 |
+
44 45 2 0 0 0 0
|
109 |
+
46 47 1 0 0 0 0
|
110 |
+
46 55 2 0 0 0 0
|
111 |
+
47 48 1 0 0 0 0
|
112 |
+
47 49 2 0 0 0 0
|
113 |
+
49 50 1 0 0 0 0
|
114 |
+
49 51 1 0 0 0 0
|
115 |
+
51 52 1 0 0 0 0
|
116 |
+
51 53 2 0 0 0 0
|
117 |
+
53 54 1 0 0 0 0
|
118 |
+
53 55 1 0 0 0 0
|
119 |
+
M END
|
120 |
+
> <Name>
|
121 |
+
S
|
122 |
+
|
123 |
+
$$$$
|
124 |
+
mol_S
|
125 |
+
MOE2022 3D
|
126 |
+
|
127 |
+
55 59 0 0 1 0 0 0 0 0999 V2000
|
128 |
+
1.5220 4.2070 -0.8770 O 0 0 0 0 0 0 0 0 0 0 0 0
|
129 |
+
0.8070 3.3300 -0.4130 C 0 0 0 0 0 0 0 0 0 0 0 0
|
130 |
+
1.3080 2.1770 0.1270 N 0 0 0 0 0 0 0 0 0 0 0 0
|
131 |
+
2.7120 2.0740 0.5360 C 0 0 0 0 0 0 0 0 0 0 0 0
|
132 |
+
3.2960 2.8760 0.1030 H 0 0 0 0 0 0 0 0 0 0 0 0
|
133 |
+
2.7910 2.2270 1.6050 H 0 0 0 0 0 0 0 0 0 0 0 0
|
134 |
+
3.3560 0.7600 0.1460 C 0 0 0 0 0 0 0 0 0 0 0 0
|
135 |
+
3.4390 0.3940 -1.1990 C 0 0 0 0 0 0 0 0 0 0 0 0
|
136 |
+
3.0560 1.0430 -1.9460 H 0 0 0 0 0 0 0 0 0 0 0 0
|
137 |
+
4.0160 -0.8170 -1.5650 C 0 0 0 0 0 0 0 0 0 0 0 0
|
138 |
+
4.0770 -1.1020 -2.5830 H 0 0 0 0 0 0 0 0 0 0 0 0
|
139 |
+
4.5220 -1.6610 -0.5810 C 0 0 0 0 0 0 0 0 0 0 0 0
|
140 |
+
5.0790 -2.8500 -0.9960 O 0 0 0 0 0 0 0 0 0 0 0 0
|
141 |
+
5.7550 -3.7890 -0.1290 C 0 0 0 0 0 0 0 0 0 0 0 0
|
142 |
+
6.1420 -4.6210 -0.7170 H 0 0 0 0 0 0 0 0 0 0 0 0
|
143 |
+
5.0650 -4.1830 0.6170 H 0 0 0 0 0 0 0 0 0 0 0 0
|
144 |
+
6.5910 -3.3030 0.3750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
145 |
+
4.4620 -1.3010 0.7650 C 0 0 0 0 0 0 0 0 0 0 0 0
|
146 |
+
4.8540 -1.9350 1.5140 H 0 0 0 0 0 0 0 0 0 0 0 0
|
147 |
+
3.8780 -0.0900 1.1220 C 0 0 0 0 0 0 0 0 0 0 0 0
|
148 |
+
3.8290 0.1840 2.1450 H 0 0 0 0 0 0 0 0 0 0 0 0
|
149 |
+
0.1910 1.3310 0.6670 C 0 0 1 0 0 0 0 0 0 0 0 0
|
150 |
+
0.1330 -0.0450 -0.0200 C 0 0 0 0 0 0 0 0 0 0 0 0
|
151 |
+
-0.8850 -0.2520 -0.3480 H 0 0 0 0 0 0 0 0 0 0 0 0
|
152 |
+
0.8030 -0.1100 -0.8750 H 0 0 0 0 0 0 0 0 0 0 0 0
|
153 |
+
0.5050 -1.0520 1.0690 C 0 0 0 0 0 0 0 0 0 0 0 0
|
154 |
+
-0.1300 -1.9280 1.0300 H 0 0 0 0 0 0 0 0 0 0 0 0
|
155 |
+
1.5230 -1.3960 0.9350 H 0 0 0 0 0 0 0 0 0 0 0 0
|
156 |
+
0.3750 -0.3420 2.3320 N 0 0 0 0 0 0 0 0 0 0 0 0
|
157 |
+
0.5330 -0.9630 3.6460 C 0 0 0 0 0 0 0 0 0 0 0 0
|
158 |
+
0.0500 -0.3600 4.4060 H 0 0 0 0 0 0 0 0 0 0 0 0
|
159 |
+
0.0060 -1.9070 3.6730 H 0 0 0 0 0 0 0 0 0 0 0 0
|
160 |
+
1.9830 -1.1770 4.0270 C 0 0 0 0 0 0 0 0 0 0 0 0
|
161 |
+
2.7010 -0.1350 4.6230 C 0 0 0 0 0 0 0 0 0 0 0 0
|
162 |
+
2.2280 0.8000 4.8010 H 0 0 0 0 0 0 0 0 0 0 0 0
|
163 |
+
4.0350 -0.3160 4.9780 C 0 0 0 0 0 0 0 0 0 0 0 0
|
164 |
+
4.5900 0.4650 5.4300 H 0 0 0 0 0 0 0 0 0 0 0 0
|
165 |
+
4.6300 -1.5450 4.7290 C 0 0 0 0 0 0 0 0 0 0 0 0
|
166 |
+
5.9270 -1.7250 5.0670 F 0 0 0 0 0 0 0 0 0 0 0 0
|
167 |
+
3.9400 -2.5950 4.1400 C 0 0 0 0 0 0 0 0 0 0 0 0
|
168 |
+
4.4240 -3.5210 3.9680 H 0 0 0 0 0 0 0 0 0 0 0 0
|
169 |
+
2.6070 -2.4050 3.7830 C 0 0 0 0 0 0 0 0 0 0 0 0
|
170 |
+
2.0670 -3.1950 3.3280 H 0 0 0 0 0 0 0 0 0 0 0 0
|
171 |
+
0.2890 0.9840 2.1620 C 0 0 0 0 0 0 0 0 0 0 0 0
|
172 |
+
0.2840 1.7830 3.0980 O 0 0 0 0 0 0 0 0 0 0 0 0
|
173 |
+
-1.0390 2.1600 0.3390 C 0 0 0 0 0 0 0 0 0 0 0 0
|
174 |
+
-2.3890 1.9160 0.5940 C 0 0 0 0 0 0 0 0 0 0 0 0
|
175 |
+
-2.6960 1.0370 1.1020 H 0 0 0 0 0 0 0 0 0 0 0 0
|
176 |
+
-3.3350 2.8540 0.1730 C 0 0 0 0 0 0 0 0 0 0 0 0
|
177 |
+
-4.3690 2.7000 0.3510 H 0 0 0 0 0 0 0 0 0 0 0 0
|
178 |
+
-2.9040 4.0060 -0.4840 C 0 0 0 0 0 0 0 0 0 0 0 0
|
179 |
+
-3.8270 4.9050 -0.8900 F 0 0 0 0 0 0 0 0 0 0 0 0
|
180 |
+
-1.5630 4.2630 -0.7360 C 0 0 0 0 0 0 0 0 0 0 0 0
|
181 |
+
-1.2550 5.1470 -1.2310 H 0 0 0 0 0 0 0 0 0 0 0 0
|
182 |
+
-0.6330 3.3150 -0.3080 C 0 0 0 0 0 0 0 0 0 0 0 0
|
183 |
+
1 2 2 0 0 0 0
|
184 |
+
2 3 1 0 0 0 0
|
185 |
+
2 55 1 0 0 0 0
|
186 |
+
3 4 1 0 0 0 0
|
187 |
+
3 22 1 0 0 0 0
|
188 |
+
4 5 1 0 0 0 0
|
189 |
+
4 6 1 0 0 0 0
|
190 |
+
4 7 1 0 0 0 0
|
191 |
+
7 8 1 0 0 0 0
|
192 |
+
7 20 2 0 0 0 0
|
193 |
+
8 9 1 0 0 0 0
|
194 |
+
8 10 2 0 0 0 0
|
195 |
+
10 11 1 0 0 0 0
|
196 |
+
10 12 1 0 0 0 0
|
197 |
+
12 13 1 0 0 0 0
|
198 |
+
12 18 2 0 0 0 0
|
199 |
+
13 14 1 0 0 0 0
|
200 |
+
14 15 1 0 0 0 0
|
201 |
+
14 16 1 0 0 0 0
|
202 |
+
14 17 1 0 0 0 0
|
203 |
+
18 19 1 0 0 0 0
|
204 |
+
18 20 1 0 0 0 0
|
205 |
+
20 21 1 0 0 0 0
|
206 |
+
22 23 1 0 0 0 0
|
207 |
+
22 44 1 0 0 0 0
|
208 |
+
22 46 1 0 0 0 0
|
209 |
+
23 24 1 0 0 0 0
|
210 |
+
23 25 1 0 0 0 0
|
211 |
+
23 26 1 0 0 0 0
|
212 |
+
26 27 1 0 0 0 0
|
213 |
+
26 28 1 0 0 0 0
|
214 |
+
26 29 1 0 0 0 0
|
215 |
+
29 30 1 0 0 0 0
|
216 |
+
29 44 1 0 0 0 0
|
217 |
+
30 31 1 0 0 0 0
|
218 |
+
30 32 1 0 0 0 0
|
219 |
+
30 33 1 0 0 0 0
|
220 |
+
33 34 1 0 0 0 0
|
221 |
+
33 42 2 0 0 0 0
|
222 |
+
34 35 1 0 0 0 0
|
223 |
+
34 36 2 0 0 0 0
|
224 |
+
36 37 1 0 0 0 0
|
225 |
+
36 38 1 0 0 0 0
|
226 |
+
38 39 1 0 0 0 0
|
227 |
+
38 40 2 0 0 0 0
|
228 |
+
40 41 1 0 0 0 0
|
229 |
+
40 42 1 0 0 0 0
|
230 |
+
42 43 1 0 0 0 0
|
231 |
+
44 45 2 0 0 0 0
|
232 |
+
46 47 1 0 0 0 0
|
233 |
+
46 55 2 0 0 0 0
|
234 |
+
47 48 1 0 0 0 0
|
235 |
+
47 49 2 0 0 0 0
|
236 |
+
49 50 1 0 0 0 0
|
237 |
+
49 51 1 0 0 0 0
|
238 |
+
51 52 1 0 0 0 0
|
239 |
+
51 53 2 0 0 0 0
|
240 |
+
53 54 1 0 0 0 0
|
241 |
+
53 55 1 0 0 0 0
|
242 |
+
M END
|
243 |
+
> <Name>
|
244 |
+
R
|
245 |
+
|
246 |
+
$$$$
|
examples/prot.fasta
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
>A|protein
|
2 |
+
MASWSHPQFEKGGTHVAETSAPTRSEPDTRVLTLPGTASAPEFRLIDIDGLLNNRATTDVRDLGSGRLNAWGNSFPAAELPAPGSLITVAGIPFTWANAHARGDNIRCEGQVVDIPPGQYDWIYLLAASERRSEDTIWAHYDDGHADPLRVGISDFLDGTPAFGELSAFRTSRMHYPHHVQEGLPTTMWLTRVGMPRHGVARSLRLPRSVAMHVFALTLRTAAAVRLAEGATT
|
3 |
+
>B|protein
|
4 |
+
MGSSHHHHHHSQDPNSTTTAPPVELWTRDLGSCLHGTLATALIRDGHDPVTVLGAPWEFRRRPGAWSSEEYFFFAEPDSLAGRLALYHPFESTWHRSDGDGVDDLREALAAGVLPIAAVDNFHLPFRPAFHDVHAAHLLVVYRITETEVYVSDAQPPAFQGAIPLADFLASWGSLNPPDDADVFFSASPSGRRWLRTRMTGPVPEPDRHWVGRVIRENVARYRQEPPADTQTGLPGLRRYLDELCALTPGTNAASEALSELYVISWNIQAQSGLHAEFLRAHSVKWRIPELAEAAAGVDAVAHGWTGVRMTGAHSRVWQRHRPAELRGHATALVRRLEAALDLLELAADAVS
|