KevinHuSh
committed
Commit: c9a1362
Parent(s): 0332a6c
make sure the models will not be loaded twice (#422)
### What problem does this PR solve?
#381
### Type of change
- [x] Refactoring
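
For orientation, the recurring pattern in the diffs below is to pin `snapshot_download` to a directory inside the project tree, so a model downloaded once is found locally on the next run instead of being fetched (and loaded) again. A minimal sketch of that idea, using illustrative names (`load_model_dir`, the `layout.onnx` probe file) that are not part of this repository:

```python
import os
from huggingface_hub import snapshot_download

def load_model_dir(base_dir: str) -> str:
    """Return a directory containing the deepdoc models, downloading at most once."""
    model_dir = os.path.join(base_dir, "rag/res/deepdoc")
    # If a previous run already placed the files here, reuse them.
    if os.path.exists(os.path.join(model_dir, "layout.onnx")):
        return model_dir
    # Otherwise download straight into the project tree; with symlinks disabled
    # the files physically live in local_dir, so later runs hit the branch above.
    return snapshot_download(repo_id="InfiniFlow/deepdoc",
                             local_dir=model_dir,
                             local_dir_use_symlinks=False)
```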
- api/apps/api_app.py +4 -5
- api/db/db_models.py +1 -1
- deepdoc/parser/pdf_parser.py +3 -1
- deepdoc/vision/layout_recognizer.py +3 -1
- deepdoc/vision/ocr.py +3 -1
- deepdoc/vision/recognizer.py +3 -1
- deepdoc/vision/table_structure_recognizer.py +3 -1
- rag/llm/embedding_model.py +6 -1
api/apps/api_app.py
CHANGED
@@ -105,8 +105,8 @@ def stats():
         res = {
             "pv": [(o["dt"], o["pv"]) for o in objs],
             "uv": [(o["dt"], o["uv"]) for o in objs],
-            "speed": [(o["dt"], o["tokens"]/o["duration"]) for o in objs],
-            "tokens": [(o["dt"], o["tokens"]/1000.) for o in objs],
+            "speed": [(o["dt"], float(o["tokens"])/float(o["duration"])) for o in objs],
+            "tokens": [(o["dt"], float(o["tokens"])/1000.) for o in objs],
             "round": [(o["dt"], o["round"]) for o in objs],
             "thumb_up": [(o["dt"], o["thumb_up"]) for o in objs]
         }
@@ -115,8 +115,7 @@ def stats():
         return server_error_response(e)
 
 
-@manager.route('/new_conversation', methods=['
-@validate_request("user_id")
+@manager.route('/new_conversation', methods=['GET'])
 def set_conversation():
     token = request.headers.get('Authorization').split()[1]
     objs = APIToken.query(token=token)
@@ -131,7 +130,7 @@ def set_conversation():
     conv = {
         "id": get_uuid(),
         "dialog_id": dia.id,
-        "user_id":
+        "user_id": request.args.get("user_id", ""),
         "message": [{"role": "assistant", "content": dia.prompt_config["prologue"]}]
     }
     API4ConversationService.save(**conv)
api/db/db_models.py
CHANGED
@@ -629,7 +629,7 @@ class Document(DataBaseModel):
         max_length=128,
         null=False,
         default="local",
-        help_text="where dose this document from")
+        help_text="where dose this document come from")
     type = CharField(max_length=32, null=False, help_text="file extension")
     created_by = CharField(
         max_length=32,
deepdoc/parser/pdf_parser.py
CHANGED
@@ -43,7 +43,9 @@ class HuParser:
                 model_dir, "updown_concat_xgb.model"))
         except Exception as e:
             model_dir = snapshot_download(
-                repo_id="InfiniFlow/text_concat_xgb_v1.0"
+                repo_id="InfiniFlow/text_concat_xgb_v1.0",
+                local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
+                local_dir_use_symlinks=False)
             self.updown_cnt_mdl.load_model(os.path.join(
                 model_dir, "updown_concat_xgb.model"))
 
deepdoc/vision/layout_recognizer.py
CHANGED
@@ -43,7 +43,9 @@ class LayoutRecognizer(Recognizer):
                 "rag/res/deepdoc")
             super().__init__(self.labels, domain, model_dir)
         except Exception as e:
-            model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc"
+            model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
+                                          local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
+                                          local_dir_use_symlinks=False)
             super().__init__(self.labels, domain, model_dir)
 
         self.garbage_layouts = ["footer", "header", "reference"]
deepdoc/vision/ocr.py
CHANGED
@@ -486,7 +486,9 @@ class OCR(object):
             self.text_detector = TextDetector(model_dir)
             self.text_recognizer = TextRecognizer(model_dir)
         except Exception as e:
-            model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc"
+            model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
+                                          local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
+                                          local_dir_use_symlinks=False)
             self.text_detector = TextDetector(model_dir)
             self.text_recognizer = TextRecognizer(model_dir)
 
deepdoc/vision/recognizer.py
CHANGED
@@ -41,7 +41,9 @@ class Recognizer(object):
                 "rag/res/deepdoc")
         model_file_path = os.path.join(model_dir, task_name + ".onnx")
         if not os.path.exists(model_file_path):
-            model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc"
+            model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
+                                          local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
+                                          local_dir_use_symlinks=False)
             model_file_path = os.path.join(model_dir, task_name + ".onnx")
         else:
             model_file_path = os.path.join(model_dir, task_name + ".onnx")
deepdoc/vision/table_structure_recognizer.py
CHANGED
@@ -39,7 +39,9 @@ class TableStructureRecognizer(Recognizer):
                 get_project_base_directory(),
                 "rag/res/deepdoc"))
         except Exception as e:
-            super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc"
+            super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc",
+                                                                   local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
+                                                                   local_dir_use_symlinks=False))
 
     def __call__(self, images, thr=0.2):
         tbls = super().__call__(images, thr)
rag/llm/embedding_model.py
CHANGED
@@ -14,6 +14,8 @@
 # limitations under the License.
 #
 from typing import Optional
+
+from huggingface_hub import snapshot_download
 from zhipuai import ZhipuAI
 import os
 from abc import ABC
@@ -35,7 +37,10 @@ try:
                            query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
                            use_fp16=torch.cuda.is_available())
 except Exception as e:
-
+    model_dir = snapshot_download(repo_id="BAAI/bge-large-zh-v1.5",
+                                  local_dir=os.path.join(get_project_base_directory(), "rag/res/bge-large-zh-v1.5"),
+                                  local_dir_use_symlinks=False)
+    flag_model = FlagModel(model_dir,
                            query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章:",
                            use_fp16=torch.cuda.is_available())
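
After the fallback above, `flag_model` is a regular `FlagModel` instance regardless of which branch ran, so downstream usage is unchanged. A small illustration (the sentences and the printed shape are just examples):

```python
# flag_model comes from the try/except above; encode() returns a numpy array
# with one embedding per input sentence (1024 dimensions for bge-large-zh-v1.5).
vectors = flag_model.encode(["什么是RAG?", "How do embeddings work?"])
print(vectors.shape)
```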