Use consistent log file names, introduced initLogger (#3403)
Browse files

### What problem does this PR solve?

Use consistent log file names, introduced initLogger
### Type of change
- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
This view is limited to 50 files because it contains too many changes.
See raw diff
- agent/canvas.py +5 -5
- agent/component/arxiv.py +2 -2
- agent/component/baidu.py +2 -2
- agent/component/base.py +5 -5
- agent/component/bing.py +2 -2
- agent/component/categorize.py +2 -2
- agent/component/duckduckgo.py +2 -2
- agent/component/github.py +2 -2
- agent/component/google.py +2 -2
- agent/component/googlescholar.py +3 -3
- agent/component/keyword.py +2 -2
- agent/component/pubmed.py +2 -2
- agent/component/relevant.py +2 -2
- agent/component/retrieval.py +2 -2
- agent/component/rewrite.py +2 -2
- agent/component/wikipedia.py +2 -2
- agent/component/yahoofinance.py +2 -2
- api/apps/__init__.py +2 -2
- api/apps/canvas_app.py +2 -2
- api/apps/llm_app.py +2 -2
- api/apps/user_app.py +7 -7
- api/db/db_models.py +7 -7
- api/db/init_data.py +8 -8
- api/db/services/dialog_service.py +9 -9
- api/db/services/document_service.py +3 -3
- api/db/services/file_service.py +3 -3
- api/db/services/llm_service.py +9 -9
- api/ragflow_server.py +17 -16
- api/utils/api_utils.py +3 -3
- api/utils/log_utils.py +30 -27
- api/validation.py +3 -3
- deepdoc/parser/pdf_parser.py +17 -21
- deepdoc/parser/resume/entities/corporations.py +2 -2
- deepdoc/parser/resume/step_two.py +7 -8
- deepdoc/vision/operators.py +2 -2
- deepdoc/vision/recognizer.py +2 -2
- deepdoc/vision/seeit.py +2 -2
- deepdoc/vision/t_recognizer.py +2 -2
- deepdoc/vision/table_structure_recognizer.py +1 -1
- graphrag/claim_extractor.py +3 -3
- graphrag/community_reports_extractor.py +3 -3
- graphrag/description_summary.py +0 -8
- graphrag/entity_resolution.py +1 -1
- graphrag/index.py +3 -3
- graphrag/leiden.py +1 -3
- graphrag/mind_map_extractor.py +3 -4
- intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py +11 -11
- rag/app/book.py +2 -2
- rag/app/email.py +2 -2
- rag/app/laws.py +1 -2
agent/canvas.py
CHANGED
@@ -13,13 +13,13 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import json
|
17 |
from abc import ABC
|
18 |
from copy import deepcopy
|
19 |
from functools import partial
|
20 |
from agent.component import component_class
|
21 |
from agent.component.base import ComponentBase
|
22 |
-
from api.utils.log_utils import logger
|
23 |
|
24 |
class Canvas(ABC):
|
25 |
"""
|
@@ -187,7 +187,7 @@ class Canvas(ABC):
|
|
187 |
if cpn.component_name == "Answer":
|
188 |
self.answer.append(c)
|
189 |
else:
|
190 |
-
|
191 |
cpids = cpn.get_dependent_components()
|
192 |
if any([c not in self.path[-1] for c in cpids]):
|
193 |
continue
|
@@ -197,7 +197,7 @@ class Canvas(ABC):
|
|
197 |
|
198 |
prepare2run(self.components[self.path[-2][-1]]["downstream"])
|
199 |
while 0 <= ran < len(self.path[-1]):
|
200 |
-
|
201 |
cpn_id = self.path[-1][ran]
|
202 |
cpn = self.get_component(cpn_id)
|
203 |
if not cpn["downstream"]: break
|
@@ -217,7 +217,7 @@ class Canvas(ABC):
|
|
217 |
self.get_component(p)["obj"].set_exception(e)
|
218 |
prepare2run([p])
|
219 |
break
|
220 |
-
|
221 |
break
|
222 |
continue
|
223 |
|
@@ -229,7 +229,7 @@ class Canvas(ABC):
|
|
229 |
self.get_component(p)["obj"].set_exception(e)
|
230 |
prepare2run([p])
|
231 |
break
|
232 |
-
|
233 |
break
|
234 |
|
235 |
if self.answer:
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import json
|
18 |
from abc import ABC
|
19 |
from copy import deepcopy
|
20 |
from functools import partial
|
21 |
from agent.component import component_class
|
22 |
from agent.component.base import ComponentBase
|
|
|
23 |
|
24 |
class Canvas(ABC):
|
25 |
"""
|
|
|
187 |
if cpn.component_name == "Answer":
|
188 |
self.answer.append(c)
|
189 |
else:
|
190 |
+
logging.debug(f"Canvas.prepare2run: {c}")
|
191 |
cpids = cpn.get_dependent_components()
|
192 |
if any([c not in self.path[-1] for c in cpids]):
|
193 |
continue
|
|
|
197 |
|
198 |
prepare2run(self.components[self.path[-2][-1]]["downstream"])
|
199 |
while 0 <= ran < len(self.path[-1]):
|
200 |
+
logging.debug(f"Canvas.run: {ran} {self.path}")
|
201 |
cpn_id = self.path[-1][ran]
|
202 |
cpn = self.get_component(cpn_id)
|
203 |
if not cpn["downstream"]: break
|
|
|
217 |
self.get_component(p)["obj"].set_exception(e)
|
218 |
prepare2run([p])
|
219 |
break
|
220 |
+
logging.exception("Canvas.run got exception")
|
221 |
break
|
222 |
continue
|
223 |
|
|
|
229 |
self.get_component(p)["obj"].set_exception(e)
|
230 |
prepare2run([p])
|
231 |
break
|
232 |
+
logging.exception("Canvas.run got exception")
|
233 |
break
|
234 |
|
235 |
if self.answer:
|
agent/component/arxiv.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import arxiv
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
class ArXivParam(ComponentParamBase):
|
23 |
"""
|
@@ -64,5 +64,5 @@ class ArXiv(ComponentBase, ABC):
|
|
64 |
return ArXiv.be_output("")
|
65 |
|
66 |
df = pd.DataFrame(arxiv_res)
|
67 |
-
|
68 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import arxiv
|
19 |
import pandas as pd
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
21 |
|
22 |
class ArXivParam(ComponentParamBase):
|
23 |
"""
|
|
|
64 |
return ArXiv.be_output("")
|
65 |
|
66 |
df = pd.DataFrame(arxiv_res)
|
67 |
+
logging.debug(f"df: {str(df)}")
|
68 |
return df
|
agent/component/baidu.py
CHANGED
@@ -13,12 +13,12 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import pandas as pd
|
18 |
import requests
|
19 |
import re
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
21 |
-
from api.utils.log_utils import logger
|
22 |
|
23 |
|
24 |
class BaiduParam(ComponentParamBase):
|
@@ -62,6 +62,6 @@ class Baidu(ComponentBase, ABC):
|
|
62 |
return Baidu.be_output("")
|
63 |
|
64 |
df = pd.DataFrame(baidu_res)
|
65 |
-
|
66 |
return df
|
67 |
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import pandas as pd
|
19 |
import requests
|
20 |
import re
|
21 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
22 |
|
23 |
|
24 |
class BaiduParam(ComponentParamBase):
|
|
|
62 |
return Baidu.be_output("")
|
63 |
|
64 |
df = pd.DataFrame(baidu_res)
|
65 |
+
logging.debug(f"df: {str(df)}")
|
66 |
return df
|
67 |
|
agent/component/base.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import builtins
|
18 |
import json
|
@@ -23,7 +24,6 @@ from typing import Tuple, Union
|
|
23 |
import pandas as pd
|
24 |
|
25 |
from agent import settings
|
26 |
-
from api.utils.log_utils import logger
|
27 |
|
28 |
|
29 |
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
|
@@ -361,13 +361,13 @@ class ComponentParamBase(ABC):
|
|
361 |
|
362 |
def _warn_deprecated_param(self, param_name, descr):
|
363 |
if self._deprecated_params_set.get(param_name):
|
364 |
-
|
365 |
f"{descr} {param_name} is deprecated and ignored in this version."
|
366 |
)
|
367 |
|
368 |
def _warn_to_deprecate_param(self, param_name, descr, new_param):
|
369 |
if self._deprecated_params_set.get(param_name):
|
370 |
-
|
371 |
f"{descr} {param_name} will be deprecated in future release; "
|
372 |
f"please use {new_param} instead."
|
373 |
)
|
@@ -403,7 +403,7 @@ class ComponentBase(ABC):
|
|
403 |
return cpnts
|
404 |
|
405 |
def run(self, history, **kwargs):
|
406 |
-
|
407 |
json.dumps(kwargs, ensure_ascii=False)))
|
408 |
try:
|
409 |
res = self._run(history, **kwargs)
|
@@ -476,7 +476,7 @@ class ComponentBase(ABC):
|
|
476 |
reversed_cpnts.extend(self._canvas.path[-2])
|
477 |
reversed_cpnts.extend(self._canvas.path[-1])
|
478 |
|
479 |
-
|
480 |
for u in reversed_cpnts[::-1]:
|
481 |
if self.get_component_name(u) in ["switch", "concentrator"]: continue
|
482 |
if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import builtins
|
19 |
import json
|
|
|
24 |
import pandas as pd
|
25 |
|
26 |
from agent import settings
|
|
|
27 |
|
28 |
|
29 |
_FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
|
|
|
361 |
|
362 |
def _warn_deprecated_param(self, param_name, descr):
|
363 |
if self._deprecated_params_set.get(param_name):
|
364 |
+
logging.warning(
|
365 |
f"{descr} {param_name} is deprecated and ignored in this version."
|
366 |
)
|
367 |
|
368 |
def _warn_to_deprecate_param(self, param_name, descr, new_param):
|
369 |
if self._deprecated_params_set.get(param_name):
|
370 |
+
logging.warning(
|
371 |
f"{descr} {param_name} will be deprecated in future release; "
|
372 |
f"please use {new_param} instead."
|
373 |
)
|
|
|
403 |
return cpnts
|
404 |
|
405 |
def run(self, history, **kwargs):
|
406 |
+
logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
|
407 |
json.dumps(kwargs, ensure_ascii=False)))
|
408 |
try:
|
409 |
res = self._run(history, **kwargs)
|
|
|
476 |
reversed_cpnts.extend(self._canvas.path[-2])
|
477 |
reversed_cpnts.extend(self._canvas.path[-1])
|
478 |
|
479 |
+
logging.debug(f"{self.component_name} {reversed_cpnts[::-1]}")
|
480 |
for u in reversed_cpnts[::-1]:
|
481 |
if self.get_component_name(u) in ["switch", "concentrator"]: continue
|
482 |
if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
|
agent/component/bing.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import requests
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
class BingParam(ComponentParamBase):
|
23 |
"""
|
@@ -80,5 +80,5 @@ class Bing(ComponentBase, ABC):
|
|
80 |
return Bing.be_output("")
|
81 |
|
82 |
df = pd.DataFrame(bing_res)
|
83 |
-
|
84 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import requests
|
19 |
import pandas as pd
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
21 |
|
22 |
class BingParam(ComponentParamBase):
|
23 |
"""
|
|
|
80 |
return Bing.be_output("")
|
81 |
|
82 |
df = pd.DataFrame(bing_res)
|
83 |
+
logging.debug(f"df: {str(df)}")
|
84 |
return df
|
agent/component/categorize.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
from agent.component import GenerateParam, Generate
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class CategorizeParam(GenerateParam):
|
@@ -77,7 +77,7 @@ class Categorize(Generate, ABC):
|
|
77 |
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
78 |
ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
|
79 |
self._param.gen_conf())
|
80 |
-
|
81 |
for c in self._param.category_description.keys():
|
82 |
if ans.lower().find(c.lower()) >= 0:
|
83 |
return Categorize.be_output(self._param.category_description[c]["to"])
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
from api.db import LLMType
|
19 |
from api.db.services.llm_service import LLMBundle
|
20 |
from agent.component import GenerateParam, Generate
|
|
|
21 |
|
22 |
|
23 |
class CategorizeParam(GenerateParam):
|
|
|
77 |
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
78 |
ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
|
79 |
self._param.gen_conf())
|
80 |
+
logging.debug(f"input: {input}, answer: {str(ans)}")
|
81 |
for c in self._param.category_description.keys():
|
82 |
if ans.lower().find(c.lower()) >= 0:
|
83 |
return Categorize.be_output(self._param.category_description[c]["to"])
|
agent/component/duckduckgo.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
from duckduckgo_search import DDGS
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class DuckDuckGoParam(ComponentParamBase):
|
@@ -62,5 +62,5 @@ class DuckDuckGo(ComponentBase, ABC):
|
|
62 |
return DuckDuckGo.be_output("")
|
63 |
|
64 |
df = pd.DataFrame(duck_res)
|
65 |
-
|
66 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
from duckduckgo_search import DDGS
|
19 |
import pandas as pd
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
21 |
|
22 |
|
23 |
class DuckDuckGoParam(ComponentParamBase):
|
|
|
62 |
return DuckDuckGo.be_output("")
|
63 |
|
64 |
df = pd.DataFrame(duck_res)
|
65 |
+
logging.debug("df: {df}")
|
66 |
return df
|
agent/component/github.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import pandas as pd
|
18 |
import requests
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class GitHubParam(ComponentParamBase):
|
@@ -57,5 +57,5 @@ class GitHub(ComponentBase, ABC):
|
|
57 |
return GitHub.be_output("")
|
58 |
|
59 |
df = pd.DataFrame(github_res)
|
60 |
-
|
61 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import pandas as pd
|
19 |
import requests
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
21 |
|
22 |
|
23 |
class GitHubParam(ComponentParamBase):
|
|
|
57 |
return GitHub.be_output("")
|
58 |
|
59 |
df = pd.DataFrame(github_res)
|
60 |
+
logging.debug(f"df: {df}")
|
61 |
return df
|
agent/component/google.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
from serpapi import GoogleSearch
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class GoogleParam(ComponentParamBase):
|
@@ -92,5 +92,5 @@ class Google(ComponentBase, ABC):
|
|
92 |
return Google.be_output("")
|
93 |
|
94 |
df = pd.DataFrame(google_res)
|
95 |
-
|
96 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
from serpapi import GoogleSearch
|
19 |
import pandas as pd
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
21 |
|
22 |
|
23 |
class GoogleParam(ComponentParamBase):
|
|
|
92 |
return Google.be_output("")
|
93 |
|
94 |
df = pd.DataFrame(google_res)
|
95 |
+
logging.debug(f"df: {df}")
|
96 |
return df
|
agent/component/googlescholar.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import pandas as pd
|
18 |
from agent.component.base import ComponentBase, ComponentParamBase
|
19 |
from scholarly import scholarly
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class GoogleScholarParam(ComponentParamBase):
|
@@ -59,12 +59,12 @@ class GoogleScholar(ComponentBase, ABC):
|
|
59 |
'bib'].get('abstract', 'no abstract')})
|
60 |
|
61 |
except StopIteration or Exception:
|
62 |
-
|
63 |
break
|
64 |
|
65 |
if not scholar_res:
|
66 |
return GoogleScholar.be_output("")
|
67 |
|
68 |
df = pd.DataFrame(scholar_res)
|
69 |
-
|
70 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
from scholarly import scholarly
|
|
|
21 |
|
22 |
|
23 |
class GoogleScholarParam(ComponentParamBase):
|
|
|
59 |
'bib'].get('abstract', 'no abstract')})
|
60 |
|
61 |
except StopIteration or Exception:
|
62 |
+
logging.exception("GoogleScholar")
|
63 |
break
|
64 |
|
65 |
if not scholar_res:
|
66 |
return GoogleScholar.be_output("")
|
67 |
|
68 |
df = pd.DataFrame(scholar_res)
|
69 |
+
logging.debug(f"df: {df}")
|
70 |
return df
|
agent/component/keyword.py
CHANGED
@@ -13,12 +13,12 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import re
|
17 |
from abc import ABC
|
18 |
from api.db import LLMType
|
19 |
from api.db.services.llm_service import LLMBundle
|
20 |
from agent.component import GenerateParam, Generate
|
21 |
-
from api.utils.log_utils import logger
|
22 |
|
23 |
|
24 |
class KeywordExtractParam(GenerateParam):
|
@@ -58,5 +58,5 @@ class KeywordExtract(Generate, ABC):
|
|
58 |
self._param.gen_conf())
|
59 |
|
60 |
ans = re.sub(r".*keyword:", "", ans).strip()
|
61 |
-
|
62 |
return KeywordExtract.be_output(ans)
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import re
|
18 |
from abc import ABC
|
19 |
from api.db import LLMType
|
20 |
from api.db.services.llm_service import LLMBundle
|
21 |
from agent.component import GenerateParam, Generate
|
|
|
22 |
|
23 |
|
24 |
class KeywordExtractParam(GenerateParam):
|
|
|
58 |
self._param.gen_conf())
|
59 |
|
60 |
ans = re.sub(r".*keyword:", "", ans).strip()
|
61 |
+
logging.debug(f"ans: {ans}")
|
62 |
return KeywordExtract.be_output(ans)
|
agent/component/pubmed.py
CHANGED
@@ -13,13 +13,13 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
from Bio import Entrez
|
18 |
import re
|
19 |
import pandas as pd
|
20 |
import xml.etree.ElementTree as ET
|
21 |
from agent.component.base import ComponentBase, ComponentParamBase
|
22 |
-
from api.utils.log_utils import logger
|
23 |
|
24 |
|
25 |
class PubMedParam(ComponentParamBase):
|
@@ -65,5 +65,5 @@ class PubMed(ComponentBase, ABC):
|
|
65 |
return PubMed.be_output("")
|
66 |
|
67 |
df = pd.DataFrame(pubmed_res)
|
68 |
-
|
69 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
from Bio import Entrez
|
19 |
import re
|
20 |
import pandas as pd
|
21 |
import xml.etree.ElementTree as ET
|
22 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
23 |
|
24 |
|
25 |
class PubMedParam(ComponentParamBase):
|
|
|
65 |
return PubMed.be_output("")
|
66 |
|
67 |
df = pd.DataFrame(pubmed_res)
|
68 |
+
logging.debug(f"df: {df}")
|
69 |
return df
|
agent/component/relevant.py
CHANGED
@@ -13,12 +13,12 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
from agent.component import GenerateParam, Generate
|
20 |
from rag.utils import num_tokens_from_string, encoder
|
21 |
-
from api.utils.log_utils import logger
|
22 |
|
23 |
|
24 |
class RelevantParam(GenerateParam):
|
@@ -71,7 +71,7 @@ class Relevant(Generate, ABC):
|
|
71 |
ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": ans}],
|
72 |
self._param.gen_conf())
|
73 |
|
74 |
-
|
75 |
if ans.lower().find("yes") >= 0:
|
76 |
return Relevant.be_output(self._param.yes)
|
77 |
if ans.lower().find("no") >= 0:
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
from api.db import LLMType
|
19 |
from api.db.services.llm_service import LLMBundle
|
20 |
from agent.component import GenerateParam, Generate
|
21 |
from rag.utils import num_tokens_from_string, encoder
|
|
|
22 |
|
23 |
|
24 |
class RelevantParam(GenerateParam):
|
|
|
71 |
ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": ans}],
|
72 |
self._param.gen_conf())
|
73 |
|
74 |
+
logging.debug(ans)
|
75 |
if ans.lower().find("yes") >= 0:
|
76 |
return Relevant.be_output(self._param.yes)
|
77 |
if ans.lower().find("no") >= 0:
|
agent/component/retrieval.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
|
18 |
import pandas as pd
|
@@ -22,7 +23,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
22 |
from api.db.services.llm_service import LLMBundle
|
23 |
from api.settings import retrievaler
|
24 |
from agent.component.base import ComponentBase, ComponentParamBase
|
25 |
-
from api.utils.log_utils import logger
|
26 |
|
27 |
|
28 |
class RetrievalParam(ComponentParamBase):
|
@@ -81,7 +81,7 @@ class Retrieval(ComponentBase, ABC):
|
|
81 |
df = pd.DataFrame(kbinfos["chunks"])
|
82 |
df["content"] = df["content_with_weight"]
|
83 |
del df["content_with_weight"]
|
84 |
-
|
85 |
return df
|
86 |
|
87 |
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
|
19 |
import pandas as pd
|
|
|
23 |
from api.db.services.llm_service import LLMBundle
|
24 |
from api.settings import retrievaler
|
25 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
26 |
|
27 |
|
28 |
class RetrievalParam(ComponentParamBase):
|
|
|
81 |
df = pd.DataFrame(kbinfos["chunks"])
|
82 |
df["content"] = df["content_with_weight"]
|
83 |
del df["content_with_weight"]
|
84 |
+
logging.debug("{} {}".format(query, df))
|
85 |
return df
|
86 |
|
87 |
|
agent/component/rewrite.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
from api.db import LLMType
|
18 |
from api.db.services.llm_service import LLMBundle
|
19 |
from agent.component import GenerateParam, Generate
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class RewriteQuestionParam(GenerateParam):
|
@@ -105,7 +105,7 @@ class RewriteQuestion(Generate, ABC):
|
|
105 |
self._canvas.history.pop()
|
106 |
self._canvas.history.append(("user", ans))
|
107 |
|
108 |
-
|
109 |
return RewriteQuestion.be_output(ans)
|
110 |
|
111 |
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
from api.db import LLMType
|
19 |
from api.db.services.llm_service import LLMBundle
|
20 |
from agent.component import GenerateParam, Generate
|
|
|
21 |
|
22 |
|
23 |
class RewriteQuestionParam(GenerateParam):
|
|
|
105 |
self._canvas.history.pop()
|
106 |
self._canvas.history.append(("user", ans))
|
107 |
|
108 |
+
logging.debug(ans)
|
109 |
return RewriteQuestion.be_output(ans)
|
110 |
|
111 |
|
agent/component/wikipedia.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import wikipedia
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class WikipediaParam(ComponentParamBase):
|
@@ -63,5 +63,5 @@ class Wikipedia(ComponentBase, ABC):
|
|
63 |
return Wikipedia.be_output("")
|
64 |
|
65 |
df = pd.DataFrame(wiki_res)
|
66 |
-
|
67 |
return df
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import wikipedia
|
19 |
import pandas as pd
|
20 |
from agent.component.base import ComponentBase, ComponentParamBase
|
|
|
21 |
|
22 |
|
23 |
class WikipediaParam(ComponentParamBase):
|
|
|
63 |
return Wikipedia.be_output("")
|
64 |
|
65 |
df = pd.DataFrame(wiki_res)
|
66 |
+
logging.debug(f"df: {df}")
|
67 |
return df
|
agent/component/yahoofinance.py
CHANGED
@@ -13,11 +13,11 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from abc import ABC
|
17 |
import pandas as pd
|
18 |
from agent.component.base import ComponentBase, ComponentParamBase
|
19 |
import yfinance as yf
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
class YahooFinanceParam(ComponentParamBase):
|
@@ -76,7 +76,7 @@ class YahooFinance(ComponentBase, ABC):
|
|
76 |
if self._param.news:
|
77 |
yohoo_res.append({"content": "news:\n" + pd.DataFrame(msft.news).to_markdown() + "\n"})
|
78 |
except Exception:
|
79 |
-
|
80 |
|
81 |
if not yohoo_res:
|
82 |
return YahooFinance.be_output("")
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from abc import ABC
|
18 |
import pandas as pd
|
19 |
from agent.component.base import ComponentBase, ComponentParamBase
|
20 |
import yfinance as yf
|
|
|
21 |
|
22 |
|
23 |
class YahooFinanceParam(ComponentParamBase):
|
|
|
76 |
if self._param.news:
|
77 |
yohoo_res.append({"content": "news:\n" + pd.DataFrame(msft.news).to_markdown() + "\n"})
|
78 |
except Exception:
|
79 |
+
logging.exception("YahooFinance got exception")
|
80 |
|
81 |
if not yohoo_res:
|
82 |
return YahooFinance.be_output("")
|
api/apps/__init__.py
CHANGED
@@ -15,6 +15,7 @@
|
|
15 |
#
|
16 |
import os
|
17 |
import sys
|
|
|
18 |
from importlib.util import module_from_spec, spec_from_file_location
|
19 |
from pathlib import Path
|
20 |
from flask import Blueprint, Flask
|
@@ -32,7 +33,6 @@ from flask_login import LoginManager
|
|
32 |
from api.settings import SECRET_KEY
|
33 |
from api.settings import API_VERSION
|
34 |
from api.utils.api_utils import server_error_response
|
35 |
-
from api.utils.log_utils import logger
|
36 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
37 |
|
38 |
__all__ = ["app"]
|
@@ -154,7 +154,7 @@ def load_user(web_request):
|
|
154 |
else:
|
155 |
return None
|
156 |
except Exception:
|
157 |
-
|
158 |
return None
|
159 |
else:
|
160 |
return None
|
|
|
15 |
#
|
16 |
import os
|
17 |
import sys
|
18 |
+
import logging
|
19 |
from importlib.util import module_from_spec, spec_from_file_location
|
20 |
from pathlib import Path
|
21 |
from flask import Blueprint, Flask
|
|
|
33 |
from api.settings import SECRET_KEY
|
34 |
from api.settings import API_VERSION
|
35 |
from api.utils.api_utils import server_error_response
|
|
|
36 |
from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
|
37 |
|
38 |
__all__ = ["app"]
|
|
|
154 |
else:
|
155 |
return None
|
156 |
except Exception:
|
157 |
+
logging.exception("load_user got exception")
|
158 |
return None
|
159 |
else:
|
160 |
return None
|
api/apps/canvas_app.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import json
|
17 |
from functools import partial
|
18 |
from flask import request, Response
|
@@ -23,7 +24,6 @@ from api.utils import get_uuid
|
|
23 |
from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
|
24 |
from agent.canvas import Canvas
|
25 |
from peewee import MySQLDatabase, PostgresqlDatabase
|
26 |
-
from api.utils.log_utils import logger
|
27 |
|
28 |
|
29 |
@manager.route('/templates', methods=['GET'])
|
@@ -115,7 +115,7 @@ def run():
|
|
115 |
pass
|
116 |
canvas.add_user_input(req["message"])
|
117 |
answer = canvas.run(stream=stream)
|
118 |
-
|
119 |
except Exception as e:
|
120 |
return server_error_response(e)
|
121 |
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import json
|
18 |
from functools import partial
|
19 |
from flask import request, Response
|
|
|
24 |
from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
|
25 |
from agent.canvas import Canvas
|
26 |
from peewee import MySQLDatabase, PostgresqlDatabase
|
|
|
27 |
|
28 |
|
29 |
@manager.route('/templates', methods=['GET'])
|
|
|
115 |
pass
|
116 |
canvas.add_user_input(req["message"])
|
117 |
answer = canvas.run(stream=stream)
|
118 |
+
logging.debug(canvas)
|
119 |
except Exception as e:
|
120 |
return server_error_response(e)
|
121 |
|
api/apps/llm_app.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import json
|
17 |
|
18 |
from flask import request
|
@@ -25,7 +26,6 @@ from api.db.db_models import TenantLLM
|
|
25 |
from api.utils.api_utils import get_json_result
|
26 |
from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
|
27 |
import requests
|
28 |
-
from api.utils.log_utils import logger
|
29 |
|
30 |
|
31 |
@manager.route('/factories', methods=['GET'])
|
@@ -90,7 +90,7 @@ def set_api_key():
|
|
90 |
if len(arr) == 0 or tc == 0:
|
91 |
raise Exception("Fail")
|
92 |
rerank_passed = True
|
93 |
-
|
94 |
except Exception as e:
|
95 |
msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
|
96 |
e)
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import json
|
18 |
|
19 |
from flask import request
|
|
|
26 |
from api.utils.api_utils import get_json_result
|
27 |
from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
|
28 |
import requests
|
|
|
29 |
|
30 |
|
31 |
@manager.route('/factories', methods=['GET'])
|
|
|
90 |
if len(arr) == 0 or tc == 0:
|
91 |
raise Exception("Fail")
|
92 |
rerank_passed = True
|
93 |
+
logging.debug(f'passed model rerank {llm.llm_name}')
|
94 |
except Exception as e:
|
95 |
msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
|
96 |
e)
|
api/apps/user_app.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import json
|
17 |
import re
|
18 |
from datetime import datetime
|
@@ -54,7 +55,6 @@ from api.settings import (
|
|
54 |
from api.db.services.user_service import UserService, TenantService, UserTenantService
|
55 |
from api.db.services.file_service import FileService
|
56 |
from api.utils.api_utils import get_json_result, construct_response
|
57 |
-
from api.utils.log_utils import logger
|
58 |
|
59 |
|
60 |
@manager.route("/login", methods=["POST", "GET"])
|
@@ -177,7 +177,7 @@ def github_callback():
|
|
177 |
try:
|
178 |
avatar = download_img(user_info["avatar_url"])
|
179 |
except Exception as e:
|
180 |
-
|
181 |
avatar = ""
|
182 |
users = user_register(
|
183 |
user_id,
|
@@ -202,7 +202,7 @@ def github_callback():
|
|
202 |
return redirect("/?auth=%s" % user.get_id())
|
203 |
except Exception as e:
|
204 |
rollback_user_registration(user_id)
|
205 |
-
|
206 |
return redirect("/?error=%s" % str(e))
|
207 |
|
208 |
# User has already registered, try to log in
|
@@ -279,7 +279,7 @@ def feishu_callback():
|
|
279 |
try:
|
280 |
avatar = download_img(user_info["avatar_url"])
|
281 |
except Exception as e:
|
282 |
-
|
283 |
avatar = ""
|
284 |
users = user_register(
|
285 |
user_id,
|
@@ -304,7 +304,7 @@ def feishu_callback():
|
|
304 |
return redirect("/?auth=%s" % user.get_id())
|
305 |
except Exception as e:
|
306 |
rollback_user_registration(user_id)
|
307 |
-
|
308 |
return redirect("/?error=%s" % str(e))
|
309 |
|
310 |
# User has already registered, try to log in
|
@@ -436,7 +436,7 @@ def setting_user():
|
|
436 |
UserService.update_by_id(current_user.id, update_dict)
|
437 |
return get_json_result(data=True)
|
438 |
except Exception as e:
|
439 |
-
|
440 |
return get_json_result(
|
441 |
data=False, message="Update failure!", code=RetCode.EXCEPTION_ERROR
|
442 |
)
|
@@ -621,7 +621,7 @@ def user_add():
|
|
621 |
)
|
622 |
except Exception as e:
|
623 |
rollback_user_registration(user_id)
|
624 |
-
|
625 |
return get_json_result(
|
626 |
data=False,
|
627 |
message=f"User registration failure, error: {str(e)}",
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import json
|
18 |
import re
|
19 |
from datetime import datetime
|
|
|
55 |
from api.db.services.user_service import UserService, TenantService, UserTenantService
|
56 |
from api.db.services.file_service import FileService
|
57 |
from api.utils.api_utils import get_json_result, construct_response
|
|
|
58 |
|
59 |
|
60 |
@manager.route("/login", methods=["POST", "GET"])
|
|
|
177 |
try:
|
178 |
avatar = download_img(user_info["avatar_url"])
|
179 |
except Exception as e:
|
180 |
+
logging.exception(e)
|
181 |
avatar = ""
|
182 |
users = user_register(
|
183 |
user_id,
|
|
|
202 |
return redirect("/?auth=%s" % user.get_id())
|
203 |
except Exception as e:
|
204 |
rollback_user_registration(user_id)
|
205 |
+
logging.exception(e)
|
206 |
return redirect("/?error=%s" % str(e))
|
207 |
|
208 |
# User has already registered, try to log in
|
|
|
279 |
try:
|
280 |
avatar = download_img(user_info["avatar_url"])
|
281 |
except Exception as e:
|
282 |
+
logging.exception(e)
|
283 |
avatar = ""
|
284 |
users = user_register(
|
285 |
user_id,
|
|
|
304 |
return redirect("/?auth=%s" % user.get_id())
|
305 |
except Exception as e:
|
306 |
rollback_user_registration(user_id)
|
307 |
+
logging.exception(e)
|
308 |
return redirect("/?error=%s" % str(e))
|
309 |
|
310 |
# User has already registered, try to log in
|
|
|
436 |
UserService.update_by_id(current_user.id, update_dict)
|
437 |
return get_json_result(data=True)
|
438 |
except Exception as e:
|
439 |
+
logging.exception(e)
|
440 |
return get_json_result(
|
441 |
data=False, message="Update failure!", code=RetCode.EXCEPTION_ERROR
|
442 |
)
|
|
|
621 |
)
|
622 |
except Exception as e:
|
623 |
rollback_user_registration(user_id)
|
624 |
+
logging.exception(e)
|
625 |
return get_json_result(
|
626 |
data=False,
|
627 |
message=f"User registration failure, error: {str(e)}",
|
api/db/db_models.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import inspect
|
17 |
import os
|
18 |
import sys
|
@@ -32,7 +33,6 @@ from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
|
|
32 |
from api.db import SerializedType, ParserType
|
33 |
from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
|
34 |
from api import utils
|
35 |
-
from api.utils.log_utils import logger
|
36 |
|
37 |
def singleton(cls, *args, **kw):
|
38 |
instances = {}
|
@@ -285,7 +285,7 @@ class BaseDataBase:
|
|
285 |
database_config = DATABASE.copy()
|
286 |
db_name = database_config.pop("name")
|
287 |
self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
|
288 |
-
|
289 |
|
290 |
class PostgresDatabaseLock:
|
291 |
def __init__(self, lock_name, timeout=10, db=None):
|
@@ -393,7 +393,7 @@ def close_connection():
|
|
393 |
if DB:
|
394 |
DB.close_stale(age=30)
|
395 |
except Exception as e:
|
396 |
-
|
397 |
|
398 |
|
399 |
class DataBaseModel(BaseModel):
|
@@ -409,15 +409,15 @@ def init_database_tables(alter_fields=[]):
|
|
409 |
for name, obj in members:
|
410 |
if obj != DataBaseModel and issubclass(obj, DataBaseModel):
|
411 |
table_objs.append(obj)
|
412 |
-
|
413 |
try:
|
414 |
obj.create_table()
|
415 |
-
|
416 |
except Exception as e:
|
417 |
-
|
418 |
create_failed_list.append(obj.__name__)
|
419 |
if create_failed_list:
|
420 |
-
|
421 |
raise Exception(f"create tables failed: {create_failed_list}")
|
422 |
migrate_db()
|
423 |
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import inspect
|
18 |
import os
|
19 |
import sys
|
|
|
33 |
from api.db import SerializedType, ParserType
|
34 |
from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
|
35 |
from api import utils
|
|
|
36 |
|
37 |
def singleton(cls, *args, **kw):
|
38 |
instances = {}
|
|
|
285 |
database_config = DATABASE.copy()
|
286 |
db_name = database_config.pop("name")
|
287 |
self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
|
288 |
+
logging.info('init database on cluster mode successfully')
|
289 |
|
290 |
class PostgresDatabaseLock:
|
291 |
def __init__(self, lock_name, timeout=10, db=None):
|
|
|
393 |
if DB:
|
394 |
DB.close_stale(age=30)
|
395 |
except Exception as e:
|
396 |
+
logging.exception(e)
|
397 |
|
398 |
|
399 |
class DataBaseModel(BaseModel):
|
|
|
409 |
for name, obj in members:
|
410 |
if obj != DataBaseModel and issubclass(obj, DataBaseModel):
|
411 |
table_objs.append(obj)
|
412 |
+
logging.debug(f"start create table {obj.__name__}")
|
413 |
try:
|
414 |
obj.create_table()
|
415 |
+
logging.debug(f"create table success: {obj.__name__}")
|
416 |
except Exception as e:
|
417 |
+
logging.exception(e)
|
418 |
create_failed_list.append(obj.__name__)
|
419 |
if create_failed_list:
|
420 |
+
logging.error(f"create tables failed: {create_failed_list}")
|
421 |
raise Exception(f"create tables failed: {create_failed_list}")
|
422 |
migrate_db()
|
423 |
|
api/db/init_data.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import base64
|
17 |
import json
|
18 |
import os
|
@@ -30,7 +31,6 @@ from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantL
|
|
30 |
from api.db.services.user_service import TenantService, UserTenantService
|
31 |
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
|
32 |
from api.utils.file_utils import get_project_base_directory
|
33 |
-
from api.utils.log_utils import logger
|
34 |
|
35 |
|
36 |
def encode_to_base64(input_string):
|
@@ -70,26 +70,26 @@ def init_superuser():
|
|
70 |
"api_key": API_KEY, "api_base": LLM_BASE_URL})
|
71 |
|
72 |
if not UserService.save(**user_info):
|
73 |
-
|
74 |
return
|
75 |
TenantService.insert(**tenant)
|
76 |
UserTenantService.insert(**usr_tenant)
|
77 |
TenantLLMService.insert_many(tenant_llm)
|
78 |
-
|
79 |
-
"Super user initialized. email: [email protected], password: admin. Changing the password after
|
80 |
|
81 |
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
|
82 |
msg = chat_mdl.chat(system="", history=[
|
83 |
{"role": "user", "content": "Hello!"}], gen_conf={})
|
84 |
if msg.find("ERROR: ") == 0:
|
85 |
-
|
86 |
"'{}' dosen't work. {}".format(
|
87 |
tenant["llm_id"],
|
88 |
msg))
|
89 |
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
|
90 |
v, c = embd_mdl.encode(["Hello!"])
|
91 |
if c == 0:
|
92 |
-
|
93 |
"'{}' dosen't work!".format(
|
94 |
tenant["embd_id"]))
|
95 |
|
@@ -172,7 +172,7 @@ def add_graph_templates():
|
|
172 |
except:
|
173 |
CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
|
174 |
except Exception:
|
175 |
-
|
176 |
|
177 |
|
178 |
def init_web_data():
|
@@ -183,7 +183,7 @@ def init_web_data():
|
|
183 |
# init_superuser()
|
184 |
|
185 |
add_graph_templates()
|
186 |
-
|
187 |
|
188 |
|
189 |
if __name__ == '__main__':
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import base64
|
18 |
import json
|
19 |
import os
|
|
|
31 |
from api.db.services.user_service import TenantService, UserTenantService
|
32 |
from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
|
33 |
from api.utils.file_utils import get_project_base_directory
|
|
|
34 |
|
35 |
|
36 |
def encode_to_base64(input_string):
|
|
|
70 |
"api_key": API_KEY, "api_base": LLM_BASE_URL})
|
71 |
|
72 |
if not UserService.save(**user_info):
|
73 |
+
logging.error("can't init admin.")
|
74 |
return
|
75 |
TenantService.insert(**tenant)
|
76 |
UserTenantService.insert(**usr_tenant)
|
77 |
TenantLLMService.insert_many(tenant_llm)
|
78 |
+
logging.info(
|
79 |
+
"Super user initialized. email: [email protected], password: admin. Changing the password after login is strongly recommended.")
|
80 |
|
81 |
chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
|
82 |
msg = chat_mdl.chat(system="", history=[
|
83 |
{"role": "user", "content": "Hello!"}], gen_conf={})
|
84 |
if msg.find("ERROR: ") == 0:
|
85 |
+
logging.error(
|
86 |
"'{}' dosen't work. {}".format(
|
87 |
tenant["llm_id"],
|
88 |
msg))
|
89 |
embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
|
90 |
v, c = embd_mdl.encode(["Hello!"])
|
91 |
if c == 0:
|
92 |
+
logging.error(
|
93 |
"'{}' dosen't work!".format(
|
94 |
tenant["embd_id"]))
|
95 |
|
|
|
172 |
except:
|
173 |
CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
|
174 |
except Exception:
|
175 |
+
logging.exception("Add graph templates error: ")
|
176 |
|
177 |
|
178 |
def init_web_data():
|
|
|
183 |
# init_superuser()
|
184 |
|
185 |
add_graph_templates()
|
186 |
+
logging.info("init web data success:{}".format(time.time() - start_time))
|
187 |
|
188 |
|
189 |
if __name__ == '__main__':
|
api/db/services/dialog_service.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import binascii
|
17 |
import os
|
18 |
import json
|
@@ -31,7 +32,6 @@ from rag.app.resume import forbidden_select_fields4resume
|
|
31 |
from rag.nlp.search import index_name
|
32 |
from rag.utils import rmSpace, num_tokens_from_string, encoder
|
33 |
from api.utils.file_utils import get_project_base_directory
|
34 |
-
from api.utils.log_utils import logger
|
35 |
|
36 |
|
37 |
class DialogService(CommonService):
|
@@ -178,7 +178,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
178 |
tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
|
179 |
# try to use sql if field mapping is good to go
|
180 |
if field_map:
|
181 |
-
|
182 |
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
|
183 |
if ans:
|
184 |
yield ans
|
@@ -220,7 +220,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
220 |
doc_ids=attachments,
|
221 |
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
|
222 |
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
|
223 |
-
|
224 |
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
|
225 |
retrieval_tm = timer()
|
226 |
|
@@ -292,7 +292,7 @@ def chat(dialog, messages, stream=True, **kwargs):
|
|
292 |
yield decorate_answer(answer)
|
293 |
else:
|
294 |
answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
|
295 |
-
|
296 |
msg[-1]["content"], answer))
|
297 |
res = decorate_answer(answer)
|
298 |
res["audio_binary"] = tts(tts_mdl, answer)
|
@@ -320,7 +320,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
320 |
nonlocal sys_prompt, user_promt, question, tried_times
|
321 |
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
|
322 |
"temperature": 0.06})
|
323 |
-
|
324 |
sql = re.sub(r"[\r\n]+", " ", sql.lower())
|
325 |
sql = re.sub(r".*select ", "select ", sql.lower())
|
326 |
sql = re.sub(r" +", " ", sql)
|
@@ -340,7 +340,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
340 |
flds.append(k)
|
341 |
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
|
342 |
|
343 |
-
|
344 |
tried_times += 1
|
345 |
return retrievaler.sql_retrieval(sql, format="json"), sql
|
346 |
|
@@ -369,9 +369,9 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
369 |
question, sql, tbl["error"]
|
370 |
)
|
371 |
tbl, sql = get_table()
|
372 |
-
|
373 |
|
374 |
-
|
375 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
376 |
return None
|
377 |
|
@@ -401,7 +401,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
|
|
401 |
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
|
402 |
|
403 |
if not docid_idx or not docnm_idx:
|
404 |
-
|
405 |
return {
|
406 |
"answer": "\n".join([clmns, line, rows]),
|
407 |
"reference": {"chunks": [], "doc_aggs": []},
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import binascii
|
18 |
import os
|
19 |
import json
|
|
|
32 |
from rag.nlp.search import index_name
|
33 |
from rag.utils import rmSpace, num_tokens_from_string, encoder
|
34 |
from api.utils.file_utils import get_project_base_directory
|
|
|
35 |
|
36 |
|
37 |
class DialogService(CommonService):
|
|
|
178 |
tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
|
179 |
# try to use sql if field mapping is good to go
|
180 |
if field_map:
|
181 |
+
logging.debug("Use SQL to retrieval:{}".format(questions[-1]))
|
182 |
ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
|
183 |
if ans:
|
184 |
yield ans
|
|
|
220 |
doc_ids=attachments,
|
221 |
top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
|
222 |
knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
|
223 |
+
logging.debug(
|
224 |
"{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
|
225 |
retrieval_tm = timer()
|
226 |
|
|
|
292 |
yield decorate_answer(answer)
|
293 |
else:
|
294 |
answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
|
295 |
+
logging.debug("User: {}|Assistant: {}".format(
|
296 |
msg[-1]["content"], answer))
|
297 |
res = decorate_answer(answer)
|
298 |
res["audio_binary"] = tts(tts_mdl, answer)
|
|
|
320 |
nonlocal sys_prompt, user_promt, question, tried_times
|
321 |
sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
|
322 |
"temperature": 0.06})
|
323 |
+
logging.debug(f"{question} ==> {user_promt} get SQL: {sql}")
|
324 |
sql = re.sub(r"[\r\n]+", " ", sql.lower())
|
325 |
sql = re.sub(r".*select ", "select ", sql.lower())
|
326 |
sql = re.sub(r" +", " ", sql)
|
|
|
340 |
flds.append(k)
|
341 |
sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
|
342 |
|
343 |
+
logging.debug(f"{question} get SQL(refined): {sql}")
|
344 |
tried_times += 1
|
345 |
return retrievaler.sql_retrieval(sql, format="json"), sql
|
346 |
|
|
|
369 |
question, sql, tbl["error"]
|
370 |
)
|
371 |
tbl, sql = get_table()
|
372 |
+
logging.debug("TRY it again: {}".format(sql))
|
373 |
|
374 |
+
logging.debug("GET table: {}".format(tbl))
|
375 |
if tbl.get("error") or len(tbl["rows"]) == 0:
|
376 |
return None
|
377 |
|
|
|
401 |
rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
|
402 |
|
403 |
if not docid_idx or not docnm_idx:
|
404 |
+
logging.warning("SQL missing field: " + sql)
|
405 |
return {
|
406 |
"answer": "\n".join([clmns, line, rows]),
|
407 |
"reference": {"chunks": [], "doc_aggs": []},
|
api/db/services/document_service.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import hashlib
|
17 |
import json
|
18 |
import random
|
@@ -39,7 +40,6 @@ from api.db.services.common_service import CommonService
|
|
39 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
40 |
from api.db import StatusEnum
|
41 |
from rag.utils.redis_conn import REDIS_CONN
|
42 |
-
from api.utils.log_utils import logger
|
43 |
|
44 |
|
45 |
class DocumentService(CommonService):
|
@@ -387,7 +387,7 @@ class DocumentService(CommonService):
|
|
387 |
cls.update_by_id(d["id"], info)
|
388 |
except Exception as e:
|
389 |
if str(e).find("'0'") < 0:
|
390 |
-
|
391 |
|
392 |
@classmethod
|
393 |
@DB.connection_context()
|
@@ -544,7 +544,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|
544 |
"knowledge_graph_kwd": "mind_map"
|
545 |
})
|
546 |
except Exception as e:
|
547 |
-
|
548 |
|
549 |
vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
|
550 |
assert len(cks) == len(vects)
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import hashlib
|
18 |
import json
|
19 |
import random
|
|
|
40 |
from api.db.services.knowledgebase_service import KnowledgebaseService
|
41 |
from api.db import StatusEnum
|
42 |
from rag.utils.redis_conn import REDIS_CONN
|
|
|
43 |
|
44 |
|
45 |
class DocumentService(CommonService):
|
|
|
387 |
cls.update_by_id(d["id"], info)
|
388 |
except Exception as e:
|
389 |
if str(e).find("'0'") < 0:
|
390 |
+
logging.exception("fetch task exception")
|
391 |
|
392 |
@classmethod
|
393 |
@DB.connection_context()
|
|
|
544 |
"knowledge_graph_kwd": "mind_map"
|
545 |
})
|
546 |
except Exception as e:
|
547 |
+
logging.exception("Mind map generation error")
|
548 |
|
549 |
vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
|
550 |
assert len(cks) == len(vects)
|
api/db/services/file_service.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import re
|
17 |
import os
|
18 |
from concurrent.futures import ThreadPoolExecutor
|
@@ -30,7 +31,6 @@ from api.db.services.file2document_service import File2DocumentService
|
|
30 |
from api.utils import get_uuid
|
31 |
from api.utils.file_utils import filename_type, thumbnail_img
|
32 |
from rag.utils.storage_factory import STORAGE_IMPL
|
33 |
-
from api.utils.log_utils import logger
|
34 |
|
35 |
|
36 |
class FileService(CommonService):
|
@@ -276,7 +276,7 @@ class FileService(CommonService):
|
|
276 |
return cls.model.delete().where((cls.model.tenant_id == user_id)
|
277 |
& (cls.model.id == folder_id)).execute(),
|
278 |
except Exception:
|
279 |
-
|
280 |
raise RuntimeError("Database error (File retrieval)!")
|
281 |
|
282 |
@classmethod
|
@@ -325,7 +325,7 @@ class FileService(CommonService):
|
|
325 |
try:
|
326 |
cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
|
327 |
except Exception:
|
328 |
-
|
329 |
raise RuntimeError("Database error (File move)!")
|
330 |
|
331 |
@classmethod
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import re
|
18 |
import os
|
19 |
from concurrent.futures import ThreadPoolExecutor
|
|
|
31 |
from api.utils import get_uuid
|
32 |
from api.utils.file_utils import filename_type, thumbnail_img
|
33 |
from rag.utils.storage_factory import STORAGE_IMPL
|
|
|
34 |
|
35 |
|
36 |
class FileService(CommonService):
|
|
|
276 |
return cls.model.delete().where((cls.model.tenant_id == user_id)
|
277 |
& (cls.model.id == folder_id)).execute(),
|
278 |
except Exception:
|
279 |
+
logging.exception("delete_folder_by_pf_id")
|
280 |
raise RuntimeError("Database error (File retrieval)!")
|
281 |
|
282 |
@classmethod
|
|
|
325 |
try:
|
326 |
cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
|
327 |
except Exception:
|
328 |
+
logging.exception("move_file")
|
329 |
raise RuntimeError("Database error (File move)!")
|
330 |
|
331 |
@classmethod
|
api/db/services/llm_service.py
CHANGED
@@ -13,13 +13,13 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
from api.db.services.user_service import TenantService
|
17 |
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
|
18 |
from api.db import LLMType
|
19 |
from api.db.db_models import DB
|
20 |
from api.db.db_models import LLMFactories, LLM, TenantLLM
|
21 |
from api.db.services.common_service import CommonService
|
22 |
-
from api.utils.log_utils import logger
|
23 |
|
24 |
|
25 |
class LLMFactoriesService(CommonService):
|
@@ -209,7 +209,7 @@ class LLMBundle(object):
|
|
209 |
emd, used_tokens = self.mdl.encode(texts, batch_size)
|
210 |
if not TenantLLMService.increase_usage(
|
211 |
self.tenant_id, self.llm_type, used_tokens):
|
212 |
-
|
213 |
"LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
214 |
return emd, used_tokens
|
215 |
|
@@ -217,7 +217,7 @@ class LLMBundle(object):
|
|
217 |
emd, used_tokens = self.mdl.encode_queries(query)
|
218 |
if not TenantLLMService.increase_usage(
|
219 |
self.tenant_id, self.llm_type, used_tokens):
|
220 |
-
|
221 |
"LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
222 |
return emd, used_tokens
|
223 |
|
@@ -225,7 +225,7 @@ class LLMBundle(object):
|
|
225 |
sim, used_tokens = self.mdl.similarity(query, texts)
|
226 |
if not TenantLLMService.increase_usage(
|
227 |
self.tenant_id, self.llm_type, used_tokens):
|
228 |
-
|
229 |
"LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
|
230 |
return sim, used_tokens
|
231 |
|
@@ -233,7 +233,7 @@ class LLMBundle(object):
|
|
233 |
txt, used_tokens = self.mdl.describe(image, max_tokens)
|
234 |
if not TenantLLMService.increase_usage(
|
235 |
self.tenant_id, self.llm_type, used_tokens):
|
236 |
-
|
237 |
"LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
|
238 |
return txt
|
239 |
|
@@ -241,7 +241,7 @@ class LLMBundle(object):
|
|
241 |
txt, used_tokens = self.mdl.transcription(audio)
|
242 |
if not TenantLLMService.increase_usage(
|
243 |
self.tenant_id, self.llm_type, used_tokens):
|
244 |
-
|
245 |
"LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
|
246 |
return txt
|
247 |
|
@@ -250,7 +250,7 @@ class LLMBundle(object):
|
|
250 |
if isinstance(chunk,int):
|
251 |
if not TenantLLMService.increase_usage(
|
252 |
self.tenant_id, self.llm_type, chunk, self.llm_name):
|
253 |
-
|
254 |
"LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
|
255 |
return
|
256 |
yield chunk
|
@@ -259,7 +259,7 @@ class LLMBundle(object):
|
|
259 |
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
260 |
if isinstance(txt, int) and not TenantLLMService.increase_usage(
|
261 |
self.tenant_id, self.llm_type, used_tokens, self.llm_name):
|
262 |
-
|
263 |
"LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
|
264 |
return txt
|
265 |
|
@@ -268,7 +268,7 @@ class LLMBundle(object):
|
|
268 |
if isinstance(txt, int):
|
269 |
if not TenantLLMService.increase_usage(
|
270 |
self.tenant_id, self.llm_type, txt, self.llm_name):
|
271 |
-
|
272 |
"LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
|
273 |
return
|
274 |
yield txt
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
from api.db.services.user_service import TenantService
|
18 |
from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
|
19 |
from api.db import LLMType
|
20 |
from api.db.db_models import DB
|
21 |
from api.db.db_models import LLMFactories, LLM, TenantLLM
|
22 |
from api.db.services.common_service import CommonService
|
|
|
23 |
|
24 |
|
25 |
class LLMFactoriesService(CommonService):
|
|
|
209 |
emd, used_tokens = self.mdl.encode(texts, batch_size)
|
210 |
if not TenantLLMService.increase_usage(
|
211 |
self.tenant_id, self.llm_type, used_tokens):
|
212 |
+
logging.error(
|
213 |
"LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
214 |
return emd, used_tokens
|
215 |
|
|
|
217 |
emd, used_tokens = self.mdl.encode_queries(query)
|
218 |
if not TenantLLMService.increase_usage(
|
219 |
self.tenant_id, self.llm_type, used_tokens):
|
220 |
+
logging.error(
|
221 |
"LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
|
222 |
return emd, used_tokens
|
223 |
|
|
|
225 |
sim, used_tokens = self.mdl.similarity(query, texts)
|
226 |
if not TenantLLMService.increase_usage(
|
227 |
self.tenant_id, self.llm_type, used_tokens):
|
228 |
+
logging.error(
|
229 |
"LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
|
230 |
return sim, used_tokens
|
231 |
|
|
|
233 |
txt, used_tokens = self.mdl.describe(image, max_tokens)
|
234 |
if not TenantLLMService.increase_usage(
|
235 |
self.tenant_id, self.llm_type, used_tokens):
|
236 |
+
logging.error(
|
237 |
"LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
|
238 |
return txt
|
239 |
|
|
|
241 |
txt, used_tokens = self.mdl.transcription(audio)
|
242 |
if not TenantLLMService.increase_usage(
|
243 |
self.tenant_id, self.llm_type, used_tokens):
|
244 |
+
logging.error(
|
245 |
"LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
|
246 |
return txt
|
247 |
|
|
|
250 |
if isinstance(chunk,int):
|
251 |
if not TenantLLMService.increase_usage(
|
252 |
self.tenant_id, self.llm_type, chunk, self.llm_name):
|
253 |
+
logging.error(
|
254 |
"LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
|
255 |
return
|
256 |
yield chunk
|
|
|
259 |
txt, used_tokens = self.mdl.chat(system, history, gen_conf)
|
260 |
if isinstance(txt, int) and not TenantLLMService.increase_usage(
|
261 |
self.tenant_id, self.llm_type, used_tokens, self.llm_name):
|
262 |
+
logging.error(
|
263 |
"LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
|
264 |
return txt
|
265 |
|
|
|
268 |
if isinstance(txt, int):
|
269 |
if not TenantLLMService.increase_usage(
|
270 |
self.tenant_id, self.llm_type, txt, self.llm_name):
|
271 |
+
logging.error(
|
272 |
"LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
|
273 |
return
|
274 |
yield txt
|
api/ragflow_server.py
CHANGED
@@ -15,6 +15,17 @@
|
|
15 |
#
|
16 |
|
17 |
import logging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
import os
|
19 |
import signal
|
20 |
import sys
|
@@ -22,7 +33,6 @@ import time
|
|
22 |
import traceback
|
23 |
from concurrent.futures import ThreadPoolExecutor
|
24 |
|
25 |
-
import validation
|
26 |
from werkzeug.serving import run_simple
|
27 |
from api.apps import app
|
28 |
from api.db.runtime_config import RuntimeConfig
|
@@ -31,7 +41,6 @@ from api.settings import (
|
|
31 |
HOST, HTTP_PORT
|
32 |
)
|
33 |
from api import utils
|
34 |
-
from api.utils.log_utils import logger
|
35 |
|
36 |
from api.db.db_models import init_database_tables as init_web_db
|
37 |
from api.db.init_data import init_web_data
|
@@ -44,11 +53,11 @@ def update_progress():
|
|
44 |
try:
|
45 |
DocumentService.update_progress()
|
46 |
except Exception:
|
47 |
-
|
48 |
|
49 |
|
50 |
if __name__ == '__main__':
|
51 |
-
|
52 |
____ ___ ______ ______ __
|
53 |
/ __ \ / | / ____// ____// /____ _ __
|
54 |
/ /_/ // /| | / / __ / /_ / // __ \| | /| / /
|
@@ -56,10 +65,10 @@ if __name__ == '__main__':
|
|
56 |
/_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
|
57 |
|
58 |
""")
|
59 |
-
|
60 |
f'RAGFlow version: {RAGFLOW_VERSION_INFO}'
|
61 |
)
|
62 |
-
|
63 |
f'project base: {utils.file_utils.get_project_base_directory()}'
|
64 |
)
|
65 |
|
@@ -83,26 +92,18 @@ if __name__ == '__main__':
|
|
83 |
|
84 |
RuntimeConfig.DEBUG = args.debug
|
85 |
if RuntimeConfig.DEBUG:
|
86 |
-
|
87 |
|
88 |
RuntimeConfig.init_env()
|
89 |
RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
|
90 |
|
91 |
-
peewee_logger = logging.getLogger("peewee")
|
92 |
-
peewee_logger.propagate = False
|
93 |
-
# rag_arch.common.log.ROpenHandler
|
94 |
-
peewee_logger.addHandler(logger.handlers[0])
|
95 |
-
peewee_logger.setLevel(logger.handlers[0].level)
|
96 |
|
97 |
thr = ThreadPoolExecutor(max_workers=1)
|
98 |
thr.submit(update_progress)
|
99 |
|
100 |
# start http server
|
101 |
try:
|
102 |
-
|
103 |
-
werkzeug_logger = logging.getLogger("werkzeug")
|
104 |
-
for h in logger.handlers:
|
105 |
-
werkzeug_logger.addHandler(h)
|
106 |
run_simple(
|
107 |
hostname=HOST,
|
108 |
port=HTTP_PORT,
|
|
|
15 |
#
|
16 |
|
17 |
import logging
|
18 |
+
import inspect
|
19 |
+
from api.utils.log_utils import initRootLogger
|
20 |
+
initRootLogger(inspect.getfile(inspect.currentframe()))
|
21 |
+
for module in ["pdfminer"]:
|
22 |
+
module_logger = logging.getLogger(module)
|
23 |
+
module_logger.setLevel(logging.WARNING)
|
24 |
+
for module in ["peewee"]:
|
25 |
+
module_logger = logging.getLogger(module)
|
26 |
+
module_logger.handlers.clear()
|
27 |
+
module_logger.propagate = True
|
28 |
+
|
29 |
import os
|
30 |
import signal
|
31 |
import sys
|
|
|
33 |
import traceback
|
34 |
from concurrent.futures import ThreadPoolExecutor
|
35 |
|
|
|
36 |
from werkzeug.serving import run_simple
|
37 |
from api.apps import app
|
38 |
from api.db.runtime_config import RuntimeConfig
|
|
|
41 |
HOST, HTTP_PORT
|
42 |
)
|
43 |
from api import utils
|
|
|
44 |
|
45 |
from api.db.db_models import init_database_tables as init_web_db
|
46 |
from api.db.init_data import init_web_data
|
|
|
53 |
try:
|
54 |
DocumentService.update_progress()
|
55 |
except Exception:
|
56 |
+
logging.exception("update_progress exception")
|
57 |
|
58 |
|
59 |
if __name__ == '__main__':
|
60 |
+
logging.info(r"""
|
61 |
____ ___ ______ ______ __
|
62 |
/ __ \ / | / ____// ____// /____ _ __
|
63 |
/ /_/ // /| | / / __ / /_ / // __ \| | /| / /
|
|
|
65 |
/_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
|
66 |
|
67 |
""")
|
68 |
+
logging.info(
|
69 |
f'RAGFlow version: {RAGFLOW_VERSION_INFO}'
|
70 |
)
|
71 |
+
logging.info(
|
72 |
f'project base: {utils.file_utils.get_project_base_directory()}'
|
73 |
)
|
74 |
|
|
|
92 |
|
93 |
RuntimeConfig.DEBUG = args.debug
|
94 |
if RuntimeConfig.DEBUG:
|
95 |
+
logging.info("run on debug mode")
|
96 |
|
97 |
RuntimeConfig.init_env()
|
98 |
RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
|
99 |
|
|
|
|
|
|
|
|
|
|
|
100 |
|
101 |
thr = ThreadPoolExecutor(max_workers=1)
|
102 |
thr.submit(update_progress)
|
103 |
|
104 |
# start http server
|
105 |
try:
|
106 |
+
logging.info("RAG Flow http server start...")
|
|
|
|
|
|
|
107 |
run_simple(
|
108 |
hostname=HOST,
|
109 |
port=HTTP_PORT,
|
api/utils/api_utils.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import functools
|
17 |
import json
|
18 |
import random
|
@@ -40,7 +41,6 @@ from api.settings import (
|
|
40 |
from api.settings import RetCode
|
41 |
from api.utils import CustomJSONEncoder, get_uuid
|
42 |
from api.utils import json_dumps
|
43 |
-
from api.utils.log_utils import logger
|
44 |
|
45 |
requests.models.complexjson.dumps = functools.partial(
|
46 |
json.dumps, cls=CustomJSONEncoder)
|
@@ -118,7 +118,7 @@ def get_data_error_result(code=RetCode.DATA_ERROR,
|
|
118 |
|
119 |
|
120 |
def server_error_response(e):
|
121 |
-
|
122 |
try:
|
123 |
if e.code == 401:
|
124 |
return get_json_result(code=401, message=repr(e))
|
@@ -259,7 +259,7 @@ def construct_json_result(code=RetCode.SUCCESS, message='success', data=None):
|
|
259 |
|
260 |
|
261 |
def construct_error_response(e):
|
262 |
-
|
263 |
try:
|
264 |
if e.code == 401:
|
265 |
return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import functools
|
18 |
import json
|
19 |
import random
|
|
|
41 |
from api.settings import RetCode
|
42 |
from api.utils import CustomJSONEncoder, get_uuid
|
43 |
from api.utils import json_dumps
|
|
|
44 |
|
45 |
requests.models.complexjson.dumps = functools.partial(
|
46 |
json.dumps, cls=CustomJSONEncoder)
|
|
|
118 |
|
119 |
|
120 |
def server_error_response(e):
|
121 |
+
logging.exception(e)
|
122 |
try:
|
123 |
if e.code == 401:
|
124 |
return get_json_result(code=401, message=repr(e))
|
|
|
259 |
|
260 |
|
261 |
def construct_error_response(e):
|
262 |
+
logging.exception(e)
|
263 |
try:
|
264 |
if e.code == 401:
|
265 |
return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
|
api/utils/log_utils.py
CHANGED
@@ -14,38 +14,41 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
import os
|
|
|
17 |
import logging
|
18 |
from logging.handlers import RotatingFileHandler
|
19 |
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
41 |
logger.addHandler(handler1)
|
42 |
|
43 |
handler2 = logging.StreamHandler()
|
44 |
-
handler2.setLevel(
|
45 |
-
|
46 |
-
handler2.setFormatter(formatter2)
|
47 |
logger.addHandler(handler2)
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
logger = getLogger()
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
import os
|
17 |
+
import os.path
|
18 |
import logging
|
19 |
from logging.handlers import RotatingFileHandler
|
20 |
|
21 |
+
def get_project_base_directory():
|
22 |
+
PROJECT_BASE = os.path.abspath(
|
23 |
+
os.path.join(
|
24 |
+
os.path.dirname(os.path.realpath(__file__)),
|
25 |
+
os.pardir,
|
26 |
+
os.pardir,
|
27 |
+
)
|
28 |
+
)
|
29 |
+
return PROJECT_BASE
|
30 |
+
|
31 |
+
def initRootLogger(script_path: str, log_level: int = logging.INFO, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
|
32 |
+
logger = logging.getLogger()
|
33 |
+
if logger.hasHandlers():
|
34 |
+
return
|
35 |
+
|
36 |
+
script_name = os.path.basename(script_path)
|
37 |
+
log_path = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"{os.path.splitext(script_name)[0]}.log"))
|
38 |
+
|
39 |
+
os.makedirs(os.path.dirname(log_path), exist_ok=True)
|
40 |
+
logger.setLevel(log_level)
|
41 |
+
formatter = logging.Formatter(log_format)
|
42 |
+
|
43 |
+
handler1 = RotatingFileHandler(log_path, maxBytes=10*1024*1024, backupCount=5)
|
44 |
+
handler1.setLevel(log_level)
|
45 |
+
handler1.setFormatter(formatter)
|
46 |
logger.addHandler(handler1)
|
47 |
|
48 |
handler2 = logging.StreamHandler()
|
49 |
+
handler2.setLevel(log_level)
|
50 |
+
handler2.setFormatter(formatter)
|
|
|
51 |
logger.addHandler(handler2)
|
52 |
|
53 |
+
msg = f"{script_name} log path: {log_path}"
|
54 |
+
logger.info(msg)
|
|
api/validation.py
CHANGED
@@ -14,20 +14,20 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
|
|
|
17 |
import sys
|
18 |
-
from api.utils.log_utils import logger
|
19 |
|
20 |
|
21 |
def python_version_validation():
|
22 |
# Check python version
|
23 |
required_python_version = (3, 10)
|
24 |
if sys.version_info < required_python_version:
|
25 |
-
|
26 |
f"Required Python: >= {required_python_version[0]}.{required_python_version[1]}. Current Python version: {sys.version_info[0]}.{sys.version_info[1]}."
|
27 |
)
|
28 |
sys.exit(1)
|
29 |
else:
|
30 |
-
|
31 |
|
32 |
|
33 |
python_version_validation()
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
|
17 |
+
import logging
|
18 |
import sys
|
|
|
19 |
|
20 |
|
21 |
def python_version_validation():
|
22 |
# Check python version
|
23 |
required_python_version = (3, 10)
|
24 |
if sys.version_info < required_python_version:
|
25 |
+
logging.info(
|
26 |
f"Required Python: >= {required_python_version[0]}.{required_python_version[1]}. Current Python version: {sys.version_info[0]}.{sys.version_info[1]}."
|
27 |
)
|
28 |
sys.exit(1)
|
29 |
else:
|
30 |
+
logging.info(f"Python version: {sys.version_info[0]}.{sys.version_info[1]}")
|
31 |
|
32 |
|
33 |
python_version_validation()
|
deepdoc/parser/pdf_parser.py
CHANGED
@@ -11,6 +11,7 @@
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
|
|
14 |
import os
|
15 |
import random
|
16 |
|
@@ -18,7 +19,6 @@ import xgboost as xgb
|
|
18 |
from io import BytesIO
|
19 |
import re
|
20 |
import pdfplumber
|
21 |
-
import logging
|
22 |
from PIL import Image
|
23 |
import numpy as np
|
24 |
from timeit import default_timer as timer
|
@@ -26,15 +26,11 @@ from pypdf import PdfReader as pdf2_read
|
|
26 |
|
27 |
from api.settings import LIGHTEN
|
28 |
from api.utils.file_utils import get_project_base_directory
|
29 |
-
from api.utils.log_utils import logger
|
30 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
31 |
from rag.nlp import rag_tokenizer
|
32 |
from copy import deepcopy
|
33 |
from huggingface_hub import snapshot_download
|
34 |
|
35 |
-
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
36 |
-
|
37 |
-
|
38 |
class RAGFlowPdfParser:
|
39 |
def __init__(self):
|
40 |
self.ocr = OCR()
|
@@ -51,7 +47,7 @@ class RAGFlowPdfParser:
|
|
51 |
if torch.cuda.is_available():
|
52 |
self.updown_cnt_mdl.set_param({"device": "cuda"})
|
53 |
except Exception:
|
54 |
-
|
55 |
try:
|
56 |
model_dir = os.path.join(
|
57 |
get_project_base_directory(),
|
@@ -188,7 +184,7 @@ class RAGFlowPdfParser:
|
|
188 |
return True
|
189 |
|
190 |
def _table_transformer_job(self, ZM):
|
191 |
-
|
192 |
imgs, pos = [], []
|
193 |
tbcnt = [0]
|
194 |
MARGIN = 10
|
@@ -426,7 +422,7 @@ class RAGFlowPdfParser:
|
|
426 |
detach_feats = [b["x1"] < b_["x0"],
|
427 |
b["x0"] > b_["x1"]]
|
428 |
if (any(feats) and not any(concatting_feats)) or any(detach_feats):
|
429 |
-
|
430 |
b["text"],
|
431 |
b_["text"],
|
432 |
any(feats),
|
@@ -727,14 +723,14 @@ class RAGFlowPdfParser:
|
|
727 |
# continue
|
728 |
if tv < fv and tk:
|
729 |
tables[tk].insert(0, c)
|
730 |
-
|
731 |
"TABLE:" +
|
732 |
self.boxes[i]["text"] +
|
733 |
"; Cap: " +
|
734 |
tk)
|
735 |
elif fk:
|
736 |
figures[fk].insert(0, c)
|
737 |
-
|
738 |
"FIGURE:" +
|
739 |
self.boxes[i]["text"] +
|
740 |
"; Cap: " +
|
@@ -761,7 +757,7 @@ class RAGFlowPdfParser:
|
|
761 |
if ii is not None:
|
762 |
b = louts[ii]
|
763 |
else:
|
764 |
-
|
765 |
f"Missing layout match: {pn + 1},%s" %
|
766 |
(bxs[0].get(
|
767 |
"layoutno", "")))
|
@@ -919,7 +915,7 @@ class RAGFlowPdfParser:
|
|
919 |
if usefull(boxes[0]):
|
920 |
dfs(boxes[0], 0)
|
921 |
else:
|
922 |
-
|
923 |
except Exception:
|
924 |
pass
|
925 |
boxes.pop(0)
|
@@ -928,7 +924,7 @@ class RAGFlowPdfParser:
|
|
928 |
res.append(
|
929 |
"\n".join([c["text"] + self._line_tag(c, ZM) for c in lines]))
|
930 |
else:
|
931 |
-
|
932 |
"<<".join([c["text"] for c in lines]))
|
933 |
|
934 |
return "\n\n".join(res)
|
@@ -940,7 +936,7 @@ class RAGFlowPdfParser:
|
|
940 |
fnm) if not binary else pdfplumber.open(BytesIO(binary))
|
941 |
return len(pdf.pages)
|
942 |
except Exception:
|
943 |
-
|
944 |
|
945 |
def __images__(self, fnm, zoomin=3, page_from=0,
|
946 |
page_to=299, callback=None):
|
@@ -964,7 +960,7 @@ class RAGFlowPdfParser:
|
|
964 |
self.pdf.pages[page_from:page_to]]
|
965 |
self.total_page = len(self.pdf.pages)
|
966 |
except Exception:
|
967 |
-
|
968 |
|
969 |
self.outlines = []
|
970 |
try:
|
@@ -980,11 +976,11 @@ class RAGFlowPdfParser:
|
|
980 |
|
981 |
dfs(outlines, 0)
|
982 |
except Exception as e:
|
983 |
-
|
984 |
if not self.outlines:
|
985 |
-
|
986 |
|
987 |
-
|
988 |
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
|
989 |
random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
|
990 |
range(len(self.page_chars))]
|
@@ -1024,7 +1020,7 @@ class RAGFlowPdfParser:
|
|
1024 |
self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}",
|
1025 |
"".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))
|
1026 |
|
1027 |
-
|
1028 |
|
1029 |
self.page_cum_height = np.cumsum(self.page_cum_height)
|
1030 |
assert len(self.page_cum_height) == len(self.page_images) + 1
|
@@ -1164,9 +1160,9 @@ class PlainParser(object):
|
|
1164 |
|
1165 |
dfs(outlines, 0)
|
1166 |
except Exception:
|
1167 |
-
|
1168 |
if not self.outlines:
|
1169 |
-
|
1170 |
|
1171 |
return [(l, "") for l in lines], []
|
1172 |
|
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
14 |
+
import logging
|
15 |
import os
|
16 |
import random
|
17 |
|
|
|
19 |
from io import BytesIO
|
20 |
import re
|
21 |
import pdfplumber
|
|
|
22 |
from PIL import Image
|
23 |
import numpy as np
|
24 |
from timeit import default_timer as timer
|
|
|
26 |
|
27 |
from api.settings import LIGHTEN
|
28 |
from api.utils.file_utils import get_project_base_directory
|
|
|
29 |
from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
|
30 |
from rag.nlp import rag_tokenizer
|
31 |
from copy import deepcopy
|
32 |
from huggingface_hub import snapshot_download
|
33 |
|
|
|
|
|
|
|
34 |
class RAGFlowPdfParser:
|
35 |
def __init__(self):
|
36 |
self.ocr = OCR()
|
|
|
47 |
if torch.cuda.is_available():
|
48 |
self.updown_cnt_mdl.set_param({"device": "cuda"})
|
49 |
except Exception:
|
50 |
+
logging.exception("RAGFlowPdfParser __init__")
|
51 |
try:
|
52 |
model_dir = os.path.join(
|
53 |
get_project_base_directory(),
|
|
|
184 |
return True
|
185 |
|
186 |
def _table_transformer_job(self, ZM):
|
187 |
+
logging.debug("Table processing...")
|
188 |
imgs, pos = [], []
|
189 |
tbcnt = [0]
|
190 |
MARGIN = 10
|
|
|
422 |
detach_feats = [b["x1"] < b_["x0"],
|
423 |
b["x0"] > b_["x1"]]
|
424 |
if (any(feats) and not any(concatting_feats)) or any(detach_feats):
|
425 |
+
logging.debug("{} {} {} {}".format(
|
426 |
b["text"],
|
427 |
b_["text"],
|
428 |
any(feats),
|
|
|
723 |
# continue
|
724 |
if tv < fv and tk:
|
725 |
tables[tk].insert(0, c)
|
726 |
+
logging.debug(
|
727 |
"TABLE:" +
|
728 |
self.boxes[i]["text"] +
|
729 |
"; Cap: " +
|
730 |
tk)
|
731 |
elif fk:
|
732 |
figures[fk].insert(0, c)
|
733 |
+
logging.debug(
|
734 |
"FIGURE:" +
|
735 |
self.boxes[i]["text"] +
|
736 |
"; Cap: " +
|
|
|
757 |
if ii is not None:
|
758 |
b = louts[ii]
|
759 |
else:
|
760 |
+
logging.warn(
|
761 |
f"Missing layout match: {pn + 1},%s" %
|
762 |
(bxs[0].get(
|
763 |
"layoutno", "")))
|
|
|
915 |
if usefull(boxes[0]):
|
916 |
dfs(boxes[0], 0)
|
917 |
else:
|
918 |
+
logging.debug("WASTE: " + boxes[0]["text"])
|
919 |
except Exception:
|
920 |
pass
|
921 |
boxes.pop(0)
|
|
|
924 |
res.append(
|
925 |
"\n".join([c["text"] + self._line_tag(c, ZM) for c in lines]))
|
926 |
else:
|
927 |
+
logging.debug("REMOVED: " +
|
928 |
"<<".join([c["text"] for c in lines]))
|
929 |
|
930 |
return "\n\n".join(res)
|
|
|
936 |
fnm) if not binary else pdfplumber.open(BytesIO(binary))
|
937 |
return len(pdf.pages)
|
938 |
except Exception:
|
939 |
+
logging.exception("total_page_number")
|
940 |
|
941 |
def __images__(self, fnm, zoomin=3, page_from=0,
|
942 |
page_to=299, callback=None):
|
|
|
960 |
self.pdf.pages[page_from:page_to]]
|
961 |
self.total_page = len(self.pdf.pages)
|
962 |
except Exception:
|
963 |
+
logging.exception("RAGFlowPdfParser __images__")
|
964 |
|
965 |
self.outlines = []
|
966 |
try:
|
|
|
976 |
|
977 |
dfs(outlines, 0)
|
978 |
except Exception as e:
|
979 |
+
logging.warning(f"Outlines exception: {e}")
|
980 |
if not self.outlines:
|
981 |
+
logging.warning("Miss outlines")
|
982 |
|
983 |
+
logging.debug("Images converted.")
|
984 |
self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
|
985 |
random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
|
986 |
range(len(self.page_chars))]
|
|
|
1020 |
self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}",
|
1021 |
"".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))
|
1022 |
|
1023 |
+
logging.debug("Is it English:", self.is_english)
|
1024 |
|
1025 |
self.page_cum_height = np.cumsum(self.page_cum_height)
|
1026 |
assert len(self.page_cum_height) == len(self.page_images) + 1
|
|
|
1160 |
|
1161 |
dfs(outlines, 0)
|
1162 |
except Exception:
|
1163 |
+
logging.exception("Outlines exception")
|
1164 |
if not self.outlines:
|
1165 |
+
logging.warning("Miss outlines")
|
1166 |
|
1167 |
return [(l, "") for l in lines], []
|
1168 |
|
deepdoc/parser/resume/entities/corporations.py
CHANGED
@@ -11,13 +11,13 @@
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
|
|
14 |
import re
|
15 |
import json
|
16 |
import os
|
17 |
import pandas as pd
|
18 |
from rag.nlp import rag_tokenizer
|
19 |
from . import regions
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
|
23 |
current_file_path = os.path.dirname(os.path.abspath(__file__))
|
@@ -71,7 +71,7 @@ GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
|
|
71 |
for c,v in CORP_TAG.items():
|
72 |
cc = corpNorm(rmNoise(c), False)
|
73 |
if not cc:
|
74 |
-
|
75 |
CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
|
76 |
|
77 |
def is_good(nm):
|
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
14 |
+
import logging
|
15 |
import re
|
16 |
import json
|
17 |
import os
|
18 |
import pandas as pd
|
19 |
from rag.nlp import rag_tokenizer
|
20 |
from . import regions
|
|
|
21 |
|
22 |
|
23 |
current_file_path = os.path.dirname(os.path.abspath(__file__))
|
|
|
71 |
for c,v in CORP_TAG.items():
|
72 |
cc = corpNorm(rmNoise(c), False)
|
73 |
if not cc:
|
74 |
+
logging.debug(c)
|
75 |
CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
|
76 |
|
77 |
def is_good(nm):
|
deepdoc/parser/resume/step_two.py
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
-
|
14 |
import re
|
15 |
import copy
|
16 |
import time
|
@@ -23,7 +23,6 @@ from deepdoc.parser.resume.entities import degrees, schools, corporations
|
|
23 |
from rag.nlp import rag_tokenizer, surname
|
24 |
from xpinyin import Pinyin
|
25 |
from contextlib import contextmanager
|
26 |
-
from api.utils.log_utils import logger
|
27 |
|
28 |
|
29 |
class TimeoutException(Exception): pass
|
@@ -164,7 +163,7 @@ def forEdu(cv):
|
|
164 |
y, m, d = getYMD(edu_end_dt)
|
165 |
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
|
166 |
except Exception as e:
|
167 |
-
|
168 |
if sch:
|
169 |
cv["school_name_kwd"] = sch
|
170 |
if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
|
@@ -276,7 +275,7 @@ def forWork(cv):
|
|
276 |
try:
|
277 |
duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
|
278 |
except Exception:
|
279 |
-
|
280 |
|
281 |
if n.get("scale"):
|
282 |
r = re.search(r"^([0-9]+)", str(n["scale"]))
|
@@ -333,7 +332,7 @@ def forWork(cv):
|
|
333 |
y, m, d = getYMD(work_st_tm)
|
334 |
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
|
335 |
except Exception as e:
|
336 |
-
|
337 |
|
338 |
cv["job_num_int"] = 0
|
339 |
if duas:
|
@@ -464,7 +463,7 @@ def parse(cv):
|
|
464 |
cv[f"{t}_kwd"] = nms
|
465 |
cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
|
466 |
except Exception:
|
467 |
-
|
468 |
cv[k] = []
|
469 |
|
470 |
# tokenize fields
|
@@ -565,7 +564,7 @@ def parse(cv):
|
|
565 |
cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
|
566 |
cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
|
567 |
except Exception as e:
|
568 |
-
|
569 |
if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
|
570 |
|
571 |
keys = list(cv.keys())
|
@@ -580,7 +579,7 @@ def parse(cv):
|
|
580 |
|
581 |
cv["tob_resume_id"] = str(cv["tob_resume_id"])
|
582 |
cv["id"] = cv["tob_resume_id"]
|
583 |
-
|
584 |
|
585 |
return dealWithInt64(cv)
|
586 |
|
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
+
import logging
|
14 |
import re
|
15 |
import copy
|
16 |
import time
|
|
|
23 |
from rag.nlp import rag_tokenizer, surname
|
24 |
from xpinyin import Pinyin
|
25 |
from contextlib import contextmanager
|
|
|
26 |
|
27 |
|
28 |
class TimeoutException(Exception): pass
|
|
|
163 |
y, m, d = getYMD(edu_end_dt)
|
164 |
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
|
165 |
except Exception as e:
|
166 |
+
logging.exception("forEdu {} {} {}".format(e, edu_end_dt, cv.get("work_exp_flt")))
|
167 |
if sch:
|
168 |
cv["school_name_kwd"] = sch
|
169 |
if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
|
|
|
275 |
try:
|
276 |
duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
|
277 |
except Exception:
|
278 |
+
logging.exception("forWork {} {}".format(n.get("start_time"), n.get("end_time")))
|
279 |
|
280 |
if n.get("scale"):
|
281 |
r = re.search(r"^([0-9]+)", str(n["scale"]))
|
|
|
332 |
y, m, d = getYMD(work_st_tm)
|
333 |
cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
|
334 |
except Exception as e:
|
335 |
+
logging.exception("forWork {} {} {}".format(e, work_st_tm, cv.get("work_exp_flt")))
|
336 |
|
337 |
cv["job_num_int"] = 0
|
338 |
if duas:
|
|
|
463 |
cv[f"{t}_kwd"] = nms
|
464 |
cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
|
465 |
except Exception:
|
466 |
+
logging.exception("parse {} {}".format(str(traceback.format_exc()), cv[k]))
|
467 |
cv[k] = []
|
468 |
|
469 |
# tokenize fields
|
|
|
564 |
cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
|
565 |
cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
|
566 |
except Exception as e:
|
567 |
+
logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
|
568 |
if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
|
569 |
|
570 |
keys = list(cv.keys())
|
|
|
579 |
|
580 |
cv["tob_resume_id"] = str(cv["tob_resume_id"])
|
581 |
cv["id"] = cv["tob_resume_id"]
|
582 |
+
logging.debug("CCCCCCCCCCCCCCC")
|
583 |
|
584 |
return dealWithInt64(cv)
|
585 |
|
deepdoc/vision/operators.py
CHANGED
@@ -14,13 +14,13 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
|
|
|
17 |
import sys
|
18 |
import six
|
19 |
import cv2
|
20 |
import numpy as np
|
21 |
import math
|
22 |
from PIL import Image
|
23 |
-
from api.utils.log_utils import logger
|
24 |
|
25 |
|
26 |
class DecodeImage(object):
|
@@ -403,7 +403,7 @@ class DetResizeForTest(object):
|
|
403 |
return None, (None, None)
|
404 |
img = cv2.resize(img, (int(resize_w), int(resize_h)))
|
405 |
except BaseException:
|
406 |
-
|
407 |
sys.exit(0)
|
408 |
ratio_h = resize_h / float(h)
|
409 |
ratio_w = resize_w / float(w)
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
|
17 |
+
import logging
|
18 |
import sys
|
19 |
import six
|
20 |
import cv2
|
21 |
import numpy as np
|
22 |
import math
|
23 |
from PIL import Image
|
|
|
24 |
|
25 |
|
26 |
class DecodeImage(object):
|
|
|
403 |
return None, (None, None)
|
404 |
img = cv2.resize(img, (int(resize_w), int(resize_h)))
|
405 |
except BaseException:
|
406 |
+
logging.exception("{} {} {}".format(img.shape, resize_w, resize_h))
|
407 |
sys.exit(0)
|
408 |
ratio_h = resize_h / float(h)
|
409 |
ratio_w = resize_w / float(w)
|
deepdoc/vision/recognizer.py
CHANGED
@@ -11,6 +11,7 @@
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
|
|
14 |
import os
|
15 |
from copy import deepcopy
|
16 |
|
@@ -19,7 +20,6 @@ from huggingface_hub import snapshot_download
|
|
19 |
|
20 |
from api.utils.file_utils import get_project_base_directory
|
21 |
from .operators import *
|
22 |
-
from api.utils.log_utils import logger
|
23 |
|
24 |
|
25 |
class Recognizer(object):
|
@@ -440,7 +440,7 @@ class Recognizer(object):
|
|
440 |
end_index = min((i + 1) * batch_size, len(imgs))
|
441 |
batch_image_list = imgs[start_index:end_index]
|
442 |
inputs = self.preprocess(batch_image_list)
|
443 |
-
|
444 |
for ins in inputs:
|
445 |
bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
|
446 |
res.append(bb)
|
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
14 |
+
import logging
|
15 |
import os
|
16 |
from copy import deepcopy
|
17 |
|
|
|
20 |
|
21 |
from api.utils.file_utils import get_project_base_directory
|
22 |
from .operators import *
|
|
|
23 |
|
24 |
|
25 |
class Recognizer(object):
|
|
|
440 |
end_index = min((i + 1) * batch_size, len(imgs))
|
441 |
batch_image_list = imgs[start_index:end_index]
|
442 |
inputs = self.preprocess(batch_image_list)
|
443 |
+
logging.debug("preprocess")
|
444 |
for ins in inputs:
|
445 |
bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
|
446 |
res.append(bb)
|
deepdoc/vision/seeit.py
CHANGED
@@ -11,10 +11,10 @@
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
|
|
14 |
import os
|
15 |
import PIL
|
16 |
from PIL import ImageDraw
|
17 |
-
from api.utils.log_utils import logger
|
18 |
|
19 |
|
20 |
def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):
|
@@ -25,7 +25,7 @@ def save_results(image_list, results, labels, output_dir='output/', threshold=0.
|
|
25 |
|
26 |
out_path = os.path.join(output_dir, f"{idx}.jpg")
|
27 |
im.save(out_path, quality=95)
|
28 |
-
|
29 |
|
30 |
|
31 |
def draw_box(im, result, lables, threshold=0.5):
|
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
14 |
+
import logging
|
15 |
import os
|
16 |
import PIL
|
17 |
from PIL import ImageDraw
|
|
|
18 |
|
19 |
|
20 |
def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):
|
|
|
25 |
|
26 |
out_path = os.path.join(output_dir, f"{idx}.jpg")
|
27 |
im.save(out_path, quality=95)
|
28 |
+
logging.debug("save result to: " + out_path)
|
29 |
|
30 |
|
31 |
def draw_box(im, result, lables, threshold=0.5):
|
deepdoc/vision/t_recognizer.py
CHANGED
@@ -10,9 +10,9 @@
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
|
|
13 |
import os
|
14 |
import sys
|
15 |
-
from api.utils.log_utils import logger
|
16 |
|
17 |
sys.path.insert(
|
18 |
0,
|
@@ -59,7 +59,7 @@ def main(args):
|
|
59 |
} for t in lyt]
|
60 |
img = draw_box(images[i], lyt, labels, float(args.threshold))
|
61 |
img.save(outputs[i], quality=95)
|
62 |
-
|
63 |
|
64 |
|
65 |
def get_table_html(img, tb_cpns, ocr):
|
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
+
import logging
|
14 |
import os
|
15 |
import sys
|
|
|
16 |
|
17 |
sys.path.insert(
|
18 |
0,
|
|
|
59 |
} for t in lyt]
|
60 |
img = draw_box(images[i], lyt, labels, float(args.threshold))
|
61 |
img.save(outputs[i], quality=95)
|
62 |
+
logging.info("save result to: " + outputs[i])
|
63 |
|
64 |
|
65 |
def get_table_html(img, tb_cpns, ocr):
|
deepdoc/vision/table_structure_recognizer.py
CHANGED
@@ -38,7 +38,7 @@ class TableStructureRecognizer(Recognizer):
|
|
38 |
super().__init__(self.labels, "tsr", os.path.join(
|
39 |
get_project_base_directory(),
|
40 |
"rag/res/deepdoc"))
|
41 |
-
except Exception
|
42 |
super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc",
|
43 |
local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
|
44 |
local_dir_use_symlinks=False))
|
|
|
38 |
super().__init__(self.labels, "tsr", os.path.join(
|
39 |
get_project_base_directory(),
|
40 |
"rag/res/deepdoc"))
|
41 |
+
except Exception:
|
42 |
super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc",
|
43 |
local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
|
44 |
local_dir_use_symlinks=False))
|
graphrag/claim_extractor.py
CHANGED
@@ -5,6 +5,7 @@ Reference:
|
|
5 |
- [graphrag](https://github.com/microsoft/graphrag)
|
6 |
"""
|
7 |
|
|
|
8 |
import argparse
|
9 |
import json
|
10 |
import re
|
@@ -17,7 +18,6 @@ import tiktoken
|
|
17 |
from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
|
18 |
from rag.llm.chat_model import Base as CompletionLLM
|
19 |
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
|
20 |
-
from api.utils.log_utils import logger
|
21 |
|
22 |
DEFAULT_TUPLE_DELIMITER = "<|>"
|
23 |
DEFAULT_RECORD_DELIMITER = "##"
|
@@ -126,7 +126,7 @@ class ClaimExtractor:
|
|
126 |
]
|
127 |
source_doc_map[document_id] = text
|
128 |
except Exception as e:
|
129 |
-
|
130 |
self._on_error(
|
131 |
e,
|
132 |
traceback.format_exc(),
|
@@ -265,4 +265,4 @@ if __name__ == "__main__":
|
|
265 |
"claim_description": ""
|
266 |
}
|
267 |
claim = ex(info)
|
268 |
-
|
|
|
5 |
- [graphrag](https://github.com/microsoft/graphrag)
|
6 |
"""
|
7 |
|
8 |
+
import logging
|
9 |
import argparse
|
10 |
import json
|
11 |
import re
|
|
|
18 |
from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
|
19 |
from rag.llm.chat_model import Base as CompletionLLM
|
20 |
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
|
|
|
21 |
|
22 |
DEFAULT_TUPLE_DELIMITER = "<|>"
|
23 |
DEFAULT_RECORD_DELIMITER = "##"
|
|
|
126 |
]
|
127 |
source_doc_map[document_id] = text
|
128 |
except Exception as e:
|
129 |
+
logging.exception("error extracting claim")
|
130 |
self._on_error(
|
131 |
e,
|
132 |
traceback.format_exc(),
|
|
|
265 |
"claim_description": ""
|
266 |
}
|
267 |
claim = ex(info)
|
268 |
+
logging.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
|
graphrag/community_reports_extractor.py
CHANGED
@@ -5,6 +5,7 @@ Reference:
|
|
5 |
- [graphrag](https://github.com/microsoft/graphrag)
|
6 |
"""
|
7 |
|
|
|
8 |
import json
|
9 |
import re
|
10 |
import traceback
|
@@ -19,7 +20,6 @@ from rag.llm.chat_model import Base as CompletionLLM
|
|
19 |
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
|
20 |
from rag.utils import num_tokens_from_string
|
21 |
from timeit import default_timer as timer
|
22 |
-
from api.utils.log_utils import logger
|
23 |
|
24 |
|
25 |
@dataclass
|
@@ -80,7 +80,7 @@ class CommunityReportsExtractor:
|
|
80 |
response = re.sub(r"[^\}]*$", "", response)
|
81 |
response = re.sub(r"\{\{", "{", response)
|
82 |
response = re.sub(r"\}\}", "}", response)
|
83 |
-
|
84 |
response = json.loads(response)
|
85 |
if not dict_has_keys_with_types(response, [
|
86 |
("title", str),
|
@@ -92,7 +92,7 @@ class CommunityReportsExtractor:
|
|
92 |
response["weight"] = weight
|
93 |
response["entities"] = ents
|
94 |
except Exception as e:
|
95 |
-
|
96 |
self._on_error(e, traceback.format_exc(), None)
|
97 |
continue
|
98 |
|
|
|
5 |
- [graphrag](https://github.com/microsoft/graphrag)
|
6 |
"""
|
7 |
|
8 |
+
import logging
|
9 |
import json
|
10 |
import re
|
11 |
import traceback
|
|
|
20 |
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
|
21 |
from rag.utils import num_tokens_from_string
|
22 |
from timeit import default_timer as timer
|
|
|
23 |
|
24 |
|
25 |
@dataclass
|
|
|
80 |
response = re.sub(r"[^\}]*$", "", response)
|
81 |
response = re.sub(r"\{\{", "{", response)
|
82 |
response = re.sub(r"\}\}", "}", response)
|
83 |
+
logging.debug(response)
|
84 |
response = json.loads(response)
|
85 |
if not dict_has_keys_with_types(response, [
|
86 |
("title", str),
|
|
|
92 |
response["weight"] = weight
|
93 |
response["entities"] = ents
|
94 |
except Exception as e:
|
95 |
+
logging.exception("CommunityReportsExtractor got exception")
|
96 |
self._on_error(e, traceback.format_exc(), None)
|
97 |
continue
|
98 |
|
graphrag/description_summary.py
CHANGED
@@ -5,19 +5,11 @@ Reference:
|
|
5 |
- [graphrag](https://github.com/microsoft/graphrag)
|
6 |
"""
|
7 |
|
8 |
-
import argparse
|
9 |
-
import html
|
10 |
import json
|
11 |
-
import logging
|
12 |
-
import numbers
|
13 |
-
import re
|
14 |
-
import traceback
|
15 |
-
from collections.abc import Callable
|
16 |
from dataclasses import dataclass
|
17 |
|
18 |
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
|
19 |
from rag.llm.chat_model import Base as CompletionLLM
|
20 |
-
import networkx as nx
|
21 |
|
22 |
from rag.utils import num_tokens_from_string
|
23 |
|
|
|
5 |
- [graphrag](https://github.com/microsoft/graphrag)
|
6 |
"""
|
7 |
|
|
|
|
|
8 |
import json
|
|
|
|
|
|
|
|
|
|
|
9 |
from dataclasses import dataclass
|
10 |
|
11 |
from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
|
12 |
from rag.llm.chat_model import Base as CompletionLLM
|
|
|
13 |
|
14 |
from rag.utils import num_tokens_from_string
|
15 |
|
graphrag/entity_resolution.py
CHANGED
@@ -13,8 +13,8 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
-
import itertools
|
17 |
import logging
|
|
|
18 |
import re
|
19 |
import traceback
|
20 |
from dataclasses import dataclass
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import logging
|
17 |
+
import itertools
|
18 |
import re
|
19 |
import traceback
|
20 |
from dataclasses import dataclass
|
graphrag/index.py
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
|
|
16 |
import os
|
17 |
from concurrent.futures import ThreadPoolExecutor
|
18 |
import json
|
@@ -28,7 +29,6 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
|
|
28 |
from graphrag.mind_map_extractor import MindMapExtractor
|
29 |
from rag.nlp import rag_tokenizer
|
30 |
from rag.utils import num_tokens_from_string
|
31 |
-
from api.utils.log_utils import logger
|
32 |
|
33 |
|
34 |
def graph_merge(g1, g2):
|
@@ -95,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
|
|
95 |
chunks = []
|
96 |
for n, attr in graph.nodes(data=True):
|
97 |
if attr.get("rank", 0) == 0:
|
98 |
-
|
99 |
continue
|
100 |
chunk = {
|
101 |
"name_kwd": n,
|
@@ -137,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
|
|
137 |
mg = mindmap(_chunks).output
|
138 |
if not len(mg.keys()): return chunks
|
139 |
|
140 |
-
|
141 |
chunks.append(
|
142 |
{
|
143 |
"content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
|
|
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
+
import logging
|
17 |
import os
|
18 |
from concurrent.futures import ThreadPoolExecutor
|
19 |
import json
|
|
|
29 |
from graphrag.mind_map_extractor import MindMapExtractor
|
30 |
from rag.nlp import rag_tokenizer
|
31 |
from rag.utils import num_tokens_from_string
|
|
|
32 |
|
33 |
|
34 |
def graph_merge(g1, g2):
|
|
|
95 |
chunks = []
|
96 |
for n, attr in graph.nodes(data=True):
|
97 |
if attr.get("rank", 0) == 0:
|
98 |
+
logging.debug(f"Ignore entity: {n}")
|
99 |
continue
|
100 |
chunk = {
|
101 |
"name_kwd": n,
|
|
|
137 |
mg = mindmap(_chunks).output
|
138 |
if not len(mg.keys()): return chunks
|
139 |
|
140 |
+
logging.debug(json.dumps(mg, ensure_ascii=False, indent=2))
|
141 |
chunks.append(
|
142 |
{
|
143 |
"content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
|
graphrag/leiden.py
CHANGED
@@ -14,8 +14,6 @@ from graspologic.utils import largest_connected_component
|
|
14 |
import networkx as nx
|
15 |
from networkx import is_empty
|
16 |
|
17 |
-
log = logging.getLogger(__name__)
|
18 |
-
|
19 |
|
20 |
def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
|
21 |
"""Ensure an undirected graph with the same relationships will always be read the same way."""
|
@@ -99,7 +97,7 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
|
|
99 |
max_cluster_size = args.get("max_cluster_size", 12)
|
100 |
use_lcc = args.get("use_lcc", True)
|
101 |
if args.get("verbose", False):
|
102 |
-
|
103 |
"Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
|
104 |
)
|
105 |
if not graph.nodes(): return {}
|
|
|
14 |
import networkx as nx
|
15 |
from networkx import is_empty
|
16 |
|
|
|
|
|
17 |
|
18 |
def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
|
19 |
"""Ensure an undirected graph with the same relationships will always be read the same way."""
|
|
|
97 |
max_cluster_size = args.get("max_cluster_size", 12)
|
98 |
use_lcc = args.get("use_lcc", True)
|
99 |
if args.get("verbose", False):
|
100 |
+
logging.debug(
|
101 |
"Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
|
102 |
)
|
103 |
if not graph.nodes(): return {}
|
graphrag/mind_map_extractor.py
CHANGED
@@ -14,8 +14,8 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
|
17 |
-
import collections
|
18 |
import logging
|
|
|
19 |
import os
|
20 |
import re
|
21 |
import traceback
|
@@ -29,7 +29,6 @@ from rag.llm.chat_model import Base as CompletionLLM
|
|
29 |
import markdown_to_json
|
30 |
from functools import reduce
|
31 |
from rag.utils import num_tokens_from_string
|
32 |
-
from api.utils.log_utils import logger
|
33 |
|
34 |
|
35 |
@dataclass
|
@@ -193,6 +192,6 @@ class MindMapExtractor:
|
|
193 |
gen_conf = {"temperature": 0.5}
|
194 |
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
|
195 |
response = re.sub(r"```[^\n]*", "", response)
|
196 |
-
|
197 |
-
|
198 |
return self._todict(markdown_to_json.dictify(response))
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
|
|
|
17 |
import logging
|
18 |
+
import collections
|
19 |
import os
|
20 |
import re
|
21 |
import traceback
|
|
|
29 |
import markdown_to_json
|
30 |
from functools import reduce
|
31 |
from rag.utils import num_tokens_from_string
|
|
|
32 |
|
33 |
|
34 |
@dataclass
|
|
|
192 |
gen_conf = {"temperature": 0.5}
|
193 |
response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
|
194 |
response = re.sub(r"```[^\n]*", "", response)
|
195 |
+
logging.debug(response)
|
196 |
+
logging.debug(self._todict(markdown_to_json.dictify(response)))
|
197 |
return self._todict(markdown_to_json.dictify(response))
|
intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py
CHANGED
@@ -1,8 +1,8 @@
|
|
|
|
1 |
import requests
|
2 |
from bridge.context import ContextType # Import Context, ContextType
|
3 |
from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
|
4 |
from bridge import *
|
5 |
-
from api.utils.log_utils import logger
|
6 |
from plugins import Plugin, register # Import Plugin and register
|
7 |
from plugins.event import Event, EventContext, EventAction # Import event-related classes
|
8 |
|
@@ -16,7 +16,7 @@ class RAGFlowChat(Plugin):
|
|
16 |
self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
|
17 |
# Store conversation_id for each user
|
18 |
self.conversations = {}
|
19 |
-
|
20 |
|
21 |
def on_handle_context(self, e_context: EventContext):
|
22 |
context = e_context['context']
|
@@ -45,7 +45,7 @@ class RAGFlowChat(Plugin):
|
|
45 |
user_id = session_id # Use session_id as user_id
|
46 |
|
47 |
if not api_key or not host_address:
|
48 |
-
|
49 |
return "The plugin configuration is incomplete. Please check the configuration."
|
50 |
|
51 |
headers = {
|
@@ -63,20 +63,20 @@ class RAGFlowChat(Plugin):
|
|
63 |
}
|
64 |
try:
|
65 |
response = requests.get(url_new_conversation, headers=headers, params=params_new_conversation)
|
66 |
-
|
67 |
if response.status_code == 200:
|
68 |
data = response.json()
|
69 |
if data.get("code") == 0:
|
70 |
conversation_id = data["data"]["id"]
|
71 |
self.conversations[user_id] = conversation_id
|
72 |
else:
|
73 |
-
|
74 |
return f"Sorry, unable to create a conversation: {data.get('message')}"
|
75 |
else:
|
76 |
-
|
77 |
return f"Sorry, unable to connect to RAGFlow API (create conversation). HTTP status code: {response.status_code}"
|
78 |
except Exception as e:
|
79 |
-
|
80 |
return f"Sorry, an internal error occurred: {str(e)}"
|
81 |
|
82 |
# Step 2: Send the message and get a reply
|
@@ -95,18 +95,18 @@ class RAGFlowChat(Plugin):
|
|
95 |
|
96 |
try:
|
97 |
response = requests.post(url_completion, headers=headers, json=payload_completion)
|
98 |
-
|
99 |
if response.status_code == 200:
|
100 |
data = response.json()
|
101 |
if data.get("code") == 0:
|
102 |
answer = data["data"]["answer"]
|
103 |
return answer
|
104 |
else:
|
105 |
-
|
106 |
return f"Sorry, unable to get a reply: {data.get('message')}"
|
107 |
else:
|
108 |
-
|
109 |
return f"Sorry, unable to connect to RAGFlow API (get reply). HTTP status code: {response.status_code}"
|
110 |
except Exception as e:
|
111 |
-
|
112 |
return f"Sorry, an internal error occurred: {str(e)}"
|
|
|
1 |
+
import logging
|
2 |
import requests
|
3 |
from bridge.context import ContextType # Import Context, ContextType
|
4 |
from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
|
5 |
from bridge import *
|
|
|
6 |
from plugins import Plugin, register # Import Plugin and register
|
7 |
from plugins.event import Event, EventContext, EventAction # Import event-related classes
|
8 |
|
|
|
16 |
self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
|
17 |
# Store conversation_id for each user
|
18 |
self.conversations = {}
|
19 |
+
logging.info("[RAGFlowChat] Plugin initialized")
|
20 |
|
21 |
def on_handle_context(self, e_context: EventContext):
|
22 |
context = e_context['context']
|
|
|
45 |
user_id = session_id # Use session_id as user_id
|
46 |
|
47 |
if not api_key or not host_address:
|
48 |
+
logging.error("[RAGFlowChat] Missing configuration")
|
49 |
return "The plugin configuration is incomplete. Please check the configuration."
|
50 |
|
51 |
headers = {
|
|
|
63 |
}
|
64 |
try:
|
65 |
response = requests.get(url_new_conversation, headers=headers, params=params_new_conversation)
|
66 |
+
logging.debug(f"[RAGFlowChat] New conversation response: {response.text}")
|
67 |
if response.status_code == 200:
|
68 |
data = response.json()
|
69 |
if data.get("code") == 0:
|
70 |
conversation_id = data["data"]["id"]
|
71 |
self.conversations[user_id] = conversation_id
|
72 |
else:
|
73 |
+
logging.error(f"[RAGFlowChat] Failed to create conversation: {data.get('message')}")
|
74 |
return f"Sorry, unable to create a conversation: {data.get('message')}"
|
75 |
else:
|
76 |
+
logging.error(f"[RAGFlowChat] HTTP error when creating conversation: {response.status_code}")
|
77 |
return f"Sorry, unable to connect to RAGFlow API (create conversation). HTTP status code: {response.status_code}"
|
78 |
except Exception as e:
|
79 |
+
logging.exception("[RAGFlowChat] Exception when creating conversation")
|
80 |
return f"Sorry, an internal error occurred: {str(e)}"
|
81 |
|
82 |
# Step 2: Send the message and get a reply
|
|
|
95 |
|
96 |
try:
|
97 |
response = requests.post(url_completion, headers=headers, json=payload_completion)
|
98 |
+
logging.debug(f"[RAGFlowChat] Completion response: {response.text}")
|
99 |
if response.status_code == 200:
|
100 |
data = response.json()
|
101 |
if data.get("code") == 0:
|
102 |
answer = data["data"]["answer"]
|
103 |
return answer
|
104 |
else:
|
105 |
+
logging.error(f"[RAGFlowChat] Failed to get answer: {data.get('message')}")
|
106 |
return f"Sorry, unable to get a reply: {data.get('message')}"
|
107 |
else:
|
108 |
+
logging.error(f"[RAGFlowChat] HTTP error when getting answer: {response.status_code}")
|
109 |
return f"Sorry, unable to connect to RAGFlow API (get reply). HTTP status code: {response.status_code}"
|
110 |
except Exception as e:
|
111 |
+
logging.exception("[RAGFlowChat] Exception when getting answer")
|
112 |
return f"Sorry, an internal error occurred: {str(e)}"
|
rag/app/book.py
CHANGED
@@ -10,6 +10,7 @@
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
|
|
13 |
from tika import parser
|
14 |
import re
|
15 |
from io import BytesIO
|
@@ -20,7 +21,6 @@ from rag.nlp import bullets_category, is_english,remove_contents_table, \
|
|
20 |
tokenize_chunks
|
21 |
from rag.nlp import rag_tokenizer
|
22 |
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
|
23 |
-
from api.utils.log_utils import logger
|
24 |
|
25 |
|
26 |
class Pdf(PdfParser):
|
@@ -39,7 +39,7 @@ class Pdf(PdfParser):
|
|
39 |
start = timer()
|
40 |
self._layouts_rec(zoomin)
|
41 |
callback(0.67, "Layout analysis finished")
|
42 |
-
|
43 |
self._table_transformer_job(zoomin)
|
44 |
callback(0.68, "Table analysis finished")
|
45 |
self._text_merge()
|
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
+
import logging
|
14 |
from tika import parser
|
15 |
import re
|
16 |
from io import BytesIO
|
|
|
21 |
tokenize_chunks
|
22 |
from rag.nlp import rag_tokenizer
|
23 |
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
|
|
|
24 |
|
25 |
|
26 |
class Pdf(PdfParser):
|
|
|
39 |
start = timer()
|
40 |
self._layouts_rec(zoomin)
|
41 |
callback(0.67, "Layout analysis finished")
|
42 |
+
logging.debug("layouts: {}".format(timer() - start))
|
43 |
self._table_transformer_job(zoomin)
|
44 |
callback(0.68, "Table analysis finished")
|
45 |
self._text_merge()
|
rag/app/email.py
CHANGED
@@ -11,6 +11,7 @@
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
|
|
14 |
from email import policy
|
15 |
from email.parser import BytesParser
|
16 |
from rag.app.naive import chunk as naive_chunk
|
@@ -18,7 +19,6 @@ import re
|
|
18 |
from rag.nlp import rag_tokenizer, naive_merge, tokenize_chunks
|
19 |
from deepdoc.parser import HtmlParser, TxtParser
|
20 |
from timeit import default_timer as timer
|
21 |
-
from api.utils.log_utils import logger
|
22 |
import io
|
23 |
|
24 |
|
@@ -86,7 +86,7 @@ def chunk(
|
|
86 |
)
|
87 |
|
88 |
main_res.extend(tokenize_chunks(chunks, doc, eng, None))
|
89 |
-
|
90 |
# get the attachment info
|
91 |
for part in msg.iter_attachments():
|
92 |
content_disposition = part.get("Content-Disposition")
|
|
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
|
14 |
+
import logging
|
15 |
from email import policy
|
16 |
from email.parser import BytesParser
|
17 |
from rag.app.naive import chunk as naive_chunk
|
|
|
19 |
from rag.nlp import rag_tokenizer, naive_merge, tokenize_chunks
|
20 |
from deepdoc.parser import HtmlParser, TxtParser
|
21 |
from timeit import default_timer as timer
|
|
|
22 |
import io
|
23 |
|
24 |
|
|
|
86 |
)
|
87 |
|
88 |
main_res.extend(tokenize_chunks(chunks, doc, eng, None))
|
89 |
+
logging.debug("naive_merge({}): {}".format(filename, timer() - st))
|
90 |
# get the attachment info
|
91 |
for part in msg.iter_attachments():
|
92 |
content_disposition = part.get("Content-Disposition")
|
rag/app/laws.py
CHANGED
@@ -21,7 +21,6 @@ from rag.nlp import bullets_category, remove_contents_table, hierarchical_merge,
|
|
21 |
make_colon_as_title, tokenize_chunks, docx_question_level
|
22 |
from rag.nlp import rag_tokenizer
|
23 |
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
|
24 |
-
from api.utils.log_utils import logger
|
25 |
|
26 |
|
27 |
class Docx(DocxParser):
|
@@ -122,7 +121,7 @@ class Pdf(PdfParser):
|
|
122 |
start = timer()
|
123 |
self._layouts_rec(zoomin)
|
124 |
callback(0.67, "Layout analysis finished")
|
125 |
-
|
126 |
))
|
127 |
self._naive_vertical_merge()
|
128 |
|
|
|
21 |
make_colon_as_title, tokenize_chunks, docx_question_level
|
22 |
from rag.nlp import rag_tokenizer
|
23 |
from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
|
|
|
24 |
|
25 |
|
26 |
class Docx(DocxParser):
|
|
|
121 |
start = timer()
|
122 |
self._layouts_rec(zoomin)
|
123 |
callback(0.67, "Layout analysis finished")
|
124 |
+
logging.debug("layouts:".format(
|
125 |
))
|
126 |
self._naive_vertical_merge()
|
127 |
|