zhichyu committed
Commit 22fe41e · Parent: 6afa2cc

Rework logging (#3358)


Unified all log files into one.

### What problem does this PR solve?

Unified all log files into one.

### Type of change

- [x] Refactoring
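
This view does not show the new body of `api/utils/log_utils.py` (it shrank from 287 lines to 25); the diffs below only confirm that every module now does `from api.utils.log_utils import logger`. As a rough sketch of what such a unified module could look like, assuming a single rotating log file; the file name, format, and handler choices are illustrative, not taken from the commit:

```python
# Hypothetical sketch of a unified api/utils/log_utils.py.
# Only the exported name `logger` is confirmed by the diffs below;
# everything else here is an assumption for illustration.
import logging
import os
from logging.handlers import RotatingFileHandler

# One log directory at the project root (assumed layout).
LOG_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "logs"))
os.makedirs(LOG_DIR, exist_ok=True)

logger = logging.getLogger("ragflow")
logger.setLevel(logging.INFO)

_fmt = logging.Formatter("%(asctime)s %(levelname)s %(filename)s:%(lineno)d %(message)s")

# One rotating file shared by every caller of `from api.utils.log_utils import logger`,
# replacing the per-module log files (flow, chat, stat, database, access, sql).
_file_handler = RotatingFileHandler(os.path.join(LOG_DIR, "ragflow.log"),
                                    maxBytes=10 * 1024 * 1024, backupCount=5)
_file_handler.setFormatter(_fmt)
logger.addHandler(_file_handler)

# Mirror to the console for interactive runs.
_console = logging.StreamHandler()
_console.setFormatter(_fmt)
logger.addHandler(_console)
```

Because the logger is a module-level singleton, `logger.exception(...)` inside an `except` block records the active traceback to the same file, which is why the removed `traceback.print_exc()` calls need no replacement.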

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. agent/canvas.py +5 -7
  2. agent/component/arxiv.py +2 -3
  3. agent/component/baidu.py +2 -4
  4. agent/component/base.py +7 -7
  5. agent/component/bing.py +2 -3
  6. agent/component/categorize.py +3 -3
  7. agent/component/duckduckgo.py +2 -2
  8. agent/component/github.py +2 -2
  9. agent/component/google.py +3 -3
  10. agent/component/googlescholar.py +4 -4
  11. agent/component/keyword.py +2 -2
  12. agent/component/pubmed.py +2 -2
  13. agent/component/relevant.py +2 -1
  14. agent/component/retrieval.py +2 -1
  15. agent/component/rewrite.py +2 -1
  16. agent/component/wikipedia.py +2 -4
  17. agent/component/yahoofinance.py +3 -2
  18. agent/settings.py +0 -16
  19. api/apps/__init__.py +5 -10
  20. api/apps/canvas_app.py +2 -1
  21. api/apps/llm_app.py +2 -1
  22. api/apps/sdk/dataset.py +1 -1
  23. api/apps/user_app.py +7 -7
  24. api/db/db_models.py +8 -11
  25. api/db/db_utils.py +0 -6
  26. api/db/init_data.py +14 -16
  27. api/db/operatioins.py +0 -21
  28. api/db/services/dialog_service.py +10 -13
  29. api/db/services/document_service.py +4 -4
  30. api/db/services/file_service.py +5 -4
  31. api/db/services/llm_service.py +17 -17
  32. api/ragflow_server.py +14 -18
  33. api/settings.py +0 -17
  34. api/utils/api_utils.py +4 -3
  35. api/utils/log_utils.py +25 -287
  36. deepdoc/parser/pdf_parser.py +25 -24
  37. deepdoc/parser/resume/entities/corporations.py +9 -3
  38. deepdoc/parser/resume/step_two.py +20 -15
  39. deepdoc/vision/operators.py +2 -2
  40. deepdoc/vision/recognizer.py +2 -1
  41. deepdoc/vision/seeit.py +2 -1
  42. deepdoc/vision/t_recognizer.py +5 -2
  43. graphrag/claim_extractor.py +3 -4
  44. graphrag/community_reports_extractor.py +4 -7
  45. graphrag/index.py +3 -2
  46. graphrag/mind_map_extractor.py +3 -3
  47. intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py +3 -3
  48. rag/app/book.py +2 -1
  49. rag/app/email.py +2 -2
  50. rag/app/laws.py +3 -3
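
The changes across the 50 files above are largely mechanical, as the diffs that follow show. The recurring substitutions look like this sketch (a composite, not any single file; the helper and its values are hypothetical, and only the `logger` import path comes from this PR):

```python
from api.utils.log_utils import logger  # import added across all files in this PR

def fetch_results():  # hypothetical helper illustrating the recurring pattern
    results = [{"title": "example", "url": "https://example.com"}]
    # Old style: if DEBUG: print(results, ":::::::::::::::::::::::::::::::::")
    logger.debug(f"results: {results}")
    try:
        _ = 1 / 0  # stand-in for a call that may raise
    except Exception:
        # Old style: traceback.print_exc() or print("**ERROR** " + str(e))
        logger.exception("fetch_results got exception")
    return results
```

Per-module loggers (`flow_logger`, `chat_logger`, `stat_logger`, `database_logger`, `access_logger`, `sql_logger`) are replaced by the same shared `logger` in the same way.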
agent/canvas.py CHANGED
@@ -14,14 +14,12 @@
 # limitations under the License.
 #
 import json
-import traceback
 from abc import ABC
 from copy import deepcopy
 from functools import partial
 from agent.component import component_class
 from agent.component.base import ComponentBase
-from agent.settings import flow_logger, DEBUG
-
+from api.utils.log_utils import logger
 
 class Canvas(ABC):
     """
@@ -189,7 +187,7 @@ class Canvas(ABC):
                 if cpn.component_name == "Answer":
                     self.answer.append(c)
                 else:
-                    if DEBUG: print("RUN: ", c)
+                    logger.debug(f"Canvas.prepare2run: {c}")
                     cpids = cpn.get_dependent_components()
                     if any([c not in self.path[-1] for c in cpids]):
                         continue
@@ -199,7 +197,7 @@
 
         prepare2run(self.components[self.path[-2][-1]]["downstream"])
         while 0 <= ran < len(self.path[-1]):
-            if DEBUG: print(ran, self.path)
+            logger.debug(f"Canvas.run: {ran} {self.path}")
            cpn_id = self.path[-1][ran]
            cpn = self.get_component(cpn_id)
            if not cpn["downstream"]: break
@@ -219,7 +217,7 @@
                         self.get_component(p)["obj"].set_exception(e)
                         prepare2run([p])
                         break
-                    traceback.print_exc()
+                    logger.exception("Canvas.run got exception")
                     break
                 continue
 
@@ -231,7 +229,7 @@
                         self.get_component(p)["obj"].set_exception(e)
                         prepare2run([p])
                         break
-                    traceback.print_exc()
+                    logger.exception("Canvas.run got exception")
                     break
 
         if self.answer:
agent/component/arxiv.py CHANGED
@@ -16,9 +16,8 @@
 from abc import ABC
 import arxiv
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
-
+from api.utils.log_utils import logger
 
 class ArXivParam(ComponentParamBase):
     """
@@ -65,5 +64,5 @@ class ArXiv(ComponentBase, ABC):
             return ArXiv.be_output("")
 
         df = pd.DataFrame(arxiv_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {str(df)}")
         return df
agent/component/baidu.py CHANGED
@@ -13,14 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 import pandas as pd
 import requests
 import re
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class BaiduParam(ComponentParamBase):
@@ -64,6 +62,6 @@ class Baidu(ComponentBase, ABC):
             return Baidu.be_output("")
 
         df = pd.DataFrame(baidu_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {str(df)}")
         return df
 
agent/component/base.py CHANGED
@@ -17,14 +17,14 @@ from abc import ABC
 import builtins
 import json
 import os
-from copy import deepcopy
 from functools import partial
-from typing import List, Dict, Tuple, Union
+from typing import Tuple, Union
 
 import pandas as pd
 
 from agent import settings
-from agent.settings import flow_logger, DEBUG
+from api.utils.log_utils import logger
+
 
 _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
 _DEPRECATED_PARAMS = "_deprecated_params"
@@ -361,13 +361,13 @@ class ComponentParamBase(ABC):
 
     def _warn_deprecated_param(self, param_name, descr):
         if self._deprecated_params_set.get(param_name):
-            flow_logger.warning(
+            logger.warning(
                 f"{descr} {param_name} is deprecated and ignored in this version."
             )
 
     def _warn_to_deprecate_param(self, param_name, descr, new_param):
         if self._deprecated_params_set.get(param_name):
-            flow_logger.warning(
+            logger.warning(
                 f"{descr} {param_name} will be deprecated in future release; "
                 f"please use {new_param} instead."
             )
@@ -403,7 +403,7 @@
         return cpnts
 
     def run(self, history, **kwargs):
-        flow_logger.info("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
+        logger.info("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
                                                               json.dumps(kwargs, ensure_ascii=False)))
         try:
             res = self._run(history, **kwargs)
@@ -463,7 +463,7 @@
         reversed_cpnts.extend(self._canvas.path[-2])
         reversed_cpnts.extend(self._canvas.path[-1])
 
-        if DEBUG: print(self.component_name, reversed_cpnts[::-1])
+        logger.debug(f"{self.component_name} {reversed_cpnts[::-1]}")
         for u in reversed_cpnts[::-1]:
             if self.get_component_name(u) in ["switch", "concentrator"]: continue
             if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
agent/component/bing.py CHANGED
@@ -16,9 +16,8 @@
 from abc import ABC
 import requests
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
-
+from api.utils.log_utils import logger
 
 class BingParam(ComponentParamBase):
     """
@@ -81,5 +80,5 @@ class Bing(ComponentBase, ABC):
             return Bing.be_output("")
 
         df = pd.DataFrame(bing_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {str(df)}")
         return df
agent/component/categorize.py CHANGED
@@ -17,7 +17,7 @@ from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
-from agent.settings import DEBUG
+from api.utils.log_utils import logger
 
 
 class CategorizeParam(GenerateParam):
@@ -34,7 +34,7 @@ class CategorizeParam(GenerateParam):
         super().check()
         self.check_empty(self.category_description, "[Categorize] Category examples")
         for k, v in self.category_description.items():
-            if not k: raise ValueError(f"[Categorize] Category name can not be empty!")
+            if not k: raise ValueError("[Categorize] Category name can not be empty!")
             if not v.get("to"): raise ValueError(f"[Categorize] 'To' of category {k} can not be empty!")
 
     def get_prompt(self):
@@ -77,7 +77,7 @@ class Categorize(Generate, ABC):
         chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
         ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
                             self._param.gen_conf())
-        if DEBUG: print(ans, ":::::::::::::::::::::::::::::::::", input)
+        logger.debug(f"input: {input}, answer: {str(ans)}")
         for c in self._param.category_description.keys():
             if ans.lower().find(c.lower()) >= 0:
                 return Categorize.be_output(self._param.category_description[c]["to"])
agent/component/duckduckgo.py CHANGED
@@ -16,8 +16,8 @@
 from abc import ABC
 from duckduckgo_search import DDGS
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class DuckDuckGoParam(ComponentParamBase):
@@ -62,5 +62,5 @@ class DuckDuckGo(ComponentBase, ABC):
             return DuckDuckGo.be_output("")
 
         df = pd.DataFrame(duck_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug("df: {df}")
         return df
agent/component/github.py CHANGED
@@ -16,8 +16,8 @@
 from abc import ABC
 import pandas as pd
 import requests
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class GitHubParam(ComponentParamBase):
@@ -57,5 +57,5 @@ class GitHub(ComponentBase, ABC):
             return GitHub.be_output("")
 
         df = pd.DataFrame(github_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {df}")
         return df
agent/component/google.py CHANGED
@@ -16,8 +16,8 @@
 from abc import ABC
 from serpapi import GoogleSearch
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class GoogleParam(ComponentParamBase):
@@ -85,12 +85,12 @@ class Google(ComponentBase, ABC):
                                        "hl": self._param.language, "num": self._param.top_n})
             google_res = [{"content": '<a href="' + i["link"] + '">' + i["title"] + '</a> ' + i["snippet"]} for i in
                           client.get_dict()["organic_results"]]
-        except Exception as e:
+        except Exception:
             return Google.be_output("**ERROR**: Existing Unavailable Parameters!")
 
         if not google_res:
             return Google.be_output("")
 
         df = pd.DataFrame(google_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {df}")
         return df
agent/component/googlescholar.py CHANGED
@@ -15,9 +15,9 @@
 #
 from abc import ABC
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
 from scholarly import scholarly
+from api.utils.log_utils import logger
 
 
 class GoogleScholarParam(ComponentParamBase):
@@ -58,13 +58,13 @@ class GoogleScholar(ComponentBase, ABC):
                     'pub_url'] + '"></a> ' + "\n author: " + ",".join(pub['bib']['author']) + '\n Abstract: ' + pub[
                     'bib'].get('abstract', 'no abstract')})
 
-            except StopIteration or Exception as e:
-                print("**ERROR** " + str(e))
+            except StopIteration or Exception:
+                logger.exception("GoogleScholar")
                 break
 
         if not scholar_res:
             return GoogleScholar.be_output("")
 
         df = pd.DataFrame(scholar_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {df}")
         return df
agent/component/keyword.py CHANGED
@@ -18,7 +18,7 @@ from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
-from agent.settings import DEBUG
+from api.utils.log_utils import logger
 
 
 class KeywordExtractParam(GenerateParam):
@@ -58,5 +58,5 @@ class KeywordExtract(Generate, ABC):
                             self._param.gen_conf())
 
         ans = re.sub(r".*keyword:", "", ans).strip()
-        if DEBUG: print(ans, ":::::::::::::::::::::::::::::::::")
+        logger.info(f"ans: {ans}")
         return KeywordExtract.be_output(ans)
agent/component/pubmed.py CHANGED
@@ -18,8 +18,8 @@ from Bio import Entrez
 import re
 import pandas as pd
 import xml.etree.ElementTree as ET
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class PubMedParam(ComponentParamBase):
@@ -65,5 +65,5 @@ class PubMed(ComponentBase, ABC):
             return PubMed.be_output("")
 
         df = pd.DataFrame(pubmed_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {df}")
         return df
agent/component/relevant.py CHANGED
@@ -18,6 +18,7 @@ from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
 from rag.utils import num_tokens_from_string, encoder
+from api.utils.log_utils import logger
 
 
 class RelevantParam(GenerateParam):
@@ -70,7 +71,7 @@ class Relevant(Generate, ABC):
         ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": ans}],
                             self._param.gen_conf())
 
-        print(ans, ":::::::::::::::::::::::::::::::::")
+        logger.info(ans)
         if ans.lower().find("yes") >= 0:
             return Relevant.be_output(self._param.yes)
         if ans.lower().find("no") >= 0:
agent/component/retrieval.py CHANGED
@@ -22,6 +22,7 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMBundle
 from api.settings import retrievaler
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class RetrievalParam(ComponentParamBase):
@@ -80,7 +81,7 @@ class Retrieval(ComponentBase, ABC):
         df = pd.DataFrame(kbinfos["chunks"])
         df["content"] = df["content_with_weight"]
         del df["content_with_weight"]
-        print(">>>>>>>>>>>>>>>>>>>>>>>>>>\n", query, df)
+        logger.debug("{} {}".format(query, df))
         return df
 
 
agent/component/rewrite.py CHANGED
@@ -17,6 +17,7 @@ from abc import ABC
 from api.db import LLMType
 from api.db.services.llm_service import LLMBundle
 from agent.component import GenerateParam, Generate
+from api.utils.log_utils import logger
 
 
 class RewriteQuestionParam(GenerateParam):
@@ -104,7 +105,7 @@ class RewriteQuestion(Generate, ABC):
         self._canvas.history.pop()
         self._canvas.history.append(("user", ans))
 
-        print(ans, ":::::::::::::::::::::::::::::::::")
+        logger.info(ans)
         return RewriteQuestion.be_output(ans)
 
 
agent/component/wikipedia.py CHANGED
@@ -13,13 +13,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import random
 from abc import ABC
-from functools import partial
 import wikipedia
 import pandas as pd
-from agent.settings import DEBUG
 from agent.component.base import ComponentBase, ComponentParamBase
+from api.utils.log_utils import logger
 
 
 class WikipediaParam(ComponentParamBase):
@@ -65,5 +63,5 @@ class Wikipedia(ComponentBase, ABC):
             return Wikipedia.be_output("")
 
         df = pd.DataFrame(wiki_res)
-        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
+        logger.debug(f"df: {df}")
         return df
agent/component/yahoofinance.py CHANGED
@@ -17,6 +17,7 @@ from abc import ABC
 import pandas as pd
 from agent.component.base import ComponentBase, ComponentParamBase
 import yfinance as yf
+from api.utils.log_utils import logger
 
 
 class YahooFinanceParam(ComponentParamBase):
@@ -74,8 +75,8 @@ class YahooFinance(ComponentBase, ABC):
                     {"content": "quarterly cash flow statement:\n" + msft.quarterly_cashflow.to_markdown() + "\n"})
             if self._param.news:
                 yohoo_res.append({"content": "news:\n" + pd.DataFrame(msft.news).to_markdown() + "\n"})
-        except Exception as e:
-            print("**ERROR** " + str(e))
+        except Exception:
+            logger.exception("YahooFinance got exception")
 
         if not yohoo_res:
             return YahooFinance.be_output("")
agent/settings.py CHANGED
@@ -13,22 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# Logger
-import os
 
-from api.utils.file_utils import get_project_base_directory
-from api.utils.log_utils import LoggerFactory, getLogger
-
-DEBUG = 0
-LoggerFactory.set_directory(
-    os.path.join(
-        get_project_base_directory(),
-        "logs",
-        "flow"))
-# {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
-LoggerFactory.LEVEL = 30
-
-flow_logger = getLogger("flow")
-database_logger = getLogger("database")
 FLOAT_ZERO = 1e-8
 PARAM_MAXDEPTH = 5
api/apps/__init__.py CHANGED
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import logging
 import os
 import sys
 from importlib.util import module_from_spec, spec_from_file_location
@@ -30,18 +29,14 @@ from api.utils import CustomJSONEncoder, commands
 
 from flask_session import Session
 from flask_login import LoginManager
-from api.settings import SECRET_KEY, stat_logger
-from api.settings import API_VERSION, access_logger
+from api.settings import SECRET_KEY
+from api.settings import API_VERSION
 from api.utils.api_utils import server_error_response
+from api.utils.log_utils import logger
 from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
 
 __all__ = ["app"]
 
-
-logger = logging.getLogger("flask.app")
-for h in access_logger.handlers:
-    logger.addHandler(h)
-
 Request.json = property(lambda self: self.get_json(force=True, silent=True))
 
 app = Flask(__name__)
@@ -158,8 +153,8 @@ def load_user(web_request):
                 return user[0]
             else:
                 return None
-        except Exception as e:
-            stat_logger.exception(e)
+        except Exception:
+            logger.exception("load_user got exception")
             return None
    else:
        return None
api/apps/canvas_app.py CHANGED
@@ -23,6 +23,7 @@ from api.utils import get_uuid
 from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
 from agent.canvas import Canvas
 from peewee import MySQLDatabase, PostgresqlDatabase
+from api.utils.log_utils import logger
 
 
 @manager.route('/templates', methods=['GET'])
@@ -114,7 +115,7 @@ def run():
             pass
         canvas.add_user_input(req["message"])
         answer = canvas.run(stream=stream)
-        print(canvas)
+        logger.info(canvas)
     except Exception as e:
         return server_error_response(e)
 
api/apps/llm_app.py CHANGED
@@ -25,6 +25,7 @@ from api.db.db_models import TenantLLM
 from api.utils.api_utils import get_json_result
 from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
 import requests
+from api.utils.log_utils import logger
 
 
 @manager.route('/factories', methods=['GET'])
@@ -89,7 +90,7 @@ def set_api_key():
             if len(arr) == 0 or tc == 0:
                 raise Exception("Fail")
             rerank_passed = True
-            print(f'passed model rerank{llm.llm_name}',flush=True)
+            logger.info(f'passed model rerank {llm.llm_name}')
         except Exception as e:
             msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
                 e)
api/apps/sdk/dataset.py CHANGED
@@ -526,4 +526,4 @@ def list(tenant_id):
             new_key = key_mapping.get(key, key)
             renamed_data[new_key] = value
         renamed_list.append(renamed_data)
-    return get_result(data=renamed_list)
+    return get_result(data=renamed_list)
api/apps/user_app.py CHANGED
@@ -53,8 +53,8 @@ from api.settings import (
 )
 from api.db.services.user_service import UserService, TenantService, UserTenantService
 from api.db.services.file_service import FileService
-from api.settings import stat_logger
 from api.utils.api_utils import get_json_result, construct_response
+from api.utils.log_utils import logger
 
 
 @manager.route("/login", methods=["POST", "GET"])
@@ -177,7 +177,7 @@ def github_callback():
     try:
         avatar = download_img(user_info["avatar_url"])
     except Exception as e:
-        stat_logger.exception(e)
+        logger.exception(e)
         avatar = ""
     users = user_register(
         user_id,
@@ -202,7 +202,7 @@ def github_callback():
         return redirect("/?auth=%s" % user.get_id())
     except Exception as e:
         rollback_user_registration(user_id)
-        stat_logger.exception(e)
+        logger.exception(e)
         return redirect("/?error=%s" % str(e))
 
     # User has already registered, try to log in
@@ -279,7 +279,7 @@ def feishu_callback():
     try:
         avatar = download_img(user_info["avatar_url"])
     except Exception as e:
-        stat_logger.exception(e)
+        logger.exception(e)
         avatar = ""
     users = user_register(
         user_id,
@@ -304,7 +304,7 @@ def feishu_callback():
         return redirect("/?auth=%s" % user.get_id())
     except Exception as e:
         rollback_user_registration(user_id)
-        stat_logger.exception(e)
+        logger.exception(e)
        return redirect("/?error=%s" % str(e))
 
     # User has already registered, try to log in
@@ -436,7 +436,7 @@ def setting_user():
         UserService.update_by_id(current_user.id, update_dict)
         return get_json_result(data=True)
     except Exception as e:
-        stat_logger.exception(e)
+        logger.exception(e)
         return get_json_result(
             data=False, message="Update failure!", code=RetCode.EXCEPTION_ERROR
         )
@@ -621,7 +621,7 @@ def user_add():
         )
     except Exception as e:
         rollback_user_registration(user_id)
-        stat_logger.exception(e)
+        logger.exception(e)
         return get_json_result(
             data=False,
             message=f"User registration failure, error: {str(e)}",
api/db/db_models.py CHANGED
@@ -30,12 +30,9 @@ from peewee import (
 )
 from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
 from api.db import SerializedType, ParserType
-from api.settings import DATABASE, stat_logger, SECRET_KEY, DATABASE_TYPE
-from api.utils.log_utils import getLogger
+from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
 from api import utils
-
-LOGGER = getLogger()
-
+from api.utils.log_utils import logger
 
 def singleton(cls, *args, **kw):
     instances = {}
@@ -288,7 +285,7 @@ class BaseDataBase:
         database_config = DATABASE.copy()
         db_name = database_config.pop("name")
         self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
-        stat_logger.info('init database on cluster mode successfully')
+        logger.info('init database on cluster mode successfully')
 
 class PostgresDatabaseLock:
     def __init__(self, lock_name, timeout=10, db=None):
@@ -396,7 +393,7 @@ def close_connection():
         if DB:
             DB.close_stale(age=30)
     except Exception as e:
-        LOGGER.exception(e)
+        logger.exception(e)
 
 
 class DataBaseModel(BaseModel):
@@ -412,15 +409,15 @@ def init_database_tables(alter_fields=[]):
     for name, obj in members:
         if obj != DataBaseModel and issubclass(obj, DataBaseModel):
             table_objs.append(obj)
-            LOGGER.info(f"start create table {obj.__name__}")
+            logger.info(f"start create table {obj.__name__}")
             try:
                 obj.create_table()
-                LOGGER.info(f"create table success: {obj.__name__}")
+                logger.info(f"create table success: {obj.__name__}")
             except Exception as e:
-                LOGGER.exception(e)
+                logger.exception(e)
                 create_failed_list.append(obj.__name__)
     if create_failed_list:
-        LOGGER.info(f"create tables failed: {create_failed_list}")
+        logger.info(f"create tables failed: {create_failed_list}")
         raise Exception(f"create tables failed: {create_failed_list}")
     migrate_db()
 
api/db/db_utils.py CHANGED
@@ -22,12 +22,6 @@ from playhouse.pool import PooledMySQLDatabase
 from api.utils import current_timestamp, timestamp_to_date
 
 from api.db.db_models import DB, DataBaseModel
-from api.db.runtime_config import RuntimeConfig
-from api.utils.log_utils import getLogger
-from enum import Enum
-
-
-LOGGER = getLogger()
 
 
 @DB.connection_context()
api/db/init_data.py CHANGED
@@ -30,6 +30,7 @@ from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantL
 from api.db.services.user_service import TenantService, UserTenantService
 from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
 from api.utils.file_utils import get_project_base_directory
+from api.utils.log_utils import logger
 
 
 def encode_to_base64(input_string):
@@ -69,36 +70,34 @@ def init_superuser():
                            "api_key": API_KEY, "api_base": LLM_BASE_URL})
 
     if not UserService.save(**user_info):
-        print("\033[93m【ERROR】\033[0mcan't init admin.")
+        logger.info("can't init admin.")
         return
     TenantService.insert(**tenant)
     UserTenantService.insert(**usr_tenant)
     TenantLLMService.insert_many(tenant_llm)
-    print(
-        "【INFO】Super user initialized. \033[93memail: [email protected], password: admin\033[0m. Changing the password after logining is strongly recomanded.")
+    logger.info(
+        "Super user initialized. email: [email protected], password: admin. Changing the password after logining is strongly recomanded.")
 
     chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
     msg = chat_mdl.chat(system="", history=[
        {"role": "user", "content": "Hello!"}], gen_conf={})
     if msg.find("ERROR: ") == 0:
-        print(
-            "\33[91m【ERROR】\33[0m: ",
+        logger.error(
             "'{}' dosen't work. {}".format(
                 tenant["llm_id"],
                 msg))
     embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
     v, c = embd_mdl.encode(["Hello!"])
     if c == 0:
-        print(
-            "\33[91m【ERROR】\33[0m:",
-            " '{}' dosen't work!".format(
+        logger.error(
+            "'{}' dosen't work!".format(
                 tenant["embd_id"]))
 
 
 def init_llm_factory():
     try:
         LLMService.filter_delete([(LLM.fid == "MiniMax" or LLM.fid == "Minimax")])
-    except Exception as e:
+    except Exception:
         pass
 
     factory_llm_infos = json.load(
@@ -111,14 +110,14 @@ def init_llm_factory():
         llm_infos = factory_llm_info.pop("llm")
         try:
             LLMFactoriesService.save(**factory_llm_info)
-        except Exception as e:
+        except Exception:
             pass
         LLMService.filter_delete([LLM.fid == factory_llm_info["name"]])
         for llm_info in llm_infos:
             llm_info["fid"] = factory_llm_info["name"]
             try:
                 LLMService.save(**llm_info)
-            except Exception as e:
+            except Exception:
                 pass
 
     LLMFactoriesService.filter_delete([LLMFactories.name == "Local"])
@@ -145,7 +144,7 @@ def init_llm_factory():
                 row = deepcopy(row)
                 row["llm_name"] = "text-embedding-3-large"
                 TenantLLMService.save(**row)
-            except Exception as e:
+            except Exception:
                 pass
             break
     for kb_id in KnowledgebaseService.get_all_ids():
@@ -169,9 +168,8 @@ def add_graph_templates():
                 CanvasTemplateService.save(**cnvs)
             except:
                 CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
-    except Exception as e:
-        print("Add graph templates error: ", e)
-        print("------------", flush=True)
+    except Exception:
+        logger.exception("Add graph templates error: ")
 
 
 def init_web_data():
@@ -182,7 +180,7 @@ def init_web_data():
     # init_superuser()
 
     add_graph_templates()
-    print("init web data success:{}".format(time.time() - start_time))
+    logger.info("init web data success:{}".format(time.time() - start_time))
 
 
 if __name__ == '__main__':
api/db/operatioins.py DELETED
@@ -1,21 +0,0 @@
-#
-#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-import operator
-import time
-import typing
-from api.utils.log_utils import sql_logger
-import peewee
api/db/services/dialog_service.py CHANGED
@@ -26,11 +26,12 @@ from api.db.db_models import Dialog, Conversation,DB
 from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
-from api.settings import chat_logger, retrievaler, kg_retrievaler
+from api.settings import retrievaler, kg_retrievaler
 from rag.app.resume import forbidden_select_fields4resume
 from rag.nlp.search import index_name
 from rag.utils import rmSpace, num_tokens_from_string, encoder
 from api.utils.file_utils import get_project_base_directory
+from api.utils.log_utils import logger
 
 
 class DialogService(CommonService):
@@ -177,7 +178,7 @@ def chat(dialog, messages, stream=True, **kwargs):
         tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
     # try to use sql if field mapping is good to go
     if field_map:
-        chat_logger.info("Use SQL to retrieval:{}".format(questions[-1]))
+        logger.info("Use SQL to retrieval:{}".format(questions[-1]))
         ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
         if ans:
             yield ans
@@ -219,7 +220,7 @@
                                             doc_ids=attachments,
                                             top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
         knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
-        chat_logger.info(
+        logger.info(
             "{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
         retrieval_tm = timer()
 
@@ -291,7 +292,7 @@
         yield decorate_answer(answer)
     else:
         answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
-        chat_logger.info("User: {}|Assistant: {}".format(
+        logger.info("User: {}|Assistant: {}".format(
             msg[-1]["content"], answer))
         res = decorate_answer(answer)
         res["audio_binary"] = tts(tts_mdl, answer)
@@ -319,8 +320,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
         nonlocal sys_prompt, user_promt, question, tried_times
         sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
             "temperature": 0.06})
-        print(user_promt, sql)
-        chat_logger.info(f"“{question}”==>{user_promt} get SQL: {sql}")
+        logger.info(f"{question} ==> {user_promt} get SQL: {sql}")
         sql = re.sub(r"[\r\n]+", " ", sql.lower())
         sql = re.sub(r".*select ", "select ", sql.lower())
         sql = re.sub(r" +", " ", sql)
@@ -340,9 +340,7 @@
                     flds.append(k)
             sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
 
-        print(f"{question} get SQL(refined): {sql}")
-
-        chat_logger.info(f"“{question}” get SQL(refined): {sql}")
+        logger.info(f"{question} get SQL(refined): {sql}")
         tried_times += 1
         return retrievaler.sql_retrieval(sql, format="json"), sql
 
@@ -371,10 +369,9 @@
             question, sql, tbl["error"]
         )
         tbl, sql = get_table()
-        chat_logger.info("TRY it again: {}".format(sql))
+        logger.info("TRY it again: {}".format(sql))
 
-    chat_logger.info("GET table: {}".format(tbl))
-    print(tbl)
+    logger.info("GET table: {}".format(tbl))
     if tbl.get("error") or len(tbl["rows"]) == 0:
         return None
 
@@ -404,7 +401,7 @@
     rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
 
     if not docid_idx or not docnm_idx:
-        chat_logger.warning("SQL missing field: " + sql)
+        logger.warning("SQL missing field: " + sql)
         return {
             "answer": "\n".join([clmns, line, rows]),
             "reference": {"chunks": [], "doc_aggs": []},
api/db/services/document_service.py CHANGED
@@ -17,7 +17,6 @@ import hashlib
 import json
 import random
 import re
-import traceback
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
 from datetime import datetime
@@ -26,7 +25,7 @@ from io import BytesIO
 from peewee import fn
 
 from api.db.db_utils import bulk_insert_into_db
-from api.settings import stat_logger, docStoreConn
+from api.settings import docStoreConn
 from api.utils import current_timestamp, get_format_time, get_uuid
 from graphrag.mind_map_extractor import MindMapExtractor
 from rag.settings import SVR_QUEUE_NAME
@@ -40,6 +39,7 @@ from api.db.services.common_service import CommonService
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db import StatusEnum
 from rag.utils.redis_conn import REDIS_CONN
+from api.utils.log_utils import logger
 
 
 class DocumentService(CommonService):
@@ -387,7 +387,7 @@ class DocumentService(CommonService):
                     cls.update_by_id(d["id"], info)
             except Exception as e:
                 if str(e).find("'0'") < 0:
-                    stat_logger.error("fetch task exception:" + str(e))
+                    logger.exception("fetch task exception")
 
     @classmethod
     @DB.connection_context()
@@ -544,7 +544,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
                     "knowledge_graph_kwd": "mind_map"
                 })
         except Exception as e:
-            stat_logger.error("Mind map generation error:", traceback.format_exc())
+            logger.exception("Mind map generation error")
 
         vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
         assert len(cks) == len(vects)
api/db/services/file_service.py CHANGED
@@ -28,6 +28,7 @@ from api.db.services.file2document_service import File2DocumentService
 from api.utils import get_uuid
 from api.utils.file_utils import filename_type, thumbnail_img
 from rag.utils.storage_factory import STORAGE_IMPL
+from api.utils.log_utils import logger
 
 
 class FileService(CommonService):
@@ -272,8 +273,8 @@ class FileService(CommonService):
             cls.delete_folder_by_pf_id(user_id, file.id)
             return cls.model.delete().where((cls.model.tenant_id == user_id)
                                             & (cls.model.id == folder_id)).execute(),
-        except Exception as e:
-            print(e)
+        except Exception:
+            logger.exception("delete_folder_by_pf_id")
             raise RuntimeError("Database error (File retrieval)!")
 
     @classmethod
@@ -321,8 +322,8 @@ class FileService(CommonService):
     def move_file(cls, file_ids, folder_id):
         try:
            cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
-        except Exception as e:
-            print(e)
+        except Exception:
+            logger.exception("move_file")
            raise RuntimeError("Database error (File move)!")
 
    @classmethod
api/db/services/llm_service.py CHANGED
@@ -14,12 +14,12 @@
 # limitations under the License.
 #
 from api.db.services.user_service import TenantService
- from api.settings import database_logger
 from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
 from api.db import LLMType
 from api.db.db_models import DB
 from api.db.db_models import LLMFactories, LLM, TenantLLM
 from api.db.services.common_service import CommonService
+ from api.utils.log_utils import logger


 class LLMFactoriesService(CommonService):
@@ -209,40 +209,40 @@ class LLMBundle(object):
         emd, used_tokens = self.mdl.encode(texts, batch_size)
         if not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens):
-             database_logger.error(
-                 "Can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
+             logger.error(
+                 "LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
         return emd, used_tokens

     def encode_queries(self, query: str):
         emd, used_tokens = self.mdl.encode_queries(query)
         if not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens):
-             database_logger.error(
-                 "Can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
+             logger.error(
+                 "LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
         return emd, used_tokens

     def similarity(self, query: str, texts: list):
         sim, used_tokens = self.mdl.similarity(query, texts)
         if not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens):
-             database_logger.error(
-                 "Can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
+             logger.error(
+                 "LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
         return sim, used_tokens

     def describe(self, image, max_tokens=300):
         txt, used_tokens = self.mdl.describe(image, max_tokens)
         if not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens):
-             database_logger.error(
-                 "Can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
+             logger.error(
+                 "LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
         return txt

     def transcription(self, audio):
         txt, used_tokens = self.mdl.transcription(audio)
         if not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens):
-             database_logger.error(
-                 "Can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
+             logger.error(
+                 "LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
         return txt

     def tts(self, text):
@@ -250,8 +250,8 @@
             if isinstance(chunk,int):
                 if not TenantLLMService.increase_usage(
                         self.tenant_id, self.llm_type, chunk, self.llm_name):
-                     database_logger.error(
-                         "Can't update token usage for {}/TTS".format(self.tenant_id))
+                     logger.error(
+                         "LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
                     return
             yield chunk

@@ -259,8 +259,8 @@
         txt, used_tokens = self.mdl.chat(system, history, gen_conf)
         if isinstance(txt, int) and not TenantLLMService.increase_usage(
                 self.tenant_id, self.llm_type, used_tokens, self.llm_name):
-             database_logger.error(
-                 "Can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
+             logger.error(
+                 "LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
         return txt

     def chat_streamly(self, system, history, gen_conf):
@@ -268,7 +268,7 @@
             if isinstance(txt, int):
                 if not TenantLLMService.increase_usage(
                         self.tenant_id, self.llm_type, txt, self.llm_name):
-                     database_logger.error(
-                         "Can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
+                     logger.error(
+                         "LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
                     return
             yield txt
api/ragflow_server.py CHANGED
@@ -27,13 +27,10 @@ from api.apps import app
 from api.db.runtime_config import RuntimeConfig
 from api.db.services.document_service import DocumentService
 from api.settings import (
-     HOST,
-     HTTP_PORT,
-     access_logger,
-     database_logger,
-     stat_logger,
+     HOST, HTTP_PORT
 )
 from api import utils
+ from api.utils.log_utils import logger

 from api.db.db_models import init_database_tables as init_web_db
 from api.db.init_data import init_web_data
@@ -45,23 +42,22 @@ def update_progress():
         time.sleep(3)
         try:
             DocumentService.update_progress()
-         except Exception as e:
-             stat_logger.error("update_progress exception:" + str(e))
+         except Exception:
+             logger.exception("update_progress exception")


- if __name__ == "__main__":
-     print(
-         r"""
+ if __name__ == '__main__':
+     logger.info(r"""
     ____   ___    ______ ______ __
    / __ \ /   |  / ____// ____// /____  _      __
   / /_/ // /| | / / __ / /_   / // __ \| | /| / /
  / _, _// ___ |/ /_/ // __/  / // /_/ /| |/ |/ /
 /_/ |_|/_/  |_|\____//_/    /_/ \____/ |__/|__/

-     """,
-         flush=True,
+     """)
+     logger.info(
+         f'project base: {utils.file_utils.get_project_base_directory()}'
     )
-     stat_logger.info(f"project base: {utils.file_utils.get_project_base_directory()}")

     # init db
     init_web_db()
@@ -83,7 +79,7 @@ if __name__ == "__main__":

     RuntimeConfig.DEBUG = args.debug
     if RuntimeConfig.DEBUG:
-         stat_logger.info("run on debug mode")
+         logger.info("run on debug mode")

     RuntimeConfig.init_env()
     RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
@@ -91,17 +87,17 @@
     peewee_logger = logging.getLogger("peewee")
     peewee_logger.propagate = False
     # rag_arch.common.log.ROpenHandler
-     peewee_logger.addHandler(database_logger.handlers[0])
-     peewee_logger.setLevel(database_logger.level)
+     peewee_logger.addHandler(logger.handlers[0])
+     peewee_logger.setLevel(logger.handlers[0].level)

     thr = ThreadPoolExecutor(max_workers=1)
     thr.submit(update_progress)

     # start http server
     try:
-         stat_logger.info("RAG Flow http server start...")
+         logger.info("RAG Flow http server start...")
         werkzeug_logger = logging.getLogger("werkzeug")
-         for h in access_logger.handlers:
+         for h in logger.handlers:
             werkzeug_logger.addHandler(h)
         run_simple(
             hostname=HOST,
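
Third-party loggers such as peewee and werkzeug know nothing about the unified logger, so the server grafts its handlers onto them directly; handlers[0] is the rotating file handler because log_utils installs it first. A sketch of that routing, assuming the api.utils.log_utils module introduced in this commit:

    import logging
    from api.utils.log_utils import logger  # the unified "ragflow" logger

    peewee_logger = logging.getLogger("peewee")
    peewee_logger.propagate = False                   # keep it off the root logger
    peewee_logger.addHandler(logger.handlers[0])      # reuse the rotating file handler
    peewee_logger.setLevel(logger.handlers[0].level)

    werkzeug_logger = logging.getLogger("werkzeug")
    for h in logger.handlers:                         # file handler + stderr handler
        werkzeug_logger.addHandler(h)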
api/settings.py CHANGED
@@ -17,24 +17,9 @@ import os
 from datetime import date
 from enum import IntEnum, Enum
 from api.utils.file_utils import get_project_base_directory
- from api.utils.log_utils import LoggerFactory, getLogger
 import rag.utils.es_conn
 import rag.utils.infinity_conn

- # Logger
- LoggerFactory.set_directory(
-     os.path.join(
-         get_project_base_directory(),
-         "logs",
-         "api"))
- # {CRITICAL: 50, FATAL:50, ERROR:40, WARNING:30, WARN:30, INFO:20, DEBUG:10, NOTSET:0}
- LoggerFactory.LEVEL = 30
-
- stat_logger = getLogger("stat")
- access_logger = getLogger("access")
- database_logger = getLogger("database")
- chat_logger = getLogger("chat")
-
 import rag.utils
 from rag.nlp import search
 from graphrag import search as kg_search
@@ -47,8 +32,6 @@ TEMP_DIRECTORY = os.path.join(get_project_base_directory(), "temp")
 RAG_FLOW_CONF_PATH = os.path.join(get_project_base_directory(), "conf")
 LIGHTEN = int(os.environ.get('LIGHTEN', "0"))

- SUBPROCESS_STD_LOG_NAME = "std.log"
-
 ERROR_REPORT = True
 ERROR_REPORT_WITH_PATH = False
api/utils/api_utils.py CHANGED
@@ -35,11 +35,12 @@ from werkzeug.http import HTTP_STATUS_CODES
 from api.db.db_models import APIToken
 from api.settings import (
     REQUEST_MAX_WAIT_SEC, REQUEST_WAIT_SEC,
-     stat_logger, CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
+     CLIENT_AUTHENTICATION, HTTP_APP_KEY, SECRET_KEY
 )
 from api.settings import RetCode
 from api.utils import CustomJSONEncoder, get_uuid
 from api.utils import json_dumps
+ from api.utils.log_utils import logger

 requests.models.complexjson.dumps = functools.partial(
     json.dumps, cls=CustomJSONEncoder)
@@ -117,7 +118,7 @@ def get_data_error_result(code=RetCode.DATA_ERROR,


 def server_error_response(e):
-     stat_logger.exception(e)
+     logger.exception(e)
     try:
         if e.code == 401:
             return get_json_result(code=401, message=repr(e))
@@ -258,7 +259,7 @@ def construct_json_result(code=RetCode.SUCCESS, message='success', data=None):


 def construct_error_response(e):
-     stat_logger.exception(e)
+     logger.exception(e)
     try:
         if e.code == 401:
             return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
api/utils/log_utils.py CHANGED
@@ -14,300 +14,38 @@
 # limitations under the License.
 #
 import os
- import typing
- import traceback
 import logging
- import inspect
- from logging.handlers import TimedRotatingFileHandler
- from threading import RLock
+ from logging.handlers import RotatingFileHandler

- from api.utils import file_utils
+ from api.utils.file_utils import get_project_base_directory

+ LOG_LEVEL = logging.INFO
+ LOG_FILE = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"ragflow_{os.getpid()}.log"))
+ LOG_FORMAT = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"
+ logger = None

- class LoggerFactory(object):
-     TYPE = "FILE"
-     LOG_FORMAT = "[%(levelname)s] [%(asctime)s] [%(module)s.%(funcName)s] [line:%(lineno)d]: %(message)s"
-     logging.basicConfig(format=LOG_FORMAT)
-     LEVEL = logging.DEBUG
-     logger_dict = {}
-     global_handler_dict = {}
-
-     LOG_DIR = None
-     PARENT_LOG_DIR = None
-     log_share = True
-
-     append_to_parent_log = None
-
-     lock = RLock()
-     # CRITICAL = 50
-     # FATAL = CRITICAL
-     # ERROR = 40
-     # WARNING = 30
-     # WARN = WARNING
-     # INFO = 20
-     # DEBUG = 10
-     # NOTSET = 0
-     levels = (10, 20, 30, 40)
-     schedule_logger_dict = {}
-
-     @staticmethod
-     def set_directory(directory=None, parent_log_dir=None,
-                       append_to_parent_log=None, force=False):
-         if parent_log_dir:
-             LoggerFactory.PARENT_LOG_DIR = parent_log_dir
-         if append_to_parent_log:
-             LoggerFactory.append_to_parent_log = append_to_parent_log
-         with LoggerFactory.lock:
-             if not directory:
-                 directory = file_utils.get_project_base_directory("logs")
-             if not LoggerFactory.LOG_DIR or force:
-                 LoggerFactory.LOG_DIR = directory
-             if LoggerFactory.log_share:
-                 oldmask = os.umask(000)
-                 os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True)
-                 os.umask(oldmask)
-             else:
-                 os.makedirs(LoggerFactory.LOG_DIR, exist_ok=True)
-             for loggerName, ghandler in LoggerFactory.global_handler_dict.items():
-                 for className, (logger,
-                                 handler) in LoggerFactory.logger_dict.items():
-                     logger.removeHandler(ghandler)
-                 ghandler.close()
-             LoggerFactory.global_handler_dict = {}
-             for className, (logger,
-                             handler) in LoggerFactory.logger_dict.items():
-                 logger.removeHandler(handler)
-                 _handler = None
-                 if handler:
-                     handler.close()
-                 if className != "default":
-                     _handler = LoggerFactory.get_handler(className)
-                     logger.addHandler(_handler)
-                 LoggerFactory.assemble_global_handler(logger)
-                 LoggerFactory.logger_dict[className] = logger, _handler
-
-     @staticmethod
-     def new_logger(name):
-         logger = logging.getLogger(name)
-         logger.propagate = False
-         logger.setLevel(LoggerFactory.LEVEL)
+ def getLogger():
+     global logger
+     if logger is not None:
         return logger

-     @staticmethod
-     def get_logger(class_name=None):
-         with LoggerFactory.lock:
-             if class_name in LoggerFactory.logger_dict.keys():
-                 logger, handler = LoggerFactory.logger_dict[class_name]
-                 if not logger:
-                     logger, handler = LoggerFactory.init_logger(class_name)
-             else:
-                 logger, handler = LoggerFactory.init_logger(class_name)
-             return logger
-
-     @staticmethod
-     def get_global_handler(logger_name, level=None, log_dir=None):
-         if not LoggerFactory.LOG_DIR:
-             return logging.StreamHandler()
-         if log_dir:
-             logger_name_key = logger_name + "_" + log_dir
-         else:
-             logger_name_key = logger_name + "_" + LoggerFactory.LOG_DIR
-         # if loggerName not in LoggerFactory.globalHandlerDict:
-         if logger_name_key not in LoggerFactory.global_handler_dict:
-             with LoggerFactory.lock:
-                 if logger_name_key not in LoggerFactory.global_handler_dict:
-                     handler = LoggerFactory.get_handler(
-                         logger_name, level, log_dir)
-                     LoggerFactory.global_handler_dict[logger_name_key] = handler
-         return LoggerFactory.global_handler_dict[logger_name_key]
-
-     @staticmethod
-     def get_handler(class_name, level=None, log_dir=None,
-                     log_type=None, job_id=None):
-         if not log_type:
-             if not LoggerFactory.LOG_DIR or not class_name:
-                 return logging.StreamHandler()
-             # return Diy_StreamHandler()
-
-             if not log_dir:
-                 log_file = os.path.join(
-                     LoggerFactory.LOG_DIR,
-                     "{}.log".format(class_name))
-             else:
-                 log_file = os.path.join(log_dir, "{}.log".format(class_name))
-         else:
-             log_file = os.path.join(log_dir, "rag_flow_{}.log".format(
-                 log_type) if level == LoggerFactory.LEVEL else 'rag_flow_{}_error.log'.format(log_type))
-
-         os.makedirs(os.path.dirname(log_file), exist_ok=True)
-         if LoggerFactory.log_share:
-             handler = ROpenHandler(log_file,
-                                    when='D',
-                                    interval=1,
-                                    backupCount=14,
-                                    delay=True)
-         else:
-             handler = TimedRotatingFileHandler(log_file,
-                                                when='D',
-                                                interval=1,
-                                                backupCount=14,
-                                                delay=True)
-         if level:
-             handler.level = level
-
-         return handler
-
-     @staticmethod
-     def init_logger(class_name):
-         with LoggerFactory.lock:
-             logger = LoggerFactory.new_logger(class_name)
-             handler = None
-             if class_name:
-                 handler = LoggerFactory.get_handler(class_name)
-                 logger.addHandler(handler)
-                 LoggerFactory.logger_dict[class_name] = logger, handler
-
-             else:
-                 LoggerFactory.logger_dict["default"] = logger, handler
-
-             LoggerFactory.assemble_global_handler(logger)
-             return logger, handler
-
-     @staticmethod
-     def assemble_global_handler(logger):
-         if LoggerFactory.LOG_DIR:
-             for level in LoggerFactory.levels:
-                 if level >= LoggerFactory.LEVEL:
-                     level_logger_name = logging._levelToName[level]
-                     logger.addHandler(
-                         LoggerFactory.get_global_handler(
-                             level_logger_name, level))
-         if LoggerFactory.append_to_parent_log and LoggerFactory.PARENT_LOG_DIR:
-             for level in LoggerFactory.levels:
-                 if level >= LoggerFactory.LEVEL:
-                     level_logger_name = logging._levelToName[level]
-                     logger.addHandler(
-                         LoggerFactory.get_global_handler(level_logger_name, level, LoggerFactory.PARENT_LOG_DIR))
-
-
- def setDirectory(directory=None):
-     LoggerFactory.set_directory(directory)
-
-
- def setLevel(level):
-     LoggerFactory.LEVEL = level
-
-
- def getLogger(className=None, useLevelFile=False):
-     if className is None:
-         frame = inspect.stack()[1]
-         module = inspect.getmodule(frame[0])
-         className = 'stat'
-     return LoggerFactory.get_logger(className)
-
-
- def exception_to_trace_string(ex):
-     return "".join(traceback.TracebackException.from_exception(ex).format())
-
-
- class ROpenHandler(TimedRotatingFileHandler):
-     def _open(self):
-         prevumask = os.umask(000)
-         rtv = TimedRotatingFileHandler._open(self)
-         os.umask(prevumask)
-         return rtv
-
-
- def sql_logger(job_id='', log_type='sql'):
-     key = job_id + log_type
-     if key in LoggerFactory.schedule_logger_dict.keys():
-         return LoggerFactory.schedule_logger_dict[key]
-     return get_job_logger(job_id=job_id, log_type=log_type)
-
-
- def ready_log(msg, job=None, task=None, role=None, party_id=None, detail=None):
-     prefix, suffix = base_msg(job, task, role, party_id, detail)
-     return f"{prefix}{msg} ready{suffix}"
-
-
- def start_log(msg, job=None, task=None, role=None, party_id=None, detail=None):
-     prefix, suffix = base_msg(job, task, role, party_id, detail)
-     return f"{prefix}start to {msg}{suffix}"
-
-
- def successful_log(msg, job=None, task=None, role=None,
-                    party_id=None, detail=None):
-     prefix, suffix = base_msg(job, task, role, party_id, detail)
-     return f"{prefix}{msg} successfully{suffix}"
-
-
- def warning_log(msg, job=None, task=None, role=None,
-                 party_id=None, detail=None):
-     prefix, suffix = base_msg(job, task, role, party_id, detail)
-     return f"{prefix}{msg} is not effective{suffix}"
-
-
- def failed_log(msg, job=None, task=None, role=None,
-                party_id=None, detail=None):
-     prefix, suffix = base_msg(job, task, role, party_id, detail)
-     return f"{prefix}failed to {msg}{suffix}"
-
-
- def base_msg(job=None, task=None, role: str = None,
-              party_id: typing.Union[str, int] = None, detail=None):
-     if detail:
-         detail_msg = f" detail: \n{detail}"
-     else:
-         detail_msg = ""
-     if task is not None:
-         return f"task {task.f_task_id} {task.f_task_version} ", f" on {task.f_role} {task.f_party_id}{detail_msg}"
-     elif job is not None:
-         return "", f" on {job.f_role} {job.f_party_id}{detail_msg}"
-     elif role and party_id:
-         return "", f" on {role} {party_id}{detail_msg}"
-     else:
-         return "", f"{detail_msg}"
-
-
- def exception_to_trace_string(ex):
-     return "".join(traceback.TracebackException.from_exception(ex).format())
-
-
- def get_logger_base_dir():
-     job_log_dir = file_utils.get_rag_flow_directory('logs')
-     return job_log_dir
-
-
- def get_job_logger(job_id, log_type):
-     rag_flow_log_dir = file_utils.get_rag_flow_directory('logs', 'rag_flow')
-     job_log_dir = file_utils.get_rag_flow_directory('logs', job_id)
-     if not job_id:
-         log_dirs = [rag_flow_log_dir]
-     else:
-         if log_type == 'audit':
-             log_dirs = [job_log_dir, rag_flow_log_dir]
-         else:
-             log_dirs = [job_log_dir]
-     if LoggerFactory.log_share:
-         oldmask = os.umask(000)
-         os.makedirs(job_log_dir, exist_ok=True)
-         os.makedirs(rag_flow_log_dir, exist_ok=True)
-         os.umask(oldmask)
-     else:
-         os.makedirs(job_log_dir, exist_ok=True)
-         os.makedirs(rag_flow_log_dir, exist_ok=True)
-     logger = LoggerFactory.new_logger(f"{job_id}_{log_type}")
-     for job_log_dir in log_dirs:
-         handler = LoggerFactory.get_handler(class_name=None, level=LoggerFactory.LEVEL,
-                                             log_dir=job_log_dir, log_type=log_type, job_id=job_id)
-         error_handler = LoggerFactory.get_handler(
-             class_name=None,
-             level=logging.ERROR,
-             log_dir=job_log_dir,
-             log_type=log_type,
-             job_id=job_id)
-         logger.addHandler(handler)
-         logger.addHandler(error_handler)
-     with LoggerFactory.lock:
-         LoggerFactory.schedule_logger_dict[job_id + log_type] = logger
+     print(f"log file path: {LOG_FILE}")
+     os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
+     logger = logging.getLogger("ragflow")
+     logger.setLevel(LOG_LEVEL)
+
+     handler1 = RotatingFileHandler(LOG_FILE, maxBytes=10*1024*1024, backupCount=5)
+     handler1.setLevel(LOG_LEVEL)
+     formatter1 = logging.Formatter(LOG_FORMAT)
+     handler1.setFormatter(formatter1)
+     logger.addHandler(handler1)
+
+     handler2 = logging.StreamHandler()
+     handler2.setLevel(LOG_LEVEL)
+     formatter2 = logging.Formatter(LOG_FORMAT)
+     handler2.setFormatter(formatter2)
+     logger.addHandler(handler2)
+
     return logger
+
+ logger = getLogger()
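
With this module each process writes a single rotating file, logs/ragflow_<pid>.log, plus stderr, and callers import the shared instance rather than constructing per-purpose loggers. A sketch of the intended call sites (import path as in this diff):

    from api.utils.log_utils import logger

    logger.info("RAG Flow http server start...")
    try:
        1 / 0
    except Exception:
        # .exception() logs at ERROR level and appends the traceback.
        logger.exception("update_progress exception")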
deepdoc/parser/pdf_parser.py CHANGED
@@ -19,13 +19,14 @@ from io import BytesIO
 import re
 import pdfplumber
 import logging
- from PIL import Image, ImageDraw
+ from PIL import Image
 import numpy as np
 from timeit import default_timer as timer
 from pypdf import PdfReader as pdf2_read

 from api.settings import LIGHTEN
 from api.utils.file_utils import get_project_base_directory
+ from api.utils.log_utils import logger
 from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
 from rag.nlp import rag_tokenizer
 from copy import deepcopy
@@ -49,15 +50,15 @@ class RAGFlowPdfParser:
             import torch
             if torch.cuda.is_available():
                 self.updown_cnt_mdl.set_param({"device": "cuda"})
-         except Exception as e:
-             logging.error(str(e))
+         except Exception:
+             logger.exception("RAGFlowPdfParser __init__")
         try:
             model_dir = os.path.join(
                 get_project_base_directory(),
                 "rag/res/deepdoc")
             self.updown_cnt_mdl.load_model(os.path.join(
                 model_dir, "updown_concat_xgb.model"))
-         except Exception as e:
+         except Exception:
             model_dir = snapshot_download(
                 repo_id="InfiniFlow/text_concat_xgb_v1.0",
                 local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
@@ -187,7 +188,7 @@ class RAGFlowPdfParser:
         return True

     def _table_transformer_job(self, ZM):
-         logging.info("Table processing...")
+         logger.info("Table processing...")
         imgs, pos = [], []
         tbcnt = [0]
         MARGIN = 10
@@ -425,12 +426,12 @@ class RAGFlowPdfParser:
             detach_feats = [b["x1"] < b_["x0"],
                             b["x0"] > b_["x1"]]
             if (any(feats) and not any(concatting_feats)) or any(detach_feats):
-                 print(
+                 logger.info("{} {} {} {}".format(
                     b["text"],
                     b_["text"],
                     any(feats),
                     any(concatting_feats),
-                     any(detach_feats))
+                 ))
                 i += 1
                 continue
             # merge up and down
@@ -726,14 +727,14 @@ class RAGFlowPdfParser:
                 # continue
                 if tv < fv and tk:
                     tables[tk].insert(0, c)
-                     logging.debug(
+                     logger.debug(
                         "TABLE:" +
                         self.boxes[i]["text"] +
                         "; Cap: " +
                         tk)
                 elif fk:
                     figures[fk].insert(0, c)
-                     logging.debug(
+                     logger.debug(
                         "FIGURE:" +
                         self.boxes[i]["text"] +
                         "; Cap: " +
@@ -760,7 +761,7 @@ class RAGFlowPdfParser:
             if ii is not None:
                 b = louts[ii]
             else:
-                 logging.warn(
+                 logger.warn(
                     f"Missing layout match: {pn + 1},%s" %
                     (bxs[0].get(
                         "layoutno", "")))
@@ -918,8 +919,8 @@ class RAGFlowPdfParser:
                 if usefull(boxes[0]):
                     dfs(boxes[0], 0)
                 else:
-                     logging.debug("WASTE: " + boxes[0]["text"])
-             except Exception as e:
+                     logger.debug("WASTE: " + boxes[0]["text"])
+             except Exception:
                 pass
             boxes.pop(0)
             mw = np.mean(widths)
@@ -927,7 +928,7 @@ class RAGFlowPdfParser:
             res.append(
                 "\n".join([c["text"] + self._line_tag(c, ZM) for c in lines]))
         else:
-             logging.debug("REMOVED: " +
+             logger.debug("REMOVED: " +
                           "<<".join([c["text"] for c in lines]))

         return "\n\n".join(res)
@@ -938,8 +939,8 @@ class RAGFlowPdfParser:
             pdf = pdfplumber.open(
                 fnm) if not binary else pdfplumber.open(BytesIO(binary))
             return len(pdf.pages)
-         except Exception as e:
-             logging.error(str(e))
+         except Exception:
+             logger.exception("total_page_number")

     def __images__(self, fnm, zoomin=3, page_from=0,
                    page_to=299, callback=None):
@@ -962,8 +963,8 @@ class RAGFlowPdfParser:
             self.page_chars = [[{**c, 'top': c['top'], 'bottom': c['bottom']} for c in page.dedupe_chars().chars if self._has_color(c)] for page in
                                self.pdf.pages[page_from:page_to]]
             self.total_page = len(self.pdf.pages)
-         except Exception as e:
-             logging.error(str(e))
+         except Exception:
+             logger.exception("RAGFlowPdfParser __images__")

         self.outlines = []
         try:
@@ -979,11 +980,11 @@ class RAGFlowPdfParser:

             dfs(outlines, 0)
         except Exception as e:
-             logging.warning(f"Outlines exception: {e}")
+             logger.warning(f"Outlines exception: {e}")
         if not self.outlines:
-             logging.warning(f"Miss outlines")
+             logger.warning("Miss outlines")

-         logging.info("Images converted.")
+         logger.info("Images converted.")
         self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
             random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
                            range(len(self.page_chars))]
@@ -1023,7 +1024,7 @@ class RAGFlowPdfParser:
         self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}",
                                     "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))

-         logging.info("Is it English:", self.is_english)
+         logger.info("Is it English:", self.is_english)

         self.page_cum_height = np.cumsum(self.page_cum_height)
         assert len(self.page_cum_height) == len(self.page_images) + 1
@@ -1162,10 +1163,10 @@ class PlainParser(object):
                 dfs(a, depth + 1)

             dfs(outlines, 0)
-         except Exception as e:
-             logging.warning(f"Outlines exception: {e}")
+         except Exception:
+             logger.exception("Outlines exception")
         if not self.outlines:
-             logging.warning(f"Miss outlines")
+             logger.warning("Miss outlines")

         return [(l, "") for l in lines], []
deepdoc/parser/resume/entities/corporations.py CHANGED
@@ -11,10 +11,15 @@
 # limitations under the License.
 #

- import re,json,os
+ import re
+ import json
+ import os
 import pandas as pd
 from rag.nlp import rag_tokenizer
 from . import regions
+ from api.utils.log_utils import logger
+
+
 current_file_path = os.path.dirname(os.path.abspath(__file__))
 GOODS = pd.read_csv(os.path.join(current_file_path, "res/corp_baike_len.csv"), sep="\t", header=0).fillna(0)
 GOODS["cid"] = GOODS["cid"].astype(str)
@@ -27,7 +32,7 @@ def baike(cid, default_v=0):
     global GOODS
     try:
         return GOODS.loc[str(cid), "len"]
-     except Exception as e:
+     except Exception:
         pass
     return default_v

@@ -65,7 +70,8 @@ def rmNoise(n):
 GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
 for c,v in CORP_TAG.items():
     cc = corpNorm(rmNoise(c), False)
-     if not cc: print (c)
+     if not cc:
+         logger.info(c)
 CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}

 def is_good(nm):
deepdoc/parser/resume/step_two.py CHANGED
@@ -11,13 +11,19 @@
 # limitations under the License.
 #

- import re, copy, time, datetime, demjson3, \
-     traceback, signal
+ import re
+ import copy
+ import time
+ import datetime
+ import demjson3
+ import traceback
+ import signal
 import numpy as np
 from deepdoc.parser.resume.entities import degrees, schools, corporations
 from rag.nlp import rag_tokenizer, surname
 from xpinyin import Pinyin
 from contextlib import contextmanager
+ from api.utils.log_utils import logger


 class TimeoutException(Exception): pass
@@ -79,7 +85,7 @@ def forEdu(cv):
             y, m, d = getYMD(dt)
             st_dt.append(str(y))
             e["start_dt_kwd"] = str(y)
-         except Exception as e:
+         except Exception:
             pass

         r = schools.select(n.get("school_name", ""))
@@ -158,7 +164,7 @@ def forEdu(cv):
             y, m, d = getYMD(edu_end_dt)
             cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
         except Exception as e:
-             print("EXCEPTION: ", e, edu_end_dt, cv.get("work_exp_flt"))
+             logger.exception("forEdu {} {} {}".format(e, edu_end_dt, cv.get("work_exp_flt")))
         if sch:
             cv["school_name_kwd"] = sch
             if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
@@ -233,7 +239,7 @@ def forWork(cv):
         if type(n) == type(""):
             try:
                 n = json_loads(n)
-             except Exception as e:
+             except Exception:
                 continue

         if n.get("start_time") and (not work_st_tm or n["start_time"] < work_st_tm): work_st_tm = n["start_time"]
@@ -269,8 +275,8 @@ def forWork(cv):

         try:
             duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
-         except Exception as e:
-             print("kkkkkkkkkkkkkkkkkkkk", n.get("start_time"), n.get("end_time"))
+         except Exception:
+             logger.exception("forWork {} {}".format(n.get("start_time"), n.get("end_time")))

         if n.get("scale"):
             r = re.search(r"^([0-9]+)", str(n["scale"]))
@@ -327,7 +333,7 @@ def forWork(cv):
         y, m, d = getYMD(work_st_tm)
         cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
     except Exception as e:
-         print("EXCEPTION: ", e, work_st_tm, cv.get("work_exp_flt"))
+         logger.exception("forWork {} {} {}".format(e, work_st_tm, cv.get("work_exp_flt")))

     cv["job_num_int"] = 0
     if duas:
@@ -457,8 +463,8 @@ def parse(cv):
                 t = k[:-4]
                 cv[f"{t}_kwd"] = nms
                 cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
-             except Exception as e:
-                 print("【EXCEPTION】:", str(traceback.format_exc()), cv[k])
+             except Exception:
+                 logger.exception("parse {} {}".format(str(traceback.format_exc()), cv[k]))
                 cv[k] = []

     # tokenize fields
@@ -524,7 +530,7 @@ def parse(cv):
     if not y: y = "2012"
     if not m: m = "01"
     if not d: d = "01"
-     cv["updated_at_dt"] = f"%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
+     cv["updated_at_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
     # long text tokenize

     if cv.get("responsibilities"): cv["responsibilities_ltks"] = rag_tokenizer.tokenize(rmHtmlTag(cv["responsibilities"]))
@@ -556,10 +562,10 @@ def parse(cv):
             cv["work_exp_flt"] = (time.time() - int(int(cv["work_start_time"]) / 1000)) / 3600. / 24. / 365.
         elif re.match(r"[0-9]{4}[^0-9]", str(cv["work_start_time"])):
             y, m, d = getYMD(str(cv["work_start_time"]))
-             cv["work_start_dt"] = f"%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
+             cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
             cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
     except Exception as e:
-         print("【EXCEPTION】", e, "==>", cv.get("work_start_time"))
+         logger.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
     if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.

     keys = list(cv.keys())
@@ -574,7 +580,7 @@ def parse(cv):

     cv["tob_resume_id"] = str(cv["tob_resume_id"])
     cv["id"] = cv["tob_resume_id"]
-     print("CCCCCCCCCCCCCCC")
+     logger.info("CCCCCCCCCCCCCCC")

     return dealWithInt64(cv)

@@ -589,4 +595,3 @@ def dealWithInt64(d):

     if isinstance(d, np.integer): d = int(d)
     return d
-
deepdoc/vision/operators.py CHANGED
@@ -20,6 +20,7 @@ import cv2
 import numpy as np
 import math
 from PIL import Image
+ from api.utils.log_utils import logger


 class DecodeImage(object):
@@ -402,7 +403,7 @@ class DetResizeForTest(object):
                 return None, (None, None)
             img = cv2.resize(img, (int(resize_w), int(resize_h)))
         except BaseException:
-             print(img.shape, resize_w, resize_h)
+             logger.exception("{} {} {}".format(img.shape, resize_w, resize_h))
             sys.exit(0)
         ratio_h = resize_h / float(h)
         ratio_w = resize_w / float(w)
@@ -452,7 +453,6 @@ class E2EResizeForTest(object):
         return data

     def resize_image_for_totaltext(self, im, max_side_len=512):
-
         h, w, _ = im.shape
         resize_w = w
         resize_h = h
deepdoc/vision/recognizer.py CHANGED
@@ -19,6 +19,7 @@ from huggingface_hub import snapshot_download

 from api.utils.file_utils import get_project_base_directory
 from .operators import *
+ from api.utils.log_utils import logger


 class Recognizer(object):
@@ -439,7 +440,7 @@ class Recognizer(object):
             end_index = min((i + 1) * batch_size, len(imgs))
             batch_image_list = imgs[start_index:end_index]
             inputs = self.preprocess(batch_image_list)
-             print("preprocess")
+             logger.info("preprocess")
             for ins in inputs:
                 bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
                 res.append(bb)
deepdoc/vision/seeit.py CHANGED
@@ -14,6 +14,7 @@
 import os
 import PIL
 from PIL import ImageDraw
+ from api.utils.log_utils import logger


 def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):
@@ -24,7 +25,7 @@ def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):

         out_path = os.path.join(output_dir, f"{idx}.jpg")
         im.save(out_path, quality=95)
-         print("save result to: " + out_path)
+         logger.info("save result to: " + out_path)


 def draw_box(im, result, lables, threshold=0.5):
deepdoc/vision/t_recognizer.py CHANGED
@@ -10,7 +10,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
- import os, sys
+ import os
+ import sys
+ from api.utils.log_utils import logger
+
 sys.path.insert(
     0,
     os.path.abspath(
@@ -56,7 +59,7 @@ def main(args):
         } for t in lyt]
         img = draw_box(images[i], lyt, labels, float(args.threshold))
         img.save(outputs[i], quality=95)
-         print("save result to: " + outputs[i])
+         logger.info("save result to: " + outputs[i])


 def get_table_html(img, tb_cpns, ocr):
graphrag/claim_extractor.py CHANGED
@@ -7,7 +7,6 @@ Reference:

 import argparse
 import json
- import logging
 import re
 import traceback
 from dataclasses import dataclass
@@ -18,12 +17,12 @@ import tiktoken
 from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
 from rag.llm.chat_model import Base as CompletionLLM
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
+ from api.utils.log_utils import logger

 DEFAULT_TUPLE_DELIMITER = "<|>"
 DEFAULT_RECORD_DELIMITER = "##"
 DEFAULT_COMPLETION_DELIMITER = "<|COMPLETE|>"
 CLAIM_MAX_GLEANINGS = 1
- log = logging.getLogger(__name__)


 @dataclass
@@ -127,7 +126,7 @@ class ClaimExtractor:
             ]
             source_doc_map[document_id] = text
         except Exception as e:
-             log.exception("error extracting claim")
+             logger.exception("error extracting claim")
             self._on_error(
                 e,
                 traceback.format_exc(),
@@ -266,4 +265,4 @@ if __name__ == "__main__":
         "claim_description": ""
     }
     claim = ex(info)
-     print(json.dumps(claim.output, ensure_ascii=False, indent=2))
+     logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
graphrag/community_reports_extractor.py CHANGED
@@ -6,11 +6,10 @@ Reference:
 """

 import json
- import logging
 import re
 import traceback
 from dataclasses import dataclass
- from typing import Any, List, Callable
+ from typing import List, Callable
 import networkx as nx
 import pandas as pd
 from graphrag import leiden
@@ -20,8 +19,7 @@ from rag.llm.chat_model import Base as CompletionLLM
 from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
 from rag.utils import num_tokens_from_string
 from timeit import default_timer as timer
-
- log = logging.getLogger(__name__)
+ from api.utils.log_utils import logger


 @dataclass
@@ -82,7 +80,7 @@ class CommunityReportsExtractor:
                 response = re.sub(r"[^\}]*$", "", response)
                 response = re.sub(r"\{\{", "{", response)
                 response = re.sub(r"\}\}", "}", response)
-                 print(response)
+                 logger.info(response)
                 response = json.loads(response)
                 if not dict_has_keys_with_types(response, [
                     ("title", str),
@@ -94,7 +92,7 @@ class CommunityReportsExtractor:
                 response["weight"] = weight
                 response["entities"] = ents
             except Exception as e:
-                 print("ERROR: ", traceback.format_exc())
+                 logger.exception("CommunityReportsExtractor got exception")
                 self._on_error(e, traceback.format_exc(), None)
                 continue

@@ -127,5 +125,4 @@ class CommunityReportsExtractor:
         report_sections = "\n\n".join(
             f"## {finding_summary(f)}\n\n{finding_explanation(f)}" for f in findings
         )
-
         return f"# {title}\n\n{summary}\n\n{report_sections}"
graphrag/index.py CHANGED
@@ -28,6 +28,7 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
 from graphrag.mind_map_extractor import MindMapExtractor
 from rag.nlp import rag_tokenizer
 from rag.utils import num_tokens_from_string
+ from api.utils.log_utils import logger


 def graph_merge(g1, g2):
@@ -94,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
     chunks = []
     for n, attr in graph.nodes(data=True):
         if attr.get("rank", 0) == 0:
-             print(f"Ignore entity: {n}")
+             logger.info(f"Ignore entity: {n}")
             continue
         chunk = {
             "name_kwd": n,
@@ -136,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
     mg = mindmap(_chunks).output
     if not len(mg.keys()): return chunks

-     print(json.dumps(mg, ensure_ascii=False, indent=2))
+     logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
     chunks.append(
         {
             "content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
graphrag/mind_map_extractor.py CHANGED
@@ -18,7 +18,6 @@ import collections
 import logging
 import os
 import re
- import logging
 import traceback
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import dataclass
@@ -30,6 +29,7 @@ from rag.llm.chat_model import Base as CompletionLLM
 import markdown_to_json
 from functools import reduce
 from rag.utils import num_tokens_from_string
+ from api.utils.log_utils import logger


 @dataclass
@@ -193,6 +193,6 @@ class MindMapExtractor:
         gen_conf = {"temperature": 0.5}
         response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
         response = re.sub(r"```[^\n]*", "", response)
-         print(response)
-         print("---------------------------------------------------\n", self._todict(markdown_to_json.dictify(response)))
+         logger.info(response)
+         logger.info(self._todict(markdown_to_json.dictify(response)))
         return self._todict(markdown_to_json.dictify(response))
intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py CHANGED
@@ -2,7 +2,7 @@ import requests
  from bridge.context import ContextType # Import Context, ContextType
  from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
  from bridge import *
- from common.log import logger
+ from api.utils.log_utils import logger
  from plugins import Plugin, register # Import Plugin and register
  from plugins.event import Event, EventContext, EventAction # Import event-related classes
 
@@ -76,7 +76,7 @@
  logger.error(f"[RAGFlowChat] HTTP error when creating conversation: {response.status_code}")
  return f"Sorry, unable to connect to RAGFlow API (create conversation). HTTP status code: {response.status_code}"
  except Exception as e:
- logger.exception(f"[RAGFlowChat] Exception when creating conversation: {e}")
+ logger.exception("[RAGFlowChat] Exception when creating conversation")
  return f"Sorry, an internal error occurred: {str(e)}"
 
  # Step 2: Send the message and get a reply
@@ -108,5 +108,5 @@
  logger.error(f"[RAGFlowChat] HTTP error when getting answer: {response.status_code}")
  return f"Sorry, unable to connect to RAGFlow API (get reply). HTTP status code: {response.status_code}"
  except Exception as e:
- logger.exception(f"[RAGFlowChat] Exception when getting answer: {e}")
+ logger.exception("[RAGFlowChat] Exception when getting answer")
  return f"Sorry, an internal error occurred: {str(e)}"
rag/app/book.py CHANGED
@@ -20,6 +20,7 @@ from rag.nlp import bullets_category, is_english,remove_contents_table, \
  tokenize_chunks
  from rag.nlp import rag_tokenizer
  from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
+ from api.utils.log_utils import logger
 
 
  class Pdf(PdfParser):
@@ -38,7 +39,7 @@ class Pdf(PdfParser):
  start = timer()
  self._layouts_rec(zoomin)
  callback(0.67, "Layout analysis finished")
- print("layouts:", timer() - start)
+ logger.info("layouts: {}".format(timer() - start))
  self._table_transformer_job(zoomin)
  callback(0.68, "Table analysis finished")
  self._text_merge()
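
Worth noting for the layout-timing changes in this PR: `str.format` silently discards surplus arguments when the template has no placeholder, so a call like `"layouts:".format(elapsed)` logs just `layouts:` and loses the timing. The `{}` placeholder added above is what makes the value appear:

```python
>>> "layouts:".format(1.23)      # no placeholder: the argument is dropped
'layouts:'
>>> "layouts: {}".format(1.23)   # placeholder required for the value to show
'layouts: 1.23'
```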
rag/app/email.py CHANGED
@@ -18,7 +18,7 @@ import re
  from rag.nlp import rag_tokenizer, naive_merge, tokenize_chunks
  from deepdoc.parser import HtmlParser, TxtParser
  from timeit import default_timer as timer
- from rag.settings import cron_logger
+ from api.utils.log_utils import logger
  import io
 
 
@@ -86,7 +86,7 @@ def chunk(
  )
 
  main_res.extend(tokenize_chunks(chunks, doc, eng, None))
- cron_logger.info("naive_merge({}): {}".format(filename, timer() - st))
+ logger.info("naive_merge({}): {}".format(filename, timer() - st))
  # get the attachment info
  for part in msg.iter_attachments():
  content_disposition = part.get("Content-Disposition")
rag/app/laws.py CHANGED
@@ -21,7 +21,7 @@ from rag.nlp import bullets_category, remove_contents_table, hierarchical_merge,
  make_colon_as_title, tokenize_chunks, docx_question_level
  from rag.nlp import rag_tokenizer
  from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
- from rag.settings import cron_logger
+ from api.utils.log_utils import logger
 
 
  class Docx(DocxParser):
@@ -122,8 +122,8 @@ class Pdf(PdfParser):
  start = timer()
  self._layouts_rec(zoomin)
  callback(0.67, "Layout analysis finished")
- cron_logger.info("layouts:".format(
- (timer() - start) / (self.total_page + 0.1)))
+ logger.info("layouts: {}".format(
+ (timer() - start) / (self.total_page + 0.1)))
  self._naive_vertical_merge()
 
  callback(0.8, "Text extraction finished")
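
All of the files above now import one shared `logger` from `api.utils.log_utils`, which is what unifies the log files into one. The module itself is not shown in this diff; as a rough sketch only (the file path, format string, and rotation settings are assumptions, not the actual implementation), a unified logger of this kind could look like:

```python
# Hypothetical sketch of a shared logger module -- NOT the actual
# contents of api/utils/log_utils.py; names and settings are assumed.
import logging
import os
from logging.handlers import RotatingFileHandler

LOG_FILE = os.path.abspath(os.path.join("logs", "ragflow.log"))  # assumed path
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)

logger = logging.getLogger("ragflow")
logger.setLevel(logging.INFO)

_fmt = logging.Formatter("%(asctime)s %(levelname)s %(filename)s:%(lineno)s %(message)s")

# A single rotating file handler, so every subsystem writes to one file
_fh = RotatingFileHandler(LOG_FILE, maxBytes=10 * 1024 * 1024, backupCount=5)
_fh.setFormatter(_fmt)
logger.addHandler(_fh)

# Mirror to the console for interactive runs
_sh = logging.StreamHandler()
_sh.setFormatter(_fmt)
logger.addHandler(_sh)
```

Modules then do `from api.utils.log_utils import logger` once, as in every hunk above, instead of each subsystem configuring its own `cron_logger`, `flow_logger`, or ad-hoc `print` calls.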